In [None]:
# The AWS user running this notebook requires at least the "ResourceGroupsandTagEditorReadOnlyAccess" policy.
# The user is determined by the AWS CLI profile configured at the command line before running the program;
#       see  -  https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html
# by Stuart Heginbotham, last update 2019-08-07
# Tested on an account with hundreds of ARNs. Using this program:
#                  -will incur costs
#                  -is at your own risk, with absolutely no warranty or other promise of performance from the author

import boto3
import pandas as pd

# Resource Groups Tagging API client. No region or credentials are passed here,
# so boto3 resolves them from its default configuration (e.g. the CLI profile).
client = boto3.client(service_name='resourcegroupstaggingapi')

In [None]:
# Fetch every resource/tag mapping via get_resources().
# Assumes a single account and region, both already determined by the client.

# get_resources supports pagination, so let a paginator walk through all pages
paginator = client.get_paginator('get_resources')
pages = paginator.paginate()

result = []    # one entry per resource ARN, each carrying its tag key/value pairs
page_cnt = 0   # number of pages fetched (stays 0 when nothing is returned)
for page_cnt, page in enumerate(pages, start=1):
    # each page stores its mappings under the 'ResourceTagMappingList' key
    result.extend(page['ResourceTagMappingList'])
item_cnt = len(result)   # total number of resource ARNs collected
        
# Flatten the API response into one row per (AWS service, tag key, tag value).
#
# An ARN has the form arn:partition:service:region:account-id:resource, so the
# service name is the third colon-separated field (index 2).
tag_rows = []
for resource in result:
    arn_parts = resource['ResourceARN'].split(':')
    if len(arn_parts) < 3:
        # no service field to attribute the tags to; skip the malformed ARN
        continue
    # resources with a missing or empty tag list simply contribute no rows
    for tag in resource.get('Tags') or []:
        tag_rows.append({
            'AWSService': arn_parts[2],
            'TagKey': tag['Key'],
            'TagValue': tag['Value'],
        })

# Naming the columns explicitly keeps the schema intact even when no tagged
# resources were returned, so the following cells do not fail with a KeyError.
df = pd.DataFrame(tag_rows, columns=['AWSService', 'TagKey', 'TagValue'])

In [None]:
# Optional: show the ten (tag key, AWS service) combinations with the highest count.
# The count is the number of rows (tag occurrences) per combination.
# NOTE(review): if the number of *distinct* tag values is intended, 'nunique'
# would be needed instead of 'count' - confirm.
(
    df.groupby(['TagKey', 'AWSService'])
      .count()
      .sort_values(by='TagValue', ascending=False)
      .head(10)
)

In [None]:
# Summary frame: one row per (tag key, AWS service) combination, where the
# 'TagValue' column holds the number of rows (tag occurrences) for that combination.
# NOTE(review): this is a row count, not a count of distinct values - confirm intent.
newdf = df.groupby(['TagKey', 'AWSService']).count().reset_index()

In [None]:
# Pivot the summary into a tag-key x AWS-service grid of counts; this grid is
# the input for the heatmap (combinations that never occur become NaN).
pv = pd.pivot(
    newdf,
    index='TagKey',
    columns='AWSService',
    values='TagValue',
)

In [None]:
# optional: switch on inline display of graphs in Anaconda
%matplotlib inline

In [None]:
import seaborn as sns
import matplotlib
import matplotlib.backends.backend_pdf
import matplotlib.pyplot as plt  # needed for plt.subplots(); previously missing (NameError)

# PDF output file and page title
PDFFILE='tagplot.pdf'
PAGETITLE='EXAMPLE - tag distribution Training'

# set the plot size to A4 landscape (inches); the PDF page takes its size from the figure
fig, ax = plt.subplots(figsize=(11.69,8.27))
# add the page title
fig.text(0.2, 0.9, PAGETITLE, fontsize=14, fontweight='bold')
# draw the heatmap from the pivot table onto the axes created above,
# rather than relying on the implicit "current axes"
sns.heatmap(pv, ax=ax)

# Write the figure to the PDF; the context manager closes the file even if
# saving raises. 'papersize' was dropped: it is not a savefig option (recent
# Matplotlib versions reject unknown keywords) and the page size already
# follows figsize.
with matplotlib.backends.backend_pdf.PdfPages(PDFFILE) as thePdf:
    thePdf.savefig(fig, orientation='landscape')
