# Exercise 4-1: Create some Seaborn plots

## Get the data

In [None]:
import pandas as pd
# Suppress the warnings whose message matches "is_categorical_dtype" or
# "use_inf_as_na" so they do not clutter the seaborn output
import warnings
warnings.filterwarnings(action="ignore", message="is_categorical_dtype")
warnings.filterwarnings(action="ignore", message="use_inf_as_na")

# Relative path to the folder that holds the data files
data_dir = '../../data'

In [None]:
# Load the pickled awards summary DataFrame and preview its first rows
awards_summary_df = pd.read_pickle(
    filepath_or_buffer=f'{data_dir}/pkl_files/awards_summary_diryr_df.pkl'
)
awards_summary_df.head()

In [None]:
# Reshape the long summary into a wide DataFrame with pivot():
# one row per awd_year, one column per dir_abbr value,
# and awd_count as the cell values
awards_wide_df = awards_summary_df.pivot(
    columns='dir_abbr',
    index='awd_year',
    values='awd_count',
)
awards_wide_df.head()

## The basic Seaborn parameters

In [None]:
import seaborn as sns

In [None]:
# Figure-level line plot: awd_count over awd_year, colored by dir_abbr
sns.relplot(
    kind='line',
    data=awards_summary_df,
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
)

In [None]:
# The same data drawn with the axes-level lineplot() function
sns.lineplot(
    data=awards_summary_df,
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
)

In [None]:
# Line plot using the 'bright' palette, with an explicit height and aspect
sns.relplot(
    kind='line',
    data=awards_summary_df,
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
    palette='bright',
    aspect=2,
    height=4,
)

In [None]:
# One subplot per dir_abbr value, wrapped after four columns, no legend
sns.relplot(
    kind='line',
    data=awards_summary_df,
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
    palette='bright',
    col='dir_abbr',
    col_wrap=4,
    height=4,
    aspect=1.25,
    legend=False,
)

## Enhancing plots

In [None]:
# Keep the Axes returned by lineplot() so it can be customized
ax = sns.lineplot(
    data=awards_summary_df,
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
)
# Set the title and both axis labels in a single call
ax.set(
    title='Awards by Directorate',
    ylabel='# of Awards',
    xlabel='awd_year',
)

In [None]:
# relplot() returns a grid object; its Axes are customized one at a time
g = sns.relplot(
    kind='line',
    data=awards_summary_df,
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
    aspect=1.5,
)
for ax in g.axes.ravel():
    ax.set(
        title='Awards by Directorate',
        ylabel='# of Awards',
    )

In [None]:
# Add explicit x ticks (every second year) and fixed axis limits
g = sns.relplot(
    kind='line',
    data=awards_summary_df,
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
    aspect=1.5,
)
for ax in g.axes.ravel():
    ax.set(
        title='Awards by Directorate (2014-2024)',
        ylabel='# of Awards',
        xlabel='awd_year',
        xticks=list(range(2014, 2025, 2)),
        xlim=(2014, 2024),
        ylim=(0, 3000),
    )

In [None]:
# Line plot with fixed ticks and limits, plus rotated x tick labels
g = sns.relplot(data=awards_summary_df, kind='line',
    x='awd_year', y='awd_count', hue='dir_abbr', aspect=1.5)
for ax in g.axes.flat:
    ax.set(title='Awards by Directorate (2014-2024)', ylabel='# of Awards', xlabel='awd_year',
        xticks=list(range(2014, 2025, 2)),
        xlim=(2014,2024), ylim=(0,3000))
    # Rotate inside the loop so every Axes in the grid is handled, rather
    # than relying on the loop variable still being bound after the loop
    ax.tick_params('x', labelrotation=30)

In [None]:
# Switch to the 'whitegrid' style, then plot only the 2014-2024 rows
sns.set_style('whitegrid')
sns.relplot(
    kind='line',
    data=awards_summary_df.query('awd_year >= 2014 and awd_year <= 2024'),
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
    aspect=1.25,
)

In [None]:
# Same plot again, this time with the 'darkgrid' style
sns.set_style('darkgrid')
sns.relplot(
    kind='line',
    data=awards_summary_df.query('awd_year >= 2014 and awd_year <= 2024'),
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
    aspect=1.25,
)

In [None]:
# Use the 'ticks' style for the plots that follow
sns.set_style(style='ticks')

In [None]:
# One subplot per dir_abbr value for the 2014-2024 rows, four per row
g = sns.relplot(
    kind='line',
    data=awards_summary_df.query('awd_year >= 2014 and awd_year <= 2024'),
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
    col='dir_abbr',
    col_wrap=4,
    height=4,
    legend=False,
)
# Figure-wide title, nudged above the top row of subplots
g.fig.suptitle('NSF Awards by Directorate (2014-2024)', y=1.025)
# Apply the same y label, x ticks and y limits to every subplot
g.set(
    ylabel='# of Awards',
    xticks=list(range(2014, 2025, 3)),
    ylim=(0, 1750),
)

## Relational plots

In [None]:
# Line plot of awd_count by awd_year, one hue per dir_abbr value
sns.relplot(
    kind='line',
    data=awards_summary_df,
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
)

In [None]:
# No hue here: a single line, with errorbar='sd' selecting the error band
sns.relplot(
    kind='line',
    data=awards_summary_df,
    x='awd_year',
    y='awd_count',
    errorbar='sd',
)

In [None]:
# Scatter version of the same relationship
sns.relplot(
    kind='scatter',
    data=awards_summary_df,
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
)

In [None]:
# Scatter plot of the 2014-2024 rows with marker size scaled by awd_count.
# The returned Axes must be bound to `ax`: the original bound it to `a`,
# so the set() call below changed a stale Axes left over from an earlier cell.
ax = sns.scatterplot(data=awards_summary_df.query('awd_year >= 2014 and awd_year <= 2024'),
    x='awd_year', y='awd_count', hue='dir_abbr',
    size='awd_count', sizes=(10,100))
# Tick every second year on this plot's x axis
ax.set(xticks=list(range(2014, 2025, 2)))

## Categorical plots

In [None]:
# Bar plot for 2018 and 2024 only, with errorbar set to ('ci', 75)
sns.barplot(
    data=awards_summary_df.query('awd_year in (2018,2024)'),
    x='awd_year',
    y='awd_count',
    errorbar=('ci', 75),
)

In [None]:
# Horizontal bars: awd_count on x, awd_year on y, one hue per dir_abbr value
sns.catplot(
    kind='bar',
    data=awards_summary_df.query('awd_year in (2018,2024)'),
    x='awd_count',
    y='awd_year',
    hue='dir_abbr',
    orient='h',
)

In [None]:
# Box plot of awd_count for each year from 2018 through 2024
sns.catplot(
    kind='box',
    data=awards_summary_df.query('awd_year >= 2018 and awd_year <= 2024'),
    x='awd_year',
    y='awd_count',
)

In [None]:
# Horizontal box plot of awd_count for each dir_abbr value
sns.catplot(
    kind='box',
    data=awards_summary_df,
    x='awd_count',
    y='dir_abbr',
    orient='h',
)

## Distribution plots

In [None]:
# Histogram of awd_count with the default binning
sns.displot(
    kind='hist',
    data=awards_summary_df,
    x='awd_count',
)

In [None]:
# Histogram of awd_count split into 16 bins
sns.displot(
    kind='hist',
    data=awards_summary_df,
    x='awd_count',
    bins=16,
)

In [None]:
# Kernel density estimate of awd_count
sns.displot(
    kind='kde',
    data=awards_summary_df,
    x='awd_count',
)

In [None]:
# Empirical cumulative distribution of awd_count, one curve per dir_abbr value
sns.displot(
    kind='ecdf',
    data=awards_summary_df,
    x='awd_count',
    hue='dir_abbr',
)

In [None]:
# Histogram with 8 bins and a KDE curve drawn over it
sns.displot(
    kind='hist',
    data=awards_summary_df,
    x='awd_count',
    bins=8,
    kde=True,
)

In [None]:
# Filled KDE per dir_abbr value, one subplot each, wrapped after four columns;
# warn_singular=False is passed along with the other KDE options
sns.displot(
    kind='kde',
    data=awards_summary_df,
    x='awd_count',
    hue='dir_abbr',
    col='dir_abbr',
    col_wrap=4,
    height=3,
    fill=True,
    legend=False,
    warn_singular=False,
)

## Other techniques for enhancing a plot

In [None]:
# Customize the plot with the individual Axes setter methods
g = sns.relplot(
    kind='line',
    data=awards_summary_df,
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
    aspect=1.25,
)
for ax in g.axes.ravel():
    # Title and axis labels (the x label is blanked out)
    ax.set_title('Awards by Directorate (2014-2022)')
    ax.set_xlabel('')
    # Fix the tick positions first, then give each tick a custom label
    ax.set_xticks(list(range(2014, 2023, 2)))
    ax.set_xticklabels(['awd_year ' + str(x) for x in range(2014, 2023, 2)])
    ax.tick_params('x', labelrotation=30)
    ax.set_ylabel('# of Awards')
    # Axis limits
    ax.set_xlim(2014, 2022)
    ax.set_ylim(0, 3000)

In [None]:
# Line plot with a text annotation and an arrow pointing at a data location
ax = sns.lineplot(
    data=awards_summary_df,
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
)
ax.set_title('Awards by Directorate')
ax.set_ylabel('# of Awards')
ax.set_ylim(0, 3100)
# xy is the point the arrow targets; xytext is where the label is placed
ax.annotate(
    text='COVID Pandemic',
    xy=(2020, 2700),
    xytext=(2021, 3000),
    arrowprops={
        'facecolor': 'red',
        'width': 3,
        'headwidth': 12,
        'headlength': 6,
    },
)

In [None]:
# Build a palette from a hand-picked list of colors and make it the default
colors = [
    'black', 'red', 'orange', 'blue',
    'green', 'purple', 'pink', 'brown',
]
sns.set_palette(sns.color_palette(colors))
sns.relplot(
    kind='line',
    data=awards_summary_df,
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
)

In [None]:
# Make 'tab10' the active palette again
sns.set_palette(palette=sns.color_palette(palette='tab10'))

In [None]:
# Subplots per dir_abbr value for the 2014-2024 rows
g = sns.relplot(
    kind='line',
    data=awards_summary_df.query('awd_year >= 2014 and awd_year <= 2024'),
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
    col='dir_abbr',
    col_wrap=4,
    height=3,
    aspect=1.2,
    legend=False,
)
g.fig.suptitle('Awards by Directorate (2014-2024)', y=1.025)
# Give every subplot the same y label, tick positions and tick rotation
for ax in g.axes.ravel():
    ax.set_ylabel('# of Awards')
    ax.set_xticks(list(range(2014, 2025, 2)))
    ax.tick_params('x', labelrotation=45)

In [None]:
# Subplots per dir_abbr value for the 2014-2024 rows, each titled with
# just its dir_abbr value
g = sns.relplot(
    data=awards_summary_df.query('awd_year >= 2014 and awd_year <= 2024'), 
    kind='line', x='awd_year', y='awd_count', hue='dir_abbr', legend=False, 
    col='dir_abbr', col_wrap=4, height=3, aspect=1.2)
g.fig.suptitle('Awards by Directorate (2014-2024)', y=1.025)
# Let the grid title each subplot from the column value it actually plots.
# Building the titles from the unfiltered DataFrame's first-appearance order
# can mislabel subplots when the grid orders its columns differently
# (for example with a categorical dtype, or values absent after filtering).
g.set_titles(col_template='{col_name}')
for ax in g.axes.flat:
    ax.set_ylabel('# of Awards')
    ax.set_xticks(list(range(2014, 2025, 2)))
    ax.tick_params('x', labelrotation=45)

In [None]:
# import a Matplotlib module and set the figure size
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(10,7.5)) 

# create the line plot, drawing explicitly on the Axes created above
# instead of relying on whichever Axes happens to be current
sns.lineplot(data=awards_summary_df, x='awd_year', y='awd_count', hue='dir_abbr', ax=ax)

# use the ax object to enhance the plot (nothing is saved in this cell)
ax.set(title='Awards by Directorate (2014-2024)', 
    ylabel='# of Awards', 
    xticks=list(range(2014, 2025, 2)), 
    xlim=(2014,2024), ylim=(0,3000))
ax.tick_params('x', labelrotation=45)

In [None]:
# Bar plot per dir_abbr value using only the 2018, 2020, 2022 and 2024 rows
sns.catplot(
    kind='bar',
    data=awards_summary_df.query('awd_year in (2018, 2020, 2022, 2024)'),
    x='dir_abbr',
    y='awd_count',
    aspect=1.8,
)

In [None]:
# The same bars split into one subplot per awd_year, wrapped after three columns
sns.catplot(
    kind='bar',
    data=awards_summary_df.query('awd_year in (2018,2020,2022,2024)'),
    x='dir_abbr',
    y='awd_count',
    col='awd_year',
    col_wrap=3,
    height=4,
)

In [None]:
# Line plot restricted to the rows whose dir_abbr is "MPS"
ax = sns.lineplot(
    data=awards_summary_df.query('dir_abbr == "MPS"'),
    x='awd_year',
    y='awd_count',
)
ax.set(
    title='MPS',
    ylabel='# of Awards',
)

In [None]:
# Line plot of the 2014-2024 rows, one hue per dir_abbr value
sns.relplot(
    kind='line',
    data=awards_summary_df.query('awd_year >= 2014 and awd_year <= 2024'),
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
)

In [None]:
# Scatter plot of the 2014-2024 rows with marker size scaled by awd_count
sns.relplot(
    kind='scatter',
    data=awards_summary_df.query('awd_year >= 2014 and awd_year <= 2024'),
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
    size='awd_count',
    sizes=(10, 50),
)

In [None]:
# Bar charts for 2020-2024, one subplot per dir_abbr value, saved to a PNG file
g = sns.catplot(
    kind='bar',
    data=awards_summary_df.query('awd_year in (2020,2021,2022,2023,2024)'),
    x='awd_year',
    y='awd_count',
    hue='dir_abbr',
    col='dir_abbr',
    col_wrap=4,
    height=4,
    legend=False,
)
g.fig.suptitle('Awards by Directorate', y=1.025)
for ax in g.axes.ravel():
    ax.set(ylabel='# of Awards')
# Write the finished figure to disk
g.savefig('barCharts.png')

## One more quick plot: award amount by award type and month

In [None]:
# Load the awards_with_type_df DataFrame from its pickle file
awards_with_type_df = pd.read_pickle(
    filepath_or_buffer=f'{data_dir}/pkl_files/awards_with_type_df.pkl'
)

In [None]:
# Add an 'awd_month_and_year' column: awd_eff_date converted to a
# monthly period and then to a string
awards_with_type_df['awd_month_and_year'] = (
    awards_with_type_df['awd_eff_date']
    .dt.to_period('M')
    .astype(str)
)

In [None]:
# Line plot of awd_amount by awd_month_and_year, one hue per awd_type value
sns.relplot(
    kind='line',
    data=awards_with_type_df,
    x='awd_month_and_year',
    y='awd_amount',
    hue='awd_type',
)