In [None]:
# Importing the relevant libraries

import sys
# Add the parent folder to the module search path (for this session only; system variables are not changed)
sys.path.append("..")
import pandas as pd
import numpy as np
import warnings

pd.set_option('display.max_columns', None)

RSEED = 42
# Modeling Libraries

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px  # pip install plotly needs to executed
import plotly.graph_objects as go
# import Pywaffle lib
from pywaffle import Waffle 

In [None]:
# Read the flu-shot survey CSV (relative path) into the working DataFrame `df`.
flu_csv_path = '../data/Flu_Shot_Data_cleaned_2.csv'
df = pd.read_csv(flu_csv_path)

In [None]:
# Remove the 'Unnamed: 0' column
# (presumably the index column written out when the CSV was saved - verify).
# Reassigning instead of dropping in place, together with errors='ignore',
# lets this cell be re-run without raising a KeyError once the column is gone.
df = df.drop(columns='Unnamed: 0', errors='ignore')

In [None]:
# Preview the first two rows of the data.
df.head(n=2)

### Investigating demographic data for the sample description

Attributes potentially considered for the sample description:
- age
- gender
- race
- employment status
- income poverty
- education

### Library used: PyWaffle 
- Install with: pip install pywaffle
- The documentation can be found here: https://pywaffle.readthedocs.io/en/latest/
- Icons from Font Awesome can be used: https://fontawesome.com/v5.15/icons

### Plotting distribution of gender

In [None]:
# Share of rows per 'sex' value, as percentages rounded to whole numbers.
df_gender = (
    df.groupby('sex')
    .size()
    .pipe(lambda counts: counts / counts.sum() * 100)
    .round()
    .reset_index(name='percentage')
)
df_gender

In [None]:
# Waffle chart of the gender split, drawn in 10 rows from df_gender['percentage'].
# The percentages shown in the legend are read from df_gender instead of being
# typed in by hand, so the legend can never disagree with the plotted values.
# The category names are matched to df_gender by row order; presumably that is
# Female, Male (groupby sorts its keys) - verify against the table shown above.
gender_names = ['Female', 'Male']
gender_labels = [
    '{} {:.0f}%'.format(name, share)
    for name, share in zip(gender_names, df_gender['percentage'])
]

fig = plt.figure(
    FigureClass=Waffle,
    rows=10,
    values=df_gender['percentage'],
    colors=["#781fb8", "#335780"],
    icons=['female', 'male'],
    font_size=24,
    icon_style='solid',
    icon_legend=True,
    legend={
        'labels': gender_labels,
        'loc': 'upper left',
        'bbox_to_anchor': (1, 1)
    },
    title={'label': 'Distribution of gender', 'loc': 'center', 'fontdict': {'fontsize': 12}},
    figsize=(10, 5)
)

### Plotting distribution of age 

In [None]:
# Share of rows per 'age_group' value, as percentages rounded to whole numbers.
df_age = (
    df.groupby('age_group')
    .size()
    .pipe(lambda counts: counts / counts.sum() * 100)
    .round()
    .reset_index(name='percentage')
)
df_age

In [None]:
# Waffle chart of the age-group shares, drawn in 5 rows from df_age['percentage'].
fig = plt.figure(
    FigureClass=Waffle,
    figsize=(10, 5),
    rows=5,
    values=df_age['percentage'],
    rounding_rule='floor',
    # One colour per age group; every group uses the same solid hourglass icon.
    colors=["#668191", "#649cbd", "#2b8b9e", "#4a89ff", "#1768ff"],
    icons=['hourglass'] * 5,
    icon_style='solid',
    icon_legend=True,
    font_size=22,
    title=dict(label='Distribution of Age Groups', loc='center', fontdict=dict(fontsize=14)),
    legend=dict(
        labels=['18 - 34 Years', '35 - 44 Years', '45 - 54 Years', '55 - 64 Years', '65+ Years'],
        loc='upper left',
        bbox_to_anchor=(1, 1),
    ),
)

### Plotting distribution of race

In [None]:
# Share of rows per 'race' value, as percentages rounded to whole numbers.
df_race = (
    df.groupby('race')
    .size()
    .pipe(lambda counts: counts / counts.sum() * 100)
    .round()
    .reset_index(name='percentage')
)
df_race

In [None]:
# Waffle chart of the race shares, drawn in 10 rows from df_race['percentage'].
fig = plt.figure(
    FigureClass=Waffle,
    figsize=(10, 5),
    rows=10,
    values=df_race['percentage'],
    rounding_rule='floor',
    # One colour per category; every category uses the same 'user' icon in the regular style.
    colors=["#000000", "#ff9500", "#ca76db", "#bbbdbf"],
    icons=['user'] * 4,
    icon_style='regular',
    icon_legend=True,
    font_size=24,
    title=dict(label='Distribution of Ethnicities', loc='center', fontdict=dict(fontsize=14)),
    legend=dict(
        labels=['Black', 'Hispanic', 'Other or Multiple', 'White'],
        loc='upper left',
        bbox_to_anchor=(1, 1),
    ),
)

### Distribution of Poverty

In [None]:
# Share of rows per 'income_poverty' value, as percentages rounded to whole numbers.
df_pov = (
    df.groupby('income_poverty')
    .size()
    .pipe(lambda counts: counts / counts.sum() * 100)
    .round()
    .reset_index(name='percentage')
)
df_pov

In [None]:
# Waffle chart of the income/poverty shares, drawn in 10 rows from df_pov['percentage'].
# The percentages shown in the legend are read from df_pov instead of being
# typed in by hand, so the legend can never disagree with the plotted values.
# The category names are matched to df_pov by row order (groupby sorts its keys);
# verify the order against the table shown above.
poverty_names = ['<= 75k, above poverty', '> 75k', 'Below poverty']
poverty_labels = [
    '{} ({:.0f}%)'.format(name, share)
    for name, share in zip(poverty_names, df_pov['percentage'])
]

fig = plt.figure(
    FigureClass=Waffle,
    rows=10,
    values=df_pov['percentage'],
    colors=["#668191", "#2b8b9e", "#1768ff"],
    icons=['dollar-sign', 'dollar-sign', 'dollar-sign'],
    font_size=24,
    icon_style='solid',
    icon_legend=True,
    rounding_rule='floor',
    legend={
        'labels': poverty_labels,
        'loc': 'upper left',
        'bbox_to_anchor': (1, 1)
    },
    title={'label': 'Income Poverty', 'loc': 'center', 'fontdict': {'fontsize': 14}},
    figsize=(10, 5)
)

### Employment

In [None]:
# Share of rows per 'employment_status' value, as percentages rounded to whole numbers.
df_work = (
    df.groupby('employment_status')
    .size()
    .pipe(lambda counts: counts / counts.sum() * 100)
    .round()
    .reset_index(name='percentage')
)
df_work

In [None]:
# Waffle chart of the employment-status shares, drawn in 10 rows from df_work['percentage'].
# The percentages shown in the legend are read from df_work instead of being
# typed in by hand, so the legend can never disagree with the plotted values.
# The category names are matched to df_work by row order (groupby sorts its keys);
# verify the order against the table shown above.
# NOTE(review): the shares are rounded independently, so they need not add up to
# exactly 100 (the previously hand-typed labels read 54 + 41 + 6 = 101); the chart
# may then contain one block more or less than a full 10 x 10 grid.
employment_names = ['Employed', 'Not in Labor Force', 'Unemployed']
employment_labels = [
    '{} {:.0f}%'.format(name, share)
    for name, share in zip(employment_names, df_work['percentage'])
]

fig = plt.figure(
    FigureClass=Waffle,
    rows=10,
    values=df_work['percentage'],
    colors=["#668191", "#2b8b9e", "#1768ff"],
    icons=['hammer', 'hammer', 'hammer'],
    font_size=24,
    icon_style='solid',
    icon_legend=True,
    legend={
        'labels': employment_labels,
        'loc': 'upper left',
        'bbox_to_anchor': (1, 1)
    },
    title={'label': 'Employment Status', 'loc': 'center', 'fontdict': {'fontsize': 12}},
    figsize=(10, 5)
)

In [None]:
# Waffle chart of the gender split based on absolute row counts, with the count
# of each category shown in the legend.
#
# This cell previously could not run. What was repaired:
#   - the 'icons' entry was not valid Python (unbalanced quotes and braces);
#   - the grouping column is 'sex', as in the percentage table for gender,
#     not 'gender';
#   - values and labels are taken from the grouped frame, not from `df`,
#     which has no 'counts' column;
#   - labels are built from the category and its count (itertuples() puts the
#     row index first, so n[0]/n[1] were index and category);
#   - the title described a different dataset ('# Vehicles by Class').
# The grouped frame gets its own name so that running this cell does not
# overwrite `df_gender`, which the gender percentage chart reads.
df_gender_counts = df.groupby('sex').size().reset_index(name='counts')
n_categories = df_gender_counts.shape[0]
colors = [plt.cm.inferno_r(i / float(n_categories)) for i in range(n_categories)]

gender_count_labels = [
    "{0} ({1})".format(sex, count)
    for sex, count in zip(df_gender_counts['sex'], df_gender_counts['counts'])
]

fig = plt.figure(
    FigureClass=Waffle,
    rows=7,
    # With rows AND columns given, PyWaffle scales the values to fit the grid.
    # Without this, one block per row of `df` would be drawn, which is
    # presumably far too many to render - TODO confirm the dataset size.
    columns=20,
    values=df_gender_counts['counts'],
    labels=gender_count_labels,
    colors=colors,
    # Assumes exactly two categories in the order Female, Male - verify.
    icons=['female', 'male'],
    icon_style='solid',
    icon_legend=True,
    legend={'loc': 'upper left', 'bbox_to_anchor': (1.05, 1), 'fontsize': 12},
    title={'label': 'Respondents by gender', 'loc': 'center', 'fontsize': 18},
    figsize=(16, 9)
)