In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Load the infectious-disease case table. The cells below rely on its
# 'Disease', 'Sex', 'Year' and 'Cases' columns.
# NOTE(review): judging by the file name there is one row per
# disease / county / year / sex -- confirm against the CSV.
df = pd.read_csv("data/infectious-diseases-by-county-year-and-sex.csv")

In [None]:
# Preview the first five rows to check how the file was parsed.
df.head()

In [None]:
# Folder receiving one trend plot per disease. It is created up front so
# that saving does not fail on a fresh checkout where it is missing.
IMAGE_DIR = 'images'
os.makedirs(IMAGE_DIR, exist_ok=True)

diseases = df['Disease'].unique()

# One frame per disease, in the same order as `diseases`, restricted to the
# rows whose 'Sex' is 'TOTAL'. Later cells read this list.
d_list = []
for d in diseases:
    tmp_df = df[df['Disease'] == d]
    total_tmp_df = tmp_df[tmp_df['Sex'] == 'TOTAL']
    d_list.append(total_tmp_df)

    # Total cases per year in a single pass, instead of re-filtering the
    # frame once per year. groupby also returns the years in ascending
    # order, so the line below is drawn chronologically.
    total_sum = total_tmp_df.groupby('Year', as_index=False)['Cases'].sum()

    # A disease without any 'TOTAL' rows has nothing to draw; plotting an
    # empty frame would raise, so skip the figure (the frame is still kept
    # in d_list above).
    if total_sum.empty:
        continue

    fig, ax = plt.subplots(figsize=(25,14))
    total_sum.plot(x='Year', y='Cases', ax=ax)
    ax.set_title(d)

    # Dots are replaced so that nothing in the disease name is read as a
    # file extension; path separators are replaced so the name cannot point
    # into another folder.
    safe_name = d.replace('.', '_').replace('/', '_').replace('\\', '_')
    fig.savefig(os.path.join(IMAGE_DIR, safe_name))

    # Close this specific figure so that looping over many diseases does not
    # accumulate open figures in memory.
    plt.close(fig)

In [None]:
# Inspect the 'TOTAL' rows collected for the first disease in the list.
d_list[0]

In [None]:
# County-level income figures; the file is semicolon-separated.
county_info = pd.read_csv("data/county_info.csv", sep=';')

# These three columns are cleaned the same way: drop the '$' character from
# every entry, then convert the remaining text to integers.
# NOTE(review): assumes every entry is a string that is a plain integer once
# the '$' is removed -- confirm against the CSV.
income_columns = ['Per capita income', 'Median household income', 'Median family income']
for income_column in income_columns:
    without_dollar = county_info[income_column].apply(lambda text: text.replace('$', ''))
    county_info[income_column] = without_dollar.astype(int)

county_info.head()

In [None]:
# Income measures drawn side by side for each county.
income_columns = ['Per capita income', 'Median household income', 'Median family income']

# The counties are split at the midpoint and drawn on two figures
# (presumably to keep the bar labels readable).
half_size = len(county_info) // 2
width = 0.8

# First figure: the rows before the midpoint.
first_half = county_info.iloc[:half_size]
fig, ax = plt.subplots(figsize=(18, 8))
first_half.plot.bar(x='County', y=income_columns, ax=ax, width=width)
plt.show()

# Second figure: the rows from the midpoint onwards.
second_half = county_info.iloc[half_size:]
fig2, ax2 = plt.subplots(figsize=(18, 8))
second_half.plot.bar(x='County', y=income_columns, ax=ax2, width=width)
plt.show()


In [None]:


from statsmodels.tsa.seasonal import seasonal_decompose
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Switch seaborn to its "ticks" style for the figures drawn from here on.
sns.set(style="ticks")
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used below, which makes API changes harder to notice.
warnings.filterwarnings('ignore')

# Length of one seasonal cycle, counted in rows of the series handed to
# seasonal_decompose.
# NOTE(review): the series below is indexed by 'Year', so 12 does not mean
# "12 months" here -- confirm the intended period. Each frame may also hold
# several rows per year (the earlier cell sums them per year before
# plotting); confirm whether the series should be aggregated first.
decompfreq = 12
model = 'additive'

for d in d_list:
    # The case counts live in the 'Cases' column (the same column plotted as
    # "observed" below); gaps are filled linearly before decomposing.
    cases_by_year = d.set_index("Year")["Cases"].interpolate("linear")

    # `period` is the current name of the cycle-length argument; the older
    # `freq` keyword was deprecated and later removed from statsmodels.
    decomposition = seasonal_decompose(
        cases_by_year,
        period=decompfreq,
        model=model)
    trend = decomposition.trend
    seasonal = decomposition.seasonal
    residual = decomposition.resid

    fig, ax = plt.subplots(figsize=(18,6))
    d.plot(x="Year", y="Cases", ax=ax, label="observed", c='lightgrey')
    trend.plot(ax=ax, label="trend")
    # Every frame in d_list was filtered to a single disease, so its first
    # 'Disease' value names the figure.
    ax.set_title(d["Disease"].iloc[0])
    plt.legend(loc='upper left')
