In [None]:
# libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

**Lyme disease** <br>
[Lyme](https://www.cdc.gov/lyme/index.html) is the most common vector-borne disease in the United States. Lyme disease is caused by the bacterium _Borrelia burgdorferi_ and, rarely, _Borrelia mayonii_. The bacteria are transmitted to humans through the bite of infected blacklegged ticks.

With the changing climate, the incidence rate and the range of Lyme disease are increasing. Hence, [Lyme disease is a good Climate Change Indicator](https://www.cdc.gov/lyme/stats/tables.html).

In [None]:
# Load the Lyme disease table from a CSV file into a DataFrame.
# The path is relative to the notebook's working directory.
df1 = pd.read_csv('./data/lyme.csv')
# Preview the first rows to check that the file parsed as expected.
df1.head()

In [None]:
# Preview the last rows of the table.
df1.tail()

In [None]:
# Data type of each column.
df1.dtypes

In [None]:
# Select a single column by its label; the result is a Series.
df1["Year"]

In [None]:
# Simple statistics on a column: the mean of 'Incidence Rate' over all rows.
df1['Incidence Rate'].mean()

`loc` and `iloc` for slicing

`iloc` searches by index number: integer positioning<br>
`loc` searches for index label : label positioning

In [None]:
# Slicing by integer position: the first row (position 0), returned as a Series.
df1.iloc[0]

In [None]:
# Rows at positions 0-4; the same rows head() returns with its default of five.
df1.iloc[:5]

In [None]:
# Every second row among the first ten: positions 0, 2, 4, 6 and 8.
df1.iloc[:10:2]

In [None]:
# Rows and columns together: the first five rows of the column at
# integer position 1 (the second column).
df1.iloc[:5,1]

In [None]:
# Small frame with string index labels, used to contrast iloc (integer
# positions) with loc (index labels).
test_df = pd.DataFrame(
    data={'A': [1, 2, 3], 'B': [10, 11, 12]},
    index=list('abc'),
)
test_df

In [None]:
# Row at integer position 1 (the second row).
test_df.iloc[1]

In [None]:
# Row whose index label is 'b'.
test_df.loc['b']

In [None]:
# Translate an index label into its integer position.
test_df.index.get_loc('b')

In [None]:
# Slicing by Boolean, step 1: the criterion.
# Comparing a column to a value yields a Boolean Series, one True/False per row.
test_df['B'] == 11

In [None]:
# Step 2: pass the Boolean Series to loc to keep only the rows where it is True.
test_df.loc[test_df['B'] == 11]

In [None]:
# Slicing by Boolean: rows whose Year is 2000 or earlier.
df1.loc[df1['Year'] <= 2000]

In [None]:
# Boolean slicing to keep only the relevant data: rows whose Year is later than 2000.
df1.loc[df1['Year'] > 2000]

In [None]:
# Compare the mean incidence rate on either side of the year 2000.
# loc receives the row mask and the column label in a single call, which
# avoids chained indexing (df.loc[mask][col]).
print('Average incidence rate after 2000: ')
print(df1.loc[df1['Year'] > 2000, 'Incidence Rate'].mean())

# The mask is Year <= 2000, so the year 2000 itself is part of this group.
print('Average incidence rate through 2000: ')
print(df1.loc[df1['Year'] <= 2000, 'Incidence Rate'].mean())

In [None]:
# Same comparison as an unrounded mean, with each value rounded to two decimals.
# loc receives the row mask and the column label in a single call, which
# avoids chained indexing (df.loc[mask][col]).
print('Average incidence rate after 2000: ')
print(round(df1.loc[df1['Year'] > 2000, 'Incidence Rate'].mean(), 2))

# The mask is Year <= 2000, so the year 2000 itself is part of this group.
print('Average incidence rate through 2000: ')
print(round(df1.loc[df1['Year'] <= 2000, 'Incidence Rate'].mean(), 2))

In [None]:
# Summary statistics of the incidence rate, rounded to whole numbers.
df1['Incidence Rate'].describe().round()

**A simple plot** <br>


In [None]:
# Line plot of incidence rate against year. The trailing semicolon
# suppresses the text repr of plot()'s return value in the cell output.
plt.plot(df1['Year'], df1['Incidence Rate']);

In [None]:
# The same plot with a legend, axis labels and a title.
# legend() only lists artists that carry a label, so the line is given one;
# without it the legend is empty and matplotlib emits a warning.
plt.plot(df1['Year'], df1['Incidence Rate'], label='Lyme disease')
plt.legend()
plt.xlabel('Year')
plt.ylabel('Cases per 100,000 people')
plt.title('Reported Cases of Lyme Disease in the United States, \n (1991–2014)')
plt.show()

**Line attributes**

In [None]:
# sample data:
x = np.linspace(0, 10, 1000)
y = np.sin(x)

# line colors
# One sine curve per way of specifying a color; each curve is shifted to the
# right by its position in the list so the lines do not overlap.
color_specs = [
    'blue',           # specify color by name
    'g',              # short color code (rgbcmyk)
    '0.75',           # Grayscale between 0 and 1
    '#FFDD44',        # Hex code (RRGGBB from 00 to FF)
    (1.0, 0.2, 0.3),  # RGB tuple, values 0 to 1
    'chartreuse',     # all HTML color names supported
]
for shift, spec in enumerate(color_specs):
    plt.plot(x, np.sin(x - shift), color=spec)

In [None]:
# line types
# Each style is drawn as the line y = x + offset so the lines stack upward
# in list order. The first four entries are the long names; the last four
# are the equivalent short codes.
line_styles = [
    'solid', 'dashed', 'dashdot', 'dotted',
    '-',   # solid
    '--',  # dashed
    '-.',  # dashdot
    ':',   # dotted
]
for offset, style in enumerate(line_styles):
    plt.plot(x, x + offset, linestyle=style)

In [None]:
# A format string combines a line-style code with a one-letter color code.
format_strings = [
    '-g',   # solid green
    '--c',  # dashed cyan
    '-.k',  # dashdot black
    ':r',   # dotted red
]
for offset, fmt in enumerate(format_strings):
    plt.plot(x, x + offset, fmt)

In [None]:
# The Lyme plot again, now with axis limits, custom ticks, free text and an
# annotation. The line carries a label so that legend() has an entry to show.
plt.plot(df1['Year'], df1['Incidence Rate'], label='Lyme disease')
plt.legend()
plt.xlabel('Year')
plt.ylabel('Cases per 100,000 people')
plt.title('Reported Cases of Lyme Disease in the United States, \n (1991–2014)')
# Restrict the visible x-range: points after 2010 are still plotted but fall
# outside the window, even though the title mentions data through 2014.
plt.xlim(1991,2010)
plt.xticks([1995, 2000, 2005, 2010])
# plt.ylim(-1.5, 1.5)

# optional
# Free text placed at data coordinates (2000, 5.5); the $...$ part is rendered as math.
plt.text(2000, 5.5, r'$\mu=100,\ \sigma=15$') # Latex
# Arrow pointing at xy, with its caption placed at xytext (both in data coordinates).
plt.annotate('New Century', xy=(2000, 6.2), xytext=(1995, 7.5),
         arrowprops=dict(facecolor='black', shrink=0.05))

plt.show()

**West Nile Virus disease** <br>
[West Nile Virus](https://www.cdc.gov/westnile/index.html) (WNV) is the leading cause of mosquito-borne disease in the continental United States. Cases of WNV occur during mosquito season, which starts in the summer and continues through fall.

With increasingly warmer and longer summers, the incidence rate and the range of WNV disease are also increasing. Hence, [WNV is another Climate Change Indicator](https://www.cdc.gov/westnile/statsmaps/cumMapsData.html).

In [None]:
# Another data file: the West Nile virus table.
# df2 = pd.read_csv('./data/west-nile.txt', skiprows=6)
# skiprows=7 skips the first seven lines of the file and names= supplies the
# column labels explicitly.
# NOTE(review): presumably those seven lines are a text preamble plus the
# file's own header row - confirm against the data file.
df2 = pd.read_csv('./data/west-nile.txt', skiprows=7, names=['Year', 'WNV_incidence'])
# Preview the first rows to check that the file parsed as expected.
df2.head()

In [None]:
# Both diseases on one set of axes. Each line carries a label so that
# legend() can tell the two series apart; without labels the legend is empty.
plt.plot(df1['Year'], df1['Incidence Rate'], label='Lyme disease')
plt.plot(df2['Year'], df2['WNV_incidence'], label='West Nile virus')
plt.legend()
plt.xlabel('Year')
plt.ylabel('Cases per 100,000 people')
plt.title('Reported Cases of Lyme & WNV Diseases in the United States')
plt.show()

**Alternate way to plot**

In [None]:
# Open a new figure explicitly, then plot into it. With a single list the
# values are used as y and x defaults to 0, 1, 2, ...
plt.figure()
plt.plot([1, 2, 3, 4]);

In [None]:
# overlaid plots
# Successive plot() calls draw onto the same axes of the current figure:
# a green line and a red line, one unit apart.
plt.figure()
plt.plot([2, 3, 4, 5], 'g')
plt.plot([1, 2, 3, 4], 'r');

In [None]:
# Two panels side by side in a 1-row, 2-column grid. subplot() selects the
# panel that the following plot() call draws into.
plt.figure()
plt.subplot(1, 2, 1) # rows, columns, number
plt.plot([1, 2, 3, 4], 'r')

plt.subplot(1, 2, 2) # rows, columns, number
plt.plot([1, 2, 3, 4], 'g');

In [None]:
# Object-oriented alternative: subplots() returns the figure together with
# its axes, here two panels stacked in a 2-row, 1-column grid.
fig, axs = plt.subplots(2, 1)
# plot() is called without data, so both panels are left empty.
axs[0].plot()
axs[1].plot()
plt.show();

In [None]:
# two y axes
# Lyme incidence uses the left y-axis and West Nile virus (WNV) incidence the
# right one; both share a single x-axis. Each y-axis is colored like its line.
fig, ax1 = plt.subplots()

color = 'tab:blue'

# plot 1: Lyme, left y-axis
ax1.set_xlabel("Year")
ax1.set_ylabel("Lyme cases per 100,000 people", color=color)
# ax1.set_ylim(min,max)
ax1.plot(df1['Year'], df1['Incidence Rate'], color=color)
ax1.tick_params(axis='y', labelcolor=color)

ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

color = 'tab:red'

# plot 2: WNV, right y-axis. The label names WNV because the plotted column
# is df2['WNV_incidence'].
ax2.set_ylabel("WNV cases per 100,000 people", color=color)
ax2.plot(df2['Year'], df2['WNV_incidence'], color=color)
ax2.tick_params(axis='y', labelcolor=color)
# ax2.set_ylim(min, max)

ax1.set_title('Reported Cases of Lyme & WNV Disease in the United States')
# ax1.set_xlim(min, max)

# Run the layout pass last so it accounts for the title as well;
# otherwise the right y-label is slightly clipped.
fig.tight_layout()

plt.show()

In [None]:
# saving figures
# plt.savefig('./data/fig.png')
# plt.savefig('fig.pdf')