### Import Dependencies

In [1]:
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

### Read in NYC CSV

In [2]:
# Load the New York monthly AQI table (Year, Month, AQI) and preview the top rows.
nyc_csv_path = "Resources/NewYork_months.csv"
nyc_df = pd.read_csv(nyc_csv_path)
nyc_df.head()

Unnamed: 0,Year,Month,AQI
0,2014,1,150.0
1,2014,2,149.0
2,2014,3,125.0
3,2014,4,116.0
4,2014,5,106.0


### Separating out the years to combine in Summary_Data_and_plotting.ipynb

* Leaving out 2014 for heatmap purposes

#### 2015

In [3]:
# Keep only the 2015 rows; the row labels from nyc_df are carried over unchanged.
nyc_15 = nyc_df[nyc_df["Year"].eq(2015)]
nyc_15

Unnamed: 0,Year,Month,AQI
7,2015,1,147.0
8,2015,2,149.0
9,2015,3,130.0
10,2015,4,101.0
11,2015,5,124.0
12,2015,6,99.0
13,2015,7,131.0


#### 2016

In [4]:
# Keep only the 2016 rows; the row labels from nyc_df are carried over unchanged.
nyc_16 = nyc_df[nyc_df["Year"].eq(2016)]
nyc_16

Unnamed: 0,Year,Month,AQI
14,2016,1,141.0
15,2016,2,128.0
16,2016,3,112.0
17,2016,4,106.0
18,2016,5,109.0
19,2016,6,109.0
20,2016,7,111.0


### Merge 2015 and 2016

In [5]:
# Outer-join the 2015 and 2016 frames on Month so each year's Year/AQI columns
# sit side by side, distinguished by the _15 / _16 suffixes.
# validate="one_to_one" makes pandas raise MergeError if Month repeats in
# either input, instead of silently emitting cross-joined duplicate rows.
nyc_1516 = pd.merge(
    nyc_15,
    nyc_16,
    how="outer",
    on=["Month"],
    suffixes=("_15", "_16"),
    validate="one_to_one",
)
nyc_1516

Unnamed: 0,Year_15,Month,AQI_15,Year_16,AQI_16
0,2015,1,147.0,2016,141.0
1,2015,2,149.0,2016,128.0
2,2015,3,130.0,2016,112.0
3,2015,4,101.0,2016,106.0
4,2015,5,124.0,2016,109.0
5,2015,6,99.0,2016,109.0
6,2015,7,131.0,2016,111.0


#### 2017

In [6]:
# Keep only the 2017 rows; the row labels from nyc_df are carried over unchanged.
nyc_17 = nyc_df[nyc_df["Year"].eq(2017)]
nyc_17

Unnamed: 0,Year,Month,AQI
21,2017,1,132.0
22,2017,2,129.0
23,2017,3,112.0
24,2017,4,95.0
25,2017,5,81.0
26,2017,6,101.0
27,2017,7,108.0


#### 2018

In [7]:
# Keep only the 2018 rows; the row labels from nyc_df are carried over unchanged.
nyc_18 = nyc_df[nyc_df["Year"].eq(2018)]
nyc_18

Unnamed: 0,Year,Month,AQI
28,2018,1,137.0
29,2018,2,124.0
30,2018,3,93.0
31,2018,4,97.0
32,2018,5,104.0
33,2018,6,96.0
34,2018,7,105.0


### Merge 2017 and 2018

In [8]:
# Outer-join the 2017 and 2018 frames on Month so each year's Year/AQI columns
# sit side by side, distinguished by the _17 / _18 suffixes.
# validate="one_to_one" makes pandas raise MergeError if Month repeats in
# either input, instead of silently emitting cross-joined duplicate rows.
nyc_1718 = pd.merge(
    nyc_17,
    nyc_18,
    how="outer",
    on=["Month"],
    suffixes=("_17", "_18"),
    validate="one_to_one",
)
nyc_1718

Unnamed: 0,Year_17,Month,AQI_17,Year_18,AQI_18
0,2017,1,132.0,2018,137.0
1,2017,2,129.0,2018,124.0
2,2017,3,112.0,2018,93.0
3,2017,4,95.0,2018,97.0
4,2017,5,81.0,2018,104.0
5,2017,6,101.0,2018,96.0
6,2017,7,108.0,2018,105.0


### Merge 2015/16 and 2017/18

In [9]:
# Combine the 2015/16 and 2017/18 tables on Month. Their other columns already
# carry year suffixes, so no extra suffixes are needed here.
# validate="one_to_one" makes pandas raise MergeError if Month repeats in
# either input, instead of silently emitting cross-joined duplicate rows.
nyc_1518 = pd.merge(
    nyc_1516,
    nyc_1718,
    how="outer",
    on=["Month"],
    validate="one_to_one",
)
nyc_1518

Unnamed: 0,Year_15,Month,AQI_15,Year_16,AQI_16,Year_17,AQI_17,Year_18,AQI_18
0,2015,1,147.0,2016,141.0,2017,132.0,2018,137.0
1,2015,2,149.0,2016,128.0,2017,129.0,2018,124.0
2,2015,3,130.0,2016,112.0,2017,112.0,2018,93.0
3,2015,4,101.0,2016,106.0,2017,95.0,2018,97.0
4,2015,5,124.0,2016,109.0,2017,81.0,2018,104.0
5,2015,6,99.0,2016,109.0,2017,101.0,2018,96.0
6,2015,7,131.0,2016,111.0,2017,108.0,2018,105.0


#### 2019

In [10]:
# Keep only the 2019 rows; the row labels from nyc_df are carried over unchanged.
nyc_19 = nyc_df[nyc_df["Year"].eq(2019)]
nyc_19

Unnamed: 0,Year,Month,AQI
35,2019,1,128.0
36,2019,2,112.0
37,2019,3,102.0
38,2019,4,84.0
39,2019,5,78.0
40,2019,6,94.0
41,2019,7,122.0


#### 2020

In [11]:
# Keep only the 2020 rows; the row labels from nyc_df are carried over unchanged.
nyc_20 = nyc_df[nyc_df["Year"].eq(2020)]
nyc_20

Unnamed: 0,Year,Month,AQI
42,2020,1,126.0
43,2020,2,116.0
44,2020,3,86.0
45,2020,4,74.0
46,2020,5,68.0
47,2020,6,89.0
48,2020,7,99.0


### Merge 2019 and 2020

In [13]:
# Outer-join the 2019 and 2020 frames on Month so each year's Year/AQI columns
# sit side by side, distinguished by the _19 / _20 suffixes.
# validate="one_to_one" makes pandas raise MergeError if Month repeats in
# either input, instead of silently emitting cross-joined duplicate rows.
nyc_1920 = pd.merge(
    nyc_19,
    nyc_20,
    how="outer",
    on=["Month"],
    suffixes=("_19", "_20"),
    validate="one_to_one",
)
nyc_1920

Unnamed: 0,Year_19,Month,AQI_19,Year_20,AQI_20
0,2019,1,128.0,2020,126.0
1,2019,2,112.0,2020,116.0
2,2019,3,102.0,2020,86.0
3,2019,4,84.0,2020,74.0
4,2019,5,78.0,2020,68.0
5,2019,6,94.0,2020,89.0
6,2019,7,122.0,2020,99.0


### Merge 2015-18 and 2019/20

In [14]:
# Combine the 2015-18 and 2019/20 tables on Month. Their other columns already
# carry year suffixes, so no extra suffixes are needed here.
# validate="one_to_one" makes pandas raise MergeError if Month repeats in
# either input, instead of silently emitting cross-joined duplicate rows.
nyc_all = pd.merge(
    nyc_1518,
    nyc_1920,
    how="outer",
    on=["Month"],
    validate="one_to_one",
)
nyc_all

Unnamed: 0,Year_15,Month,AQI_15,Year_16,AQI_16,Year_17,AQI_17,Year_18,AQI_18,Year_19,AQI_19,Year_20,AQI_20
0,2015,1,147.0,2016,141.0,2017,132.0,2018,137.0,2019,128.0,2020,126.0
1,2015,2,149.0,2016,128.0,2017,129.0,2018,124.0,2019,112.0,2020,116.0
2,2015,3,130.0,2016,112.0,2017,112.0,2018,93.0,2019,102.0,2020,86.0
3,2015,4,101.0,2016,106.0,2017,95.0,2018,97.0,2019,84.0,2020,74.0
4,2015,5,124.0,2016,109.0,2017,81.0,2018,104.0,2019,78.0,2020,68.0
5,2015,6,99.0,2016,109.0,2017,101.0,2018,96.0,2019,94.0,2020,89.0
6,2015,7,131.0,2016,111.0,2017,108.0,2018,105.0,2019,122.0,2020,99.0


### Dropping the year columns no longer needed

In [15]:
# Remove the per-year Year_* columns, leaving Month plus one AQI column per year.
nyc_all2 = nyc_all.drop(
    columns=[
        "Year_15",
        "Year_16",
        "Year_17",
        "Year_18",
        "Year_19",
        "Year_20",
    ]
)
nyc_all2

Unnamed: 0,Month,AQI_15,AQI_16,AQI_17,AQI_18,AQI_19,AQI_20
0,1,147.0,141.0,132.0,137.0,128.0,126.0
1,2,149.0,128.0,129.0,124.0,112.0,116.0
2,3,130.0,112.0,112.0,93.0,102.0,86.0
3,4,101.0,106.0,95.0,97.0,84.0,74.0
4,5,124.0,109.0,81.0,104.0,78.0,68.0
5,6,99.0,109.0,101.0,96.0,94.0,89.0
6,7,131.0,111.0,108.0,105.0,122.0,99.0


In [16]:
# Relabel each yearly AQI column with an "Avg " prefix; Month is left as is.
nyc = nyc_all2.rename(
    {
        "AQI_15": "Avg AQI_15",
        "AQI_16": "Avg AQI_16",
        "AQI_17": "Avg AQI_17",
        "AQI_18": "Avg AQI_18",
        "AQI_19": "Avg AQI_19",
        "AQI_20": "Avg AQI_20",
    },
    axis="columns",
)
nyc

Unnamed: 0,Month,Avg AQI_15,Avg AQI_16,Avg AQI_17,Avg AQI_18,Avg AQI_19,Avg AQI_20
0,1,147.0,141.0,132.0,137.0,128.0,126.0
1,2,149.0,128.0,129.0,124.0,112.0,116.0
2,3,130.0,112.0,112.0,93.0,102.0,86.0
3,4,101.0,106.0,95.0,97.0,84.0,74.0
4,5,124.0,109.0,81.0,104.0,78.0,68.0
5,6,99.0,109.0,101.0,96.0,94.0,89.0
6,7,131.0,111.0,108.0,105.0,122.0,99.0


In [17]:
# Write the table without the row index. Create the output directory first so
# the export does not fail when output_data/ is missing (e.g. a fresh checkout).
nyc_output_path = Path("output_data/nyc.csv")
nyc_output_path.parent.mkdir(parents=True, exist_ok=True)
nyc.to_csv(nyc_output_path, index=False)