# Exploring the combined data for O'Hare for the years 2010–2019

### CHICAGO OHARE INTERNATIONAL AIRPORT, IL US

ID	GHCND:USW00094846  
Lat/Lon:	41.995, -87.9336

### Data source

https://www.ncdc.noaa.gov/cdo-web/search

### Data

WT03 - Thunder  
WT04 - Ice pellets, sleet, snow pellets, or small hail  
**PRCP - Precipitation**  
WT05 - Hail (may include small hail)  
WT06 - Glaze or rime  
WT07 - Dust, volcanic ash, blowing dust, blowing sand, or blowing obstruction  
WT08 - Smoke or haze  
**SNWD - Snow depth**  
WT09 - Blowing or drifting snow  
WDF2 - Direction of fastest 2-minute wind  
WDF5 - Direction of fastest 5-second wind  
PGTM - Peak gust time  
WT11 - High or damaging winds  
**TMAX - Maximum temperature**  
WT13 - Mist  
WSF2 - Fastest 2-minute wind speed  
FMTM - Time of fastest mile or fastest 1-minute wind  
WSF5 - Fastest 5-second wind speed  
**SNOW - Snowfall**  
WT14 - Drizzle  
WT15 - Freezing drizzle  
WT16 - Rain (may include freezing rain, drizzle, and freezing drizzle)  
WT17 - Freezing rain  
WT18 - Snow, snow pellets, snow grains, or ice crystals  
WT19 - Unknown source of precipitation  
**AWND - Average wind speed**  
WT21 - Ground fog  
WT22 - Ice fog or freezing fog  
WT01 - Fog, ice fog, or freezing fog (may include heavy fog)  
WESD - Water equivalent of snow on the ground  
WT02 - Heavy fog or heavy freezing fog (not always distinguished from fog)  
**TAVG - Average Temperature**  
**TMIN - Minimum temperature**  

In [19]:
import pandas as pd
import time

In [20]:
# Read the combined O'Hare CSV into a DataFrame, then report its
# (rows, columns) shape and preview the first rows.
data = pd.read_csv(filepath_or_buffer='../data/ohare/all_ohare_data_2010_2019.csv')
print(data.shape)
data.head()

(3377, 36)


Unnamed: 0,STATION,NAME,DATE,AWND,FMTM,PGTM,PRCP,SNOW,SNWD,TAVG,...,WT11,WT13,WT14,WT15,WT16,WT17,WT18,WT19,WT21,WT22
0,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2010-01-01,10.29,2359.0,2351.0,0.0,0.0,2.0,,...,,,,,,,1.0,,,
1,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2010-01-02,11.86,928.0,2206.0,0.0,0.0,2.0,,...,,,,,,,,,,
2,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2010-01-03,10.29,2114.0,1917.0,0.0,0.0,2.0,,...,,,,,1.0,,1.0,,,
3,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2010-01-04,11.41,1644.0,1300.0,0.0,0.0,2.0,,...,,,,,1.0,,1.0,,,
4,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2010-01-05,8.5,1518.0,1500.0,0.0,0.0,2.0,,...,,,,,1.0,,1.0,,,


In [21]:
# Show the dtype pandas inferred for every column of the loaded frame.
data.dtypes

STATION     object
NAME        object
DATE        object
AWND       float64
FMTM       float64
PGTM       float64
PRCP       float64
SNOW       float64
SNWD       float64
TAVG       float64
TMAX         int64
TMIN         int64
WDF2         int64
WDF5       float64
WESD       float64
WSF2       float64
WSF5       float64
WT01       float64
WT02       float64
WT03       float64
WT04       float64
WT05       float64
WT06       float64
WT07       float64
WT08       float64
WT09       float64
WT11       float64
WT13       float64
WT14       float64
WT15       float64
WT16       float64
WT17       float64
WT18       float64
WT19       float64
WT21       float64
WT22       float64
dtype: object

In [22]:
# Count missing values per column (isna is the same check as isnull).
data.isna().sum()

STATION       0
NAME          0
DATE          0
AWND          0
FMTM       2739
PGTM       2743
PRCP          0
SNOW          0
SNWD          0
TAVG       1186
TMAX          0
TMIN          0
WDF2          0
WDF5         13
WESD       3012
WSF2          0
WSF5         13
WT01       2178
WT02       3277
WT03       3039
WT04       3334
WT05       3268
WT06       3342
WT07       3357
WT08       2745
WT09       3337
WT11       3368
WT13       3008
WT14       3299
WT15       3373
WT16       2911
WT17       3372
WT18       3182
WT19       3376
WT21       3374
WT22       3359
dtype: int64

In [23]:
# Columns retained for the rest of the notebook: the date plus the
# wind, precipitation, snow and temperature measurements.
features = [
    'DATE',
    'AWND',
    'PRCP',
    'SNOW',
    'SNWD',
    'TAVG',
    'TMAX',
    'TMIN',
    'WDF2',
    'WSF2',
]

In [24]:
# Narrow the frame to the columns listed in `features` (all rows kept),
# then report the new shape and preview the first rows.
data = data.loc[:, features]
print(data.shape)
data.head()

(3377, 10)


Unnamed: 0,DATE,AWND,PRCP,SNOW,SNWD,TAVG,TMAX,TMIN,WDF2,WSF2
0,2010-01-01,10.29,0.0,0.0,2.0,,16,5,320,15.0
1,2010-01-02,11.86,0.0,0.0,2.0,,11,2,340,17.9
2,2010-01-03,10.29,0.0,0.0,2.0,,18,-1,330,16.1
3,2010-01-04,11.41,0.0,0.0,2.0,,18,7,310,17.0
4,2010-01-05,8.5,0.0,0.0,2.0,,23,13,340,16.1


In [25]:
# Frequency of each distinct snow-depth (SNWD) value.
data['SNWD'].value_counts()

0.0     2951
2.0       74
1.2       57
5.1       40
1.0       35
3.0       32
3.1       28
3.9       26
5.9       21
4.0       14
9.1       13
5.0       13
7.1       10
9.8        9
6.0        7
7.9        6
11.8       5
11.0       5
17.0       4
7.0        3
18.0       3
14.2       3
10.0       2
13.0       2
8.0        2
9.0        2
15.0       2
19.0       2
16.1       1
16.9       1
21.0       1
16.0       1
18.1       1
14.0       1
Name: SNWD, dtype: int64

In [26]:
# Missing-value count for each of the retained feature columns.
data.isna().sum()

DATE       0
AWND       0
PRCP       0
SNOW       0
SNWD       0
TAVG    1186
TMAX       0
TMIN       0
WDF2       0
WSF2       0
dtype: int64

In [27]:
# Largest recorded WDF2 (direction of fastest 2-minute wind) value.
data['WDF2'].max()

360

In [28]:
# Smallest recorded WDF2 (direction of fastest 2-minute wind) value.
data['WDF2'].min()

10

In [29]:
# How often each WDF2 direction value occurs.
data['WDF2'].value_counts()

40     159
210    154
200    151
280    140
20     140
320    133
190    133
250    127
300    125
330    123
50     121
260    120
100    120
340    116
270    104
290     99
310     98
220     97
240     94
180     93
30      92
60      82
70      79
170     76
10      68
230     63
110     60
350     60
160     49
120     48
90      47
130     46
360     45
140     42
80      40
150     33
Name: WDF2, dtype: int64

In [30]:
# Display the DATE column to eyeball the first and last dates covered.
data['DATE']

0       2010-01-01
1       2010-01-02
2       2010-01-03
3       2010-01-04
4       2010-01-05
5       2010-01-06
6       2010-01-07
7       2010-01-08
8       2010-01-09
9       2010-01-10
10      2010-01-11
11      2010-01-12
12      2010-01-13
13      2010-01-14
14      2010-01-15
15      2010-01-16
16      2010-01-17
17      2010-01-18
18      2010-01-19
19      2010-01-20
20      2010-01-21
21      2010-01-22
22      2010-01-23
23      2010-01-24
24      2010-01-25
25      2010-01-26
26      2010-01-27
27      2010-01-28
28      2010-01-29
29      2010-01-30
           ...    
3347    2019-03-02
3348    2019-03-03
3349    2019-03-04
3350    2019-03-05
3351    2019-03-06
3352    2019-03-07
3353    2019-03-08
3354    2019-03-09
3355    2019-03-10
3356    2019-03-11
3357    2019-03-12
3358    2019-03-13
3359    2019-03-14
3360    2019-03-15
3361    2019-03-16
3362    2019-03-17
3363    2019-03-18
3364    2019-03-19
3365    2019-03-20
3366    2019-03-21
3367    2019-03-22
3368    2019

### Note: there are 3377 days from 2010-01-01 through 2019-03-31, counting both the first and the last day.

In [42]:
# Rows that have a TAVG reading. Author's observation: within this span there
# are no empty values and every day is accounted for.
data.loc[data['TAVG'].notna()]

Unnamed: 0,DATE,AWND,PRCP,SNOW,SNWD,TAVG,TMAX,TMIN,WDF2,WSF2
1186,2013-04-01,11.63,0.00,0.0,0.0,36.0,41,29,310,21.9
1187,2013-04-02,8.95,0.00,0.0,0.0,34.0,45,25,320,21.0
1188,2013-04-03,6.93,0.00,0.0,0.0,35.0,44,25,60,14.1
1189,2013-04-04,8.05,0.00,0.0,0.0,39.0,58,25,240,16.1
1190,2013-04-05,11.86,0.00,0.0,0.0,44.0,45,36,20,21.0
1191,2013-04-06,18.57,0.04,0.0,0.0,47.0,70,36,180,36.9
1192,2013-04-07,11.41,0.00,0.0,0.0,53.0,59,40,270,25.1
1193,2013-04-08,13.87,0.49,0.0,0.0,50.0,64,41,260,33.1
1194,2013-04-09,11.86,0.20,0.0,0.0,44.0,55,39,20,23.0
1195,2013-04-10,16.11,0.94,0.0,0.0,40.0,41,38,290,33.1


In [41]:
# Per-column missing-value counts, restricted to rows where TAVG is present.
data.loc[data['TAVG'].notna()].isna().sum()

DATE    0
AWND    0
PRCP    0
SNOW    0
SNWD    0
TAVG    0
TMAX    0
TMIN    0
WDF2    0
WSF2    0
dtype: int64

In [38]:
# Convert DATE to datetime64 so it can be compared against date bounds.
# `assign` returns a new frame instead of writing a column into `data`, which
# was itself produced by selecting columns from the loaded frame. This avoids
# a possible SettingWithCopyWarning and keeps the cell safe to re-run.
data = data.assign(DATE=pd.to_datetime(data['DATE']))
print(data.shape)
data.head()

(3377, 10)


Unnamed: 0,DATE,AWND,PRCP,SNOW,SNWD,TAVG,TMAX,TMIN,WDF2,WSF2
0,2010-01-01,10.29,0.0,0.0,2.0,,16,5,320,15.0
1,2010-01-02,11.86,0.0,0.0,2.0,,11,2,340,17.9
2,2010-01-03,10.29,0.0,0.0,2.0,,18,-1,330,16.1
3,2010-01-04,11.41,0.0,0.0,2.0,,18,7,310,17.0
4,2010-01-05,8.5,0.0,0.0,2.0,,23,13,340,16.1


In [40]:
# Rows with a TAVG reading dated 2015-01-01 through 2018-12-31 (inclusive).
# Both conditions are combined into ONE mask built from `data`. The earlier
# chained form, data[mask_a][mask_b], applied a mask indexed like the full
# frame to an already-filtered frame, which forces pandas to reindex the key
# and emit a UserWarning.
has_tavg = data['TAVG'].notna()
in_2015_2018 = (data['DATE'] >= '2015-01-01') & (data['DATE'] <= '2018-12-31')
data.loc[has_tavg & in_2015_2018]

  """Entry point for launching an IPython kernel.


Unnamed: 0,DATE,AWND,PRCP,SNOW,SNWD,TAVG,TMAX,TMIN,WDF2,WSF2
1826,2015-01-01,14.54,0.00,0.0,0.0,20.0,32,14,240,23.9
1827,2015-01-02,4.70,0.00,0.0,0.0,26.0,34,15,150,12.1
1828,2015-01-03,5.59,0.57,0.4,0.0,31.0,34,29,140,13.0
1829,2015-01-04,15.88,0.13,2.2,1.2,29.0,33,2,320,25.9
1830,2015-01-05,10.29,0.15,1.8,2.0,2.0,6,-3,310,21.9
1831,2015-01-06,13.87,0.01,0.2,3.9,5.0,10,-5,270,23.9
1832,2015-01-07,17.45,0.00,0.0,3.1,3.0,6,-6,330,28.0
1833,2015-01-08,17.67,0.06,3.0,3.1,-2.0,18,-8,300,28.0
1834,2015-01-09,16.55,0.00,0.0,5.1,8.0,11,0,330,30.0
1835,2015-01-10,11.63,0.00,0.0,3.9,5.0,24,-2,180,21.9


## This is the data for O'Hare from April 1, 2013 to March 31, 2019.

In [43]:
# Subset of `data` containing only the rows that carry a TAVG value;
# print its shape and preview the first rows.
data_apr012013_mar312019 = data.loc[data['TAVG'].notna()]
print(data_apr012013_mar312019.shape)
data_apr012013_mar312019.head()

(2191, 10)


Unnamed: 0,DATE,AWND,PRCP,SNOW,SNWD,TAVG,TMAX,TMIN,WDF2,WSF2
1186,2013-04-01,11.63,0.0,0.0,0.0,36.0,41,29,310,21.9
1187,2013-04-02,8.95,0.0,0.0,0.0,34.0,45,25,320,21.0
1188,2013-04-03,6.93,0.0,0.0,0.0,35.0,44,25,60,14.1
1189,2013-04-04,8.05,0.0,0.0,0.0,39.0,58,25,240,16.1
1190,2013-04-05,11.86,0.0,0.0,0.0,44.0,45,36,20,21.0


In [44]:
# Confirm the column dtypes of the TAVG-present subset (DATE should now be datetime64).
data_apr012013_mar312019.dtypes

DATE    datetime64[ns]
AWND           float64
PRCP           float64
SNOW           float64
SNWD           float64
TAVG           float64
TMAX             int64
TMIN             int64
WDF2             int64
WSF2           float64
dtype: object

In [48]:
# Write the TAVG-present subset to CSV, omitting the row index.
data_apr012013_mar312019.to_csv(
    path_or_buf='../data/ohare/apr012013_mar312019.csv',
    index=False,
)

## This is the data for O'Hare from January 1, 2015 to December 31, 2018.

In [47]:
# Build the 2015-01-01 .. 2018-12-31 (inclusive) subset of rows that have a
# TAVG reading. A single combined mask is used instead of chaining two boolean
# selections. The chained form applied a full-length mask to an
# already-filtered frame, which makes pandas reindex the key and emit a
# UserWarning.
has_tavg = data['TAVG'].notna()
in_2015_2018 = (data['DATE'] >= '2015-01-01') & (data['DATE'] <= '2018-12-31')
data_2015_2018 = data.loc[has_tavg & in_2015_2018]
print(data_2015_2018.shape)
data_2015_2018.head()

(1461, 10)


  """Entry point for launching an IPython kernel.


Unnamed: 0,DATE,AWND,PRCP,SNOW,SNWD,TAVG,TMAX,TMIN,WDF2,WSF2
1826,2015-01-01,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
1827,2015-01-02,4.7,0.0,0.0,0.0,26.0,34,15,150,12.1
1828,2015-01-03,5.59,0.57,0.4,0.0,31.0,34,29,140,13.0
1829,2015-01-04,15.88,0.13,2.2,1.2,29.0,33,2,320,25.9
1830,2015-01-05,10.29,0.15,1.8,2.0,2.0,6,-3,310,21.9


In [50]:
# Write the Jan 01, 2015 to Dec 31, 2018 subset to CSV, omitting the row index.
data_2015_2018.to_csv(
    path_or_buf='../data/ohare/jan012015_dec312018.csv',
    index=False,
)

In [52]:
# Save the rows with a TAVG reading dated Apr 01, 2013 to Dec 31, 2018 (inclusive).
# The conditions are combined into one mask over `data` rather than chaining two
# boolean selections, which would apply a full-length mask to an already-filtered
# frame and trigger a pandas reindexing UserWarning.
keep_rows = (
    data['TAVG'].notna()
    & (data['DATE'] >= '2013-04-01')
    & (data['DATE'] <= '2018-12-31')
)
data.loc[keep_rows].to_csv('../data/ohare/apr012013_dec312018.csv', index=False)

  
