In [128]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [248]:
# Load the raw observations; 'data.csv' is resolved relative to the working directory.
df = pd.read_csv('data.csv')
# Preview the first rows to see the columns (ID, Date, Element, Data_Value).
df.head()

Unnamed: 0,ID,Date,Element,Data_Value
0,USW00094889,2014-11-12,TMAX,22
1,USC00208972,2009-04-29,TMIN,56
2,USC00200032,2008-05-26,TMAX,278
3,USC00205563,2005-11-11,TMAX,139
4,USC00200230,2014-02-27,TMAX,-106


In [249]:
# Temperature is given in tenths, so scale Data_Value down by a factor of 10.
# NOTE: the column is overwritten, so running this cell a second time divides again.
df['Data_Value'] = df['Data_Value'].div(10)

## 1. Extract the day and month of each date (combined later into the index used for plotting)

In [250]:
# Parse the Date strings into real datetimes so calendar fields can be read from them.
df['Date'] = df['Date'].pipe(pd.to_datetime)

In [251]:
# Check that Date is now a datetime column and Data_Value is floating point.
df.dtypes

ID                    object
Date          datetime64[ns]
Element               object
Data_Value           float64
dtype: object

In [252]:
def get_day(x):
    """Return the day-of-month component of ``x`` (its ``day`` attribute)."""
    day_of_month = x.day
    return day_of_month

def get_month(x):
    """Return the month component of ``x`` (its ``month`` attribute)."""
    month_number = x.month
    return month_number

In [253]:
# Read the calendar components straight from the datetime column with the
# vectorised .dt accessor instead of calling a Python function once per row.
# The values are the same; on recent pandas the integer width may be int32.
df['Day'] = df['Date'].dt.day
df['Month'] = df['Date'].dt.month

In [254]:
# df['Day'] = df['Day'].astype(str)
# df['Month'] = df['Month'].astype(str)
# df['monthday'] = df['Month'].astype(str) + '-' + df['Day'].astype(str)
# NOTE: the combined month-day label is not created here because extra columns do not
# remain in the grouped data; it is built on the grouped frames instead.

In [255]:
# Preview the frame with the scaled Data_Value and the new Day and Month columns.
df.head()

Unnamed: 0,ID,Date,Element,Data_Value,Day,Month
0,USW00094889,2014-11-12,TMAX,2.2,12,11
1,USC00208972,2009-04-29,TMIN,5.6,29,4
2,USC00200032,2008-05-26,TMAX,27.8,26,5
3,USC00205563,2005-11-11,TMAX,13.9,11,11
4,USC00200230,2014-02-27,TMAX,-10.6,27,2


In [256]:
# Confirm Day and Month were added as integer columns.
df.dtypes

ID                    object
Date          datetime64[ns]
Element               object
Data_Value           float64
Day                    int64
Month                  int64
dtype: object

## 2. Drop leap days (February 29)

In [257]:
# Row/column count before the leap-day filter, for comparison afterwards.
df.shape

(165085, 6)

In [258]:
def get_leapdate(x):
    """Tell whether ``x`` should be kept when leap days are dropped.

    Despite the name, the result is True for every date that is NOT
    February 29 and False only for February 29, so the return value can be
    used directly as a keep-mask.
    """
    return x.month != 2 or x.day != 29

In [259]:
# get_leapdate is True for every date except February 29, so the mask it
# produces selects exactly the rows to keep.
df = df.loc[df['Date'].apply(get_leapdate)]

In [260]:
# Row/column count after the leap-day filter, to compare with the count taken before it.
df.shape

(165002, 6)

## 3. Split by year: 2004-2014 and 2015

In [261]:
#df1: 2004-2014 and df2: 2015
# df1: rows dated before 2015; df2: rows dated 2015-01-01 or later.
# A single cutoff with < / >= guarantees every dated row lands in exactly one
# of the two frames, even if a timestamp carries a time of day.
cutoff = pd.Timestamp('2015-01-01')
df1 = df[df['Date'] < cutoff]
df2 = df[df['Date'] >= cutoff]

## 4. Split data into tmax and tmin

In [262]:
# 2004 to 2014
# Rows of df1 (before 2015): TMAX readings and TMIN readings.
tmax1, tmin1 = (df1[df1['Element'] == kind] for kind in ('TMAX', 'TMIN'))

# Rows of df2 (2015): TMAX readings and TMIN readings.
tmax2, tmin2 = (df2[df2['Element'] == kind] for kind in ('TMAX', 'TMIN'))

## 5. Group data by date to be plotted

In [263]:
# For every calendar day (Month, Day) take the highest TMAX and the lowest TMIN
# found in the pre-2015 rows; each result is a one-column frame ('Data_Value').
tmax1_group = tmax1.groupby(['Month', 'Day'])['Data_Value'].max().to_frame()
tmin1_group = tmin1.groupby(['Month', 'Day'])['Data_Value'].min().to_frame()

In [264]:
# Peek at the per-day minimum frame; Month and Day form the index at this point.
tmin1_group.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Data_Value
Month,Day,Unnamed: 2_level_1
1,1,-16.0
1,2,-26.7
1,3,-26.7
1,4,-26.1
1,5,-15.0


In [265]:
# Turn the (Month, Day) group keys from index levels back into regular columns
# so they can be combined into a single label.
# NOTE: the same name is rebound, so the result depends on when this cell is run.
tmax1_group = tmax1_group.reset_index()

In [266]:
# Month and Day should now appear as ordinary columns next to Data_Value.
tmax1_group.head()

Unnamed: 0,Month,Day,Data_Value
0,1,1,15.6
1,1,2,13.9
2,1,3,13.3
3,1,4,10.6
4,1,5,12.8


In [267]:
# Label each row 'month-day' (e.g. '1-1') and use that label as the index.
tmax1_group = (
    tmax1_group
    .assign(cDay=lambda g: g['Month'].astype(str) + '-' + g['Day'].astype(str))
    .set_index('cDay')
)

In [268]:
# Same treatment for the minimum frame: move the (Month, Day) keys back into
# columns, build the 'month-day' label, and index the frame by it.
tmin1_group = (
    tmin1_group
    .reset_index()
    .assign(cDay=lambda g: g['Month'].astype(str) + '-' + g['Day'].astype(str))
    .set_index('cDay')
)

In [269]:
# Inspect the maximum frame now that it is indexed by the cDay label.
tmax1_group.head()

Unnamed: 0_level_0,Month,Day,Data_Value
cDay,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1-1,1,1,15.6
1-2,1,2,13.9
1-3,1,3,13.3
1-4,1,4,10.6
1-5,1,5,12.8


In [270]:
# Inspect the minimum frame now that it is indexed by the cDay label.
tmin1_group.head()

Unnamed: 0_level_0,Month,Day,Data_Value
cDay,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1-1,1,1,-16.0
1-2,1,2,-26.7
1-3,1,3,-26.7
1-4,1,4,-26.1
1-5,1,5,-15.0


## 6. Plot Data

In [271]:
# Whole number of rows per month if the frame were cut into 12 equal parts.
len(tmax1_group) // 12

30

In [272]:
%matplotlib notebook

In [274]:
# Overlay, per calendar day, the highest TMAX and the lowest TMIN from the
# pre-2015 rows. The labels feed the legend so the two lines can be told apart;
# the trailing ';' keeps the return value out of the cell output.
plt.figure()
plt.plot(tmax1_group['Data_Value'], label='Record high (before 2015)')
plt.plot(tmin1_group['Data_Value'], label='Record low (before 2015)')
plt.ylabel('Temperature')
plt.legend();


<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x6400f45470>]

In [275]:
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
interval = len(tmax1_group) // 12  # whole rows per month if all months were equal

# Tick positions are ordinal row positions (0, 1, 2, ... in frame order). The
# centre of a month is taken as the mean position of that month's rows, which
# keeps each name centred for 28-, 30- and 31-day months alike instead of
# assuming twelve equal slots of `interval` rows.
month_of_row = tmax1_group['Month'].values
pos = np.array([np.flatnonzero(month_of_row == m).mean() for m in range(1, 13)])
plt.xticks(pos, months);