In [200]:
import pandas as pd

In [244]:
# Load the pre-subsetted CA weather observations; YEARMODA is parsed into
# datetimes so it can serve as a time index further down.
subsetted_csv_path = '../data/preprocessed/CA/CA_1981-2015_subsetted2.csv'
df_subsetted = pd.read_csv(subsetted_csv_path, parse_dates=['YEARMODA'])

In [245]:
# Peek at the first five rows to check the parsed columns.
df_subsetted.head(n=5)

Unnamed: 0.1,Unnamed: 0,STN,YEARMODA,TEMP,PRCP,GUST,WDSP,MXSPD
0,0,691414,1985-04-24,54.1,0.0,15.0,5.6,9.9
1,1,691414,1985-04-25,42.9,,25.1,12.6,19.0
2,2,691414,1985-04-26,43.0,,20.0,8.8,15.9
3,3,691414,1985-04-27,42.4,,53.0,16.3,29.9
4,4,691414,1985-04-28,40.5,,40.0,14.0,27.0


In [246]:
# Non-null observation count per column, over every station in the file.
df_subsetted.count()

Unnamed: 0    1370304
STN           1370304
YEARMODA      1370304
TEMP          1370304
PRCP          1341677
GUST           386697
WDSP          1342008
MXSPD         1338075
dtype: int64

# LA AIRPORT STATION

In [247]:
# Keep only the rows reported by station 722950 (the LA airport station).
is_la_airport = df_subsetted['STN'] == 722950
la = df_subsetted[is_la_airport]

Less than 20% of the records from the LA airport station have a GUST value (see the counts below), so GUST is not used in this analysis.

In [248]:
# Non-null observation count per column for the LA airport station only.
la.count()

Unnamed: 0    12426
STN           12426
YEARMODA      12426
TEMP          12426
PRCP          12419
GUST           2453
WDSP          12426
MXSPD         12424
dtype: int64

### Precipitation Comparison between el nino year vs. normal year

The most significant indicator of El Niño in CA is precipitation.

<img src="http://www.trbimg.com/img-55d635df/turbine/la-me-g-0820-el-nino-rainfall-comparison-20150820/750/750x422" />

Source: http://www.latimes.com/local/lanow/la-me-ln-el-nino-temperatures-new-record-20151117-story.html

In [249]:
from plotly import plotly as ply
from plotly import graph_objs as go

In [250]:
# Daily precipitation at the LA airport station for two July-June windows:
# 1997-1998 and 2014-2015.
tt = la.set_index('YEARMODA')[['PRCP']]['1997-7-1':'1998-6-30']
tt2 = la.set_index('YEARMODA')[['PRCP']]['2014-7-1':'2015-6-30']
# Monthly means of the daily values. The aggregation must be spelled out:
# since pandas 0.18 a bare .resample('M') returns a lazy Resampler object
# (no .index, no data) instead of the monthly means.
rr = tt.resample('M').mean()
rr2 = tt2.resample('M').mean()

In [251]:
# (frame, legend label) pairs: the two daily series first, then their
# monthly counterparts.
labelled_frames = [
    (tt, '1997-1998'),
    (tt2, '2014-2015'),
    (rr, '1997-1998 monthly'),
    (rr2, '2014-2015 monthly'),
]
# One precipitation trace per frame; x values are "Mon DD" labels, i.e. the
# year is dropped from every timestamp.
traces = [
    go.Scatter(
        x=frame.index.map(lambda stamp: stamp.strftime('%b %d')),
        y=frame['PRCP'],
        name=label)
    for frame, label in labelled_frames
]
ply.iplot(traces, filename='WeatherData/test1')

There's a huge difference in precipitation between 1997-1998 and 2014-2015, but it is only obvious in February.

<img src="http://data-week.popsci.com/gifs/Precipitation-Comparison-Two-ElNino-Years.gif" />

## Temperature comparison

In [252]:
# Temperature at the LA airport station for two July-June windows:
# 1997-1998 and 2014-2015.
tt = la.set_index('YEARMODA')['1997-7-1':'1998-6-30']
tt2 = la.set_index('YEARMODA')['2014-7-1':'2015-6-30']
# Monthly means. The aggregation must be spelled out: since pandas 0.18 a
# bare .resample('M') returns a lazy Resampler object (no .index, no data)
# instead of the monthly means.
rr = tt.resample('M').mean()
rr2 = tt2.resample('M').mean()

# (frame, legend label) pairs: daily series first, monthly means last.
labelled_frames = [
    (tt, '1997-1998'),
    (tt2, '2014-2015'),
    (rr, '1997-1998 monthly'),
    (rr2, '2014-2015 monthly'),
]
# One TEMP trace per frame; x values are "Mon DD" labels (year dropped).
traces = [
    go.Scatter(
        x=frame.index.map(lambda stamp: stamp.strftime('%b %d')),
        y=frame['TEMP'],
        name=label)
    for frame, label in labelled_frames
]

ply.iplot(traces, filename='WeatherData/test2')

Not very significant.

In [183]:
# Temperature at the LA airport station for two July-June windows:
# 1997-1998 and 1996-1997.
tt = la.set_index('YEARMODA')['1997-7-1':'1998-6-30']
tt2 = la.set_index('YEARMODA')['1996-7-1':'1997-6-30']
# Monthly means. The aggregation must be spelled out: since pandas 0.18 a
# bare .resample('M') returns a lazy Resampler object (no .index, no data)
# instead of the monthly means.
rr = tt.resample('M').mean()
rr2 = tt2.resample('M').mean()

# (frame, legend label) pairs: daily series first, monthly means last.
labelled_frames = [
    (tt, '1997-1998'),
    (tt2, '1996-1997'),
    (rr, '1997-1998 monthly'),
    (rr2, '1996-1997 monthly'),
]
# One TEMP trace per frame; x values are "Mon DD" labels (year dropped).
traces = [
    go.Scatter(
        x=frame.index.map(lambda stamp: stamp.strftime('%b %d')),
        y=frame['TEMP'],
        name=label)
    for frame, label in labelled_frames
]

ply.iplot(traces, filename='WeatherData/test3')

But there is some difference between 1997-1998 and 1996-1997, even though both 2014-2015 and 1996-1997 are normal years.

<img src="http://data-week.popsci.com/gifs/Temperature-Comparison-Two-ElNino-Years.gif" />

Even in 1997-1998, the temperature in CA was only typical. CA is predicted to be warmer this winter, but not with very high probability.

## Does this also hold for 1982-1983?

In [253]:
# Precipitation at the LA airport station for two July-June windows:
# 1982-1983 and 2014-2015.
tt = la.set_index('YEARMODA')['1982-7-1':'1983-6-30']
tt2 = la.set_index('YEARMODA')['2014-7-1':'2015-6-30']
# Monthly means. The aggregation must be spelled out: since pandas 0.18 a
# bare .resample('M') returns a lazy Resampler object (no .index, no data)
# instead of the monthly means.
rr = tt.resample('M').mean()
rr2 = tt2.resample('M').mean()

# (frame, legend label) pairs: daily series first, monthly means last.
labelled_frames = [
    (tt, '1982-1983'),
    (tt2, '2014-2015'),
    (rr, '1982-1983 monthly'),
    (rr2, '2014-2015 monthly'),
]
# One PRCP trace per frame; x values are "Mon DD" labels (year dropped).
traces = [
    go.Scatter(
        x=frame.index.map(lambda stamp: stamp.strftime('%b %d')),
        y=frame['PRCP'],
        name=label)
    for frame, label in labelled_frames
]

ply.iplot(traces, filename='WeatherData/test4')

Even less significant.

In [254]:
# Temperature at the LA airport station for two July-June windows:
# 1982-1983 and 2014-2015.
tt = la.set_index('YEARMODA')['1982-7-1':'1983-6-30']
tt2 = la.set_index('YEARMODA')['2014-7-1':'2015-6-30']
# Monthly means. The aggregation must be spelled out: since pandas 0.18 a
# bare .resample('M') returns a lazy Resampler object (no .index, no data)
# instead of the monthly means.
rr = tt.resample('M').mean()
rr2 = tt2.resample('M').mean()

# (frame, legend label) pairs: daily series first, monthly means last.
labelled_frames = [
    (tt, '1982-1983'),
    (tt2, '2014-2015'),
    (rr, '1982-1983 monthly'),
    (rr2, '2014-2015 monthly'),
]
# One TEMP trace per frame; x values are "Mon DD" labels (year dropped).
traces = [
    go.Scatter(
        x=frame.index.map(lambda stamp: stamp.strftime('%b %d')),
        y=frame['TEMP'],
        name=label)
    for frame, label in labelled_frames
]

ply.iplot(traces, filename='WeatherData/test5')

In [296]:
# WDSP (wind speed column) at the LA airport station for two July-June
# windows: 1982-1983 and 2014-2015.
tt = la.set_index('YEARMODA')['1982-7-1':'1983-6-30']
tt2 = la.set_index('YEARMODA')['2014-7-1':'2015-6-30']
# Monthly means. The aggregation must be spelled out: since pandas 0.18 a
# bare .resample('M') returns a lazy Resampler object (no .index, no data)
# instead of the monthly means.
rr = tt.resample('M').mean()
rr2 = tt2.resample('M').mean()

# (frame, legend label) pairs: daily series first, monthly means last.
labelled_frames = [
    (tt, '1982-1983'),
    (tt2, '2014-2015'),
    (rr, '1982-1983 monthly'),
    (rr2, '2014-2015 monthly'),
]
# One WDSP trace per frame; x values are "Mon DD" labels (year dropped).
traces = [
    go.Scatter(
        x=frame.index.map(lambda stamp: stamp.strftime('%b %d')),
        y=frame['WDSP'],
        name=label)
    for frame, label in labelled_frames
]

ply.iplot(traces, filename='WeatherData/test_wdsp')

# Correlations

In [301]:
# Date-indexed LA airport measurements, cut off at the end of 2014.
la_by_date = la.set_index('YEARMODA')
indexed = la_by_date[['TEMP', 'PRCP', 'WDSP', 'MXSPD']][:'2014']

In [280]:
# Select the winter months (Nov-Feb) only.
is_winter = indexed.index.map(lambda stamp: stamp.month in [11, 12, 1, 2])
# Reduce to monthly means, drop the bins that contain a missing value (the
# resample also creates empty Mar-Oct bins), then move every timestamp back
# six month-ends. Nov-Feb lands on May-Aug, so a winter that straddles
# New Year falls inside one calendar year (Nov 1997-Feb 1998 becomes 1997).
# The explicit .mean() is required: since pandas 0.18 a bare .resample('M')
# returns a Resampler, which has no .dropna().
shifted = indexed[is_winter].resample('M').mean().dropna().shift(-6, freq='M')
# Average the monthly means of each shifted year into one row per winter.
years = shifted.groupby(shifted.index.year).mean()

In [299]:
# Plot the per-year winter mean of PRCP for the LA airport station.
trace0 = go.Scatter(
    x=years.index,
    y=years['PRCP'],
)
ply.iplot([trace0], filename='WeatherData/testaa')

## El nino index

In [212]:
# Load the ONI (El Nino index) table. The delimiter is a real tab character:
# the raw string r'\t' is two characters long, so pandas interprets it as a
# regular expression and falls back to the slower Python parsing engine with
# a ParserWarning.
oci = pd.read_csv('../data/raw/ONI.txt', sep='\t')





In [213]:
# Translate each 'Type' code into a signed score from -2 to 2 in steps of
# 0.5, with 'N' at 0; codes missing from the table become NaN.
# NOTE(review): presumably the *L codes are La Nina strengths and the *E
# codes El Nino strengths -- confirm against the ONI source file.
type_to_score = {
    'VSL': -2, 'SL': -1.5, 'ML': -1, 'WL': -0.5,
    'N': 0,
    'WE': 0.5, 'ME': 1, 'SE': 1.5, 'VSE': 2,
}
oci['index'] = oci['Type'].map(type_to_score)

In [281]:
# Put the winter means and the El Nino score side by side. The inner join
# keeps only the index labels present in both tables.
oni_by_year = oci.set_index('From')[['index']]
haha = pd.concat([years, oni_by_year], axis=1, join='inner')

In [266]:
# Display the joined table of winter means and the El Nino score.
haha

Unnamed: 0,TEMP,PRCP,WDSP,MXSPD,index
1981,57.935198,0.149135,6.214785,12.867443,0.0
1982,57.95672,0.154857,6.829877,13.63096,2.0
1983,58.422312,0.047739,6.306589,13.51767,-0.5
1984,56.161617,0.083768,6.227346,12.37453,-0.5
1985,58.826843,0.111417,6.303456,12.749453,0.0
1986,58.653961,0.03207,6.102775,12.064301,1.0
1987,57.569042,0.06365,6.540107,12.700383,1.0
1988,55.961694,0.063833,6.251375,12.123783,-1.5
1989,58.379608,0.039611,5.921745,11.921313,0.0
1990,58.715123,0.031383,5.190607,10.285958,0.0


## Precipitation and El nino (Higher index means stronger el nino)

In [273]:
# Scatter of winter-mean PRCP against the El Nino score: one marker per row
# of haha, labelled with its index value and coloured by the same score.
marker_style = {'color': haha['index'], 'colorscale': 'Viridis'}
traces = [
    go.Scatter(
        x=haha['index'],
        y=haha['PRCP'],
        text=haha.index,
        mode='markers',
        marker=marker_style,
    ),
]
ply.iplot(traces, filename='WeatherData/precipitation')

There is a positive correlation between precipitation and El Niño (at least in LA).

## Temperature and El nino

In [276]:
# LA airport temperature through the end of 2014, indexed by date.
a = la.set_index('YEARMODA')[['TEMP']][:'2014']
# Winter months (Nov-Feb) reduced to monthly means, bins with missing values
# dropped, then moved back six month-ends so each winter sits inside the
# calendar year in which it started. The explicit .mean() is required: since
# pandas 0.18 a bare .resample('M') returns a Resampler, which has no
# .dropna().
is_winter = a.index.map(lambda stamp: stamp.month in [11, 12, 1, 2])
b = a[is_winter].resample('M').mean().dropna().shift(-6, freq='M')
# One mean temperature per shifted year.
c = b.groupby(b.index.year).mean()
# Inner join with the El Nino score: only labels present in both survive.
d = pd.concat([c, oci.set_index('From')[['index']]], join='inner', axis=1)
traces = [go.Scatter(x=d['index'], y=d['TEMP'], text=d.index, mode='markers')]
ply.iplot(traces, filename='WeatherData/test6')

Less significant correlation

In [289]:
# Winter-mean PRCP against TEMP, one marker per row of haha, coloured by the
# El Nino score.
traces = [
    go.Scatter(
        x=haha['PRCP'],
        y=haha['TEMP'],
        text=haha.index,
        mode='markers',
        marker={'color': haha['index']},
    ),
]
ply.iplot(traces, filename='WeatherData/test7')

TEMP is not correlated with el nino

In [290]:
# Winter-mean PRCP against WDSP, one marker per row of haha, coloured by the
# El Nino score.
traces = [
    go.Scatter(
        x=haha['PRCP'],
        y=haha['WDSP'],
        text=haha.index,
        mode='markers',
        marker={'color': haha['index']},
    ),
]
ply.iplot(traces, filename='WeatherData/test8')

In [291]:
# Winter-mean PRCP against MXSPD, one marker per row of haha, coloured by the
# El Nino score.
traces = [
    go.Scatter(
        x=haha['PRCP'],
        y=haha['MXSPD'],
        text=haha.index,
        mode='markers',
        marker={'color': haha['index']},
    ),
]
# Own filename: this cell used to upload to 'WeatherData/test8', the name the
# PRCP-vs-WDSP chart already uses, so one chart overwrote the other.
ply.iplot(traces, filename='WeatherData/test9')

In [292]:
# Winter-mean WDSP against MXSPD, one marker per row of haha, coloured by the
# El Nino score.
traces = [
    go.Scatter(
        x=haha['WDSP'],
        y=haha['MXSPD'],
        text=haha.index,
        mode='markers',
        marker={'color': haha['index']},
    ),
]
# Own filename: this cell used to upload to 'WeatherData/test8', the name the
# PRCP-vs-WDSP chart already uses, so one chart overwrote the other.
ply.iplot(traces, filename='WeatherData/test10')

# Select STATION

In [350]:
# Measurement columns carried into the per-station comparison.
useful_variables = [
    'PRCP',
    'WDSP',
    'MXSPD',
    'TEMP',
]

In [351]:
# Observations of every station, indexed by date, with STN kept as a column.
all_dated = df_subsetted.set_index('YEARMODA')[['STN'] + useful_variables]
# Keep 2014 and earlier with a boolean year mask instead of the [:'2014']
# label slice. The frame stacks several stations, so its date index is not
# sorted, and pandas >= 2.0 raises KeyError for a partial-string slice on a
# non-monotonic DatetimeIndex whose bound is not an exact index value.
all_indexed = all_dated[all_dated.index.year <= 2014]
# Winter months (Nov-Feb) only.
all_months = all_indexed[all_indexed.index.map(lambda stamp: stamp.month in [11, 12, 1, 2])]

In [352]:
# Move every date back six month-ends, so the Nov-Feb rows of one winter all
# carry the calendar year in which that winter started.
all_shifted = all_months.shift(periods=-6, freq='M')

In [353]:
# Per-station means over the rows whose shifted date falls in 1997.
# Rows are selected with .loc: all_shifted['1997'] is treated as a column
# lookup by pandas >= 2.0 (string row selection through [] was removed) and
# raises KeyError there.
mean1997 = all_shifted.loc['1997'].groupby('STN').mean()

In [354]:
# Per-station means over the rows whose shifted date falls in 2014.
# Rows are selected with .loc: all_shifted['2014'] is treated as a column
# lookup by pandas >= 2.0 (string row selection through [] was removed) and
# raises KeyError there.
# NOTE(review): all_indexed is cut at the end of 2014 before the shift, so
# this group appears to hold only Nov-Dec 2014 (no Jan-Feb 2015) -- confirm
# that comparing it with a full Nov-Feb winter is intended.
mean2014 = all_shifted.loc['2014'].groupby('STN').mean()

In [355]:
# Station-by-station difference of the two sets of means (1997 minus 2014);
# rows containing any missing difference are discarded.
diff = (mean1997 - mean2014).dropna()

In [356]:
# Column-wise total of the per-station differences (1997 minus 2014).
diff.sum()

PRCP       0.250216
WDSP      97.030370
MXSPD     85.716564
TEMP    -223.637501
dtype: float64

In [None]:
# NOTE(review): 722950 is the STN value used earlier in this notebook to
# select the LA airport station; this unexecuted cell looks like a scratch
# note rather than part of the analysis.
722950

In [344]:
# Per-station PRCP difference (1997 minus 2014), one marker per station with
# the station id as hover text.
traces = [go.Scatter(y=diff['PRCP'], text=diff.index, mode='markers')]
ply.iplot(traces, filename='WeatherData/diff1')

In [348]:
# Per-station WDSP difference (1997 minus 2014), one marker per station with
# the station id as hover text.
traces = [go.Scatter(y=diff['WDSP'], text=diff.index, mode='markers')]
ply.iplot(traces, filename='WeatherData/diff2')

In [349]:
# Per-station MXSPD difference (1997 minus 2014), one marker per station with
# the station id as hover text.
traces = [go.Scatter(y=diff['MXSPD'], text=diff.index, mode='markers')]
ply.iplot(traces, filename='WeatherData/diff3')

In [357]:
# Per-station TEMP difference (1997 minus 2014), one marker per station with
# the station id as hover text.
traces = [go.Scatter(y=diff['TEMP'], text=diff.index, mode='markers')]
ply.iplot(traces, filename='WeatherData/diff5')

In [307]:
# Mean of every column per shifted year, pooling the rows of all stations.
all_years = all_shifted.groupby(all_shifted.index.year).mean()
# Inner join with the El Nino score: only labels present in both survive.
oni_by_year = oci.set_index('From')[['index']]
hehe = pd.concat([all_years, oni_by_year], axis=1, join='inner')
# All-station PRCP against MXSPD, one marker per row, coloured by the score.
traces = [
    go.Scatter(
        x=hehe['PRCP'],
        y=hehe['MXSPD'],
        text=hehe.index,
        mode='markers',
        marker={'color': hehe['index']},
    ),
]
ply.iplot(traces, filename='WeatherData/hehe')