In [1]:
import pandas as pd

In [3]:
# Load the preprocessed California station data; YEARMODA is parsed as datetime.
df_subsetted = pd.read_csv(
    '../data/preprocessed/CA/CA_1981-2015_subsetted.csv',
    parse_dates=['YEARMODA'],
)

In [34]:
# Preview the first five rows of the loaded frame.
df_subsetted.head(n=5)

Unnamed: 0,YEARMODA,STN,TEMP,DIFF,PRCP,GUST
0,1985-04-24,691414,54.1,12.6,0.0,15.0
1,1985-04-25,691414,42.9,12.6,,25.1
2,1985-04-26,691414,43.0,18.0,,20.0
3,1985-04-27,691414,42.4,12.6,,53.0
4,1985-04-28,691414,40.5,14.4,,40.0


In [136]:
# Non-null count per column, to see how complete each measurement is.
df_subsetted.count(axis=0)

YEARMODA    1370304
STN         1370304
TEMP        1370304
DIFF        1366469
PRCP        1341677
GUST         386697
dtype: int64

## LA AIRPORT STATION

In [7]:
# Keep only the rows recorded by station 722950 (the LA airport station).
la = df_subsetted.loc[df_subsetted['STN'].eq(722950)]

Less than 20% of the records from the LA airport station have a GUST value (2,453 of 12,426), so we will not use GUST.

In [82]:
# Non-null count per column for the LA station only.
la.count(axis=0)

YEARMODA    12426
STN         12426
TEMP        12426
DIFF        12426
PRCP        12419
GUST         2453
dtype: int64

### Precipitation comparison: El Niño year vs. normal year

The most significant indicator of El Niño in California is precipitation.

<img src="http://www.trbimg.com/img-55d635df/turbine/la-me-g-0820-el-nino-rainfall-comparison-20150820/750/750x422" />

Source: http://www.latimes.com/local/lanow/la-me-ln-el-nino-temperatures-new-record-20151117-story.html

In [39]:
from plotly import plotly as ply
from plotly import graph_objs as go

In [106]:
# Daily precipitation at the LA station, indexed by date so that date-string
# slices can select one July-June season each.
la_prcp = la.set_index('YEARMODA')[['PRCP']]
tt = la_prcp.loc['1997-7-1':'1998-6-30']
tt2 = la_prcp.loc['2014-7-1':'2015-6-30']
# Monthly means. The aggregation is spelled out because, since pandas 0.18,
# a bare .resample('M') returns a deferred Resampler rather than a frame of
# means, so the later rr.index / rr['PRCP'] lookups would not work on it.
rr = tt.resample('M').mean()
rr2 = tt2.resample('M').mean()

In [105]:
# Overlay the daily and monthly precipitation series of both seasons.
# The x values are formatted as month and day only (no year).
traces = []
for frame, label in [(tt, '1997-1998'),
                     (tt2, '2014-2015'),
                     (rr, '1997-1998 monthly'),
                     (rr2, '2014-2015 monthly')]:
    traces.append(go.Scatter(
        x=frame.index.map(lambda x: x.strftime('%b %d')),
        # The column is named 'PRCP'; a misspelled key here raises KeyError.
        y=frame['PRCP'],
        name=label))
ply.iplot(traces, filename='WeatherData/test')

There is a huge difference in precipitation between 1997-1998 and 2014-2015, but it is only obvious in February.

<img src="http://data-week.popsci.com/gifs/Precipitation-Comparison-Two-ElNino-Years.gif" />

## Temperature comparison

In [133]:
# Daily LA station records for the 1997-1998 and 2014-2015 July-June seasons.
la_by_date = la.set_index('YEARMODA')
tt = la_by_date.loc['1997-7-1':'1998-6-30']
tt2 = la_by_date.loc['2014-7-1':'2015-6-30']
# Monthly means; .mean() is explicit because a bare .resample('M') only
# returns a deferred Resampler in pandas >= 0.18.
rr = tt.resample('M').mean()
rr2 = tt2.resample('M').mean()

# One trace per series; x is formatted as month and day only (no year).
traces = []
for frame, label in [(tt, '1997-1998'),
                     (tt2, '2014-2015'),
                     (rr, '1997-1998 monthly'),
                     (rr2, '2014-2015 monthly')]:
    traces.append(go.Scatter(
        x=frame.index.strftime('%b %d'),
        y=frame['TEMP'],
        name=label))

ply.iplot(traces, filename='WeatherData/test')

The temperature difference between 1997-1998 and 2014-2015 is not very significant.

In [132]:
# Daily LA station records for the 1997-1998 and 1996-1997 July-June seasons.
la_by_date = la.set_index('YEARMODA')
tt = la_by_date.loc['1997-7-1':'1998-6-30']
tt2 = la_by_date.loc['1996-7-1':'1997-6-30']
# Monthly means; .mean() is explicit because a bare .resample('M') only
# returns a deferred Resampler in pandas >= 0.18.
rr = tt.resample('M').mean()
rr2 = tt2.resample('M').mean()

# One trace per series; x is formatted as month and day only (no year).
traces = []
for frame, label in [(tt, '1997-1998'),
                     (tt2, '1996-1997'),
                     (rr, '1997-1998 monthly'),
                     (rr2, '1996-1997 monthly')]:
    traces.append(go.Scatter(
        x=frame.index.strftime('%b %d'),
        y=frame['TEMP'],
        name=label))

ply.iplot(traces, filename='WeatherData/test')

However, there is some temperature difference between 1997-1998 and 1996-1997. Both 2014-2015 and 1996-1997 are normal years.

<img src="http://data-week.popsci.com/gifs/Temperature-Comparison-Two-ElNino-Years.gif" />

Even in 1997-1998, the temperature in California was only typical. California is predicted to be warmer this winter, but not with very high probability.

## Does this also hold for 1982-1983?

In [135]:
# Daily LA station records for the 1982-1983 and 2014-2015 July-June seasons.
la_by_date = la.set_index('YEARMODA')
tt = la_by_date.loc['1982-7-1':'1983-6-30']
tt2 = la_by_date.loc['2014-7-1':'2015-6-30']
# Monthly means; .mean() is explicit because a bare .resample('M') only
# returns a deferred Resampler in pandas >= 0.18.
rr = tt.resample('M').mean()
rr2 = tt2.resample('M').mean()

# One trace per series; x is formatted as month and day only (no year).
traces = []
for frame, label in [(tt, '1982-1983'),
                     (tt2, '2014-2015'),
                     (rr, '1982-1983 monthly'),
                     (rr2, '2014-2015 monthly')]:
    traces.append(go.Scatter(
        x=frame.index.strftime('%b %d'),
        y=frame['PRCP'],
        name=label))

ply.iplot(traces, filename='WeatherData/test')

For 1982-1983, the precipitation difference is even less significant.

In [134]:
# Daily LA station records for the 1982-1983 and 2014-2015 July-June seasons.
la_by_date = la.set_index('YEARMODA')
tt = la_by_date.loc['1982-7-1':'1983-6-30']
tt2 = la_by_date.loc['2014-7-1':'2015-6-30']
# Monthly means; .mean() is explicit because a bare .resample('M') only
# returns a deferred Resampler in pandas >= 0.18.
rr = tt.resample('M').mean()
rr2 = tt2.resample('M').mean()

# One trace per series; x is formatted as month and day only (no year).
traces = []
for frame, label in [(tt, '1982-1983'),
                     (tt2, '2014-2015'),
                     (rr, '1982-1983 monthly'),
                     (rr2, '2014-2015 monthly')]:
    traces.append(go.Scatter(
        x=frame.index.strftime('%b %d'),
        y=frame['TEMP'],
        name=label))

ply.iplot(traces, filename='WeatherData/test')

# El Niño index

In [166]:
# Daily LA precipitation up to and including 2014, indexed by date.
a = la.set_index('YEARMODA').loc[:'2014', ['PRCP']]

In [167]:
# Keep only the November-February observations.
is_winter = a.index.month.isin([11, 12, 1, 2])
# Monthly means of those observations. The explicit .mean() is needed because
# a bare .resample('M') is only a deferred Resampler in pandas >= 0.18;
# dropna() then removes the months that had no winter data.
# Shifting the month-end index back six months moves Nov/Dec of one year and
# Jan/Feb of the next into the same calendar year (May-Aug of the first year),
# so each winter can be grouped under a single year.
b = a[is_winter].resample('M').mean().dropna().shift(-6, freq='M')
# One mean value per (shifted) year.
c = b.groupby(b.index.year).mean()

## Precipitation in LA

In [168]:
# Plot the per-year precipitation mean held in `c` against its index.
trace0 = go.Scatter(x=c.index, y=c['PRCP'])
ply.iplot([trace0], filename='WeatherData/test')

In [130]:
# Read the tab-separated ONI table. A plain '\t' keeps pandas on its default
# C parser; the raw string r'\t' is two characters long, which pandas
# interprets as a regular expression and parses with the slower Python engine.
oci = pd.read_csv('../data/raw/ONI.txt', sep='\t')





In [141]:
# Translate each 'Type' code into a signed numeric score: codes ending in 'L'
# map to negative values, 'N' to zero, codes ending in 'E' to positive values.
# NOTE(review): presumably La Nina / neutral / El Nino strength classes --
# confirm against ONI.txt.
oci['index'] = oci['Type'].map({
    'VSL': -2, 'SL': -1.5, 'ML': -1, 'WL': -0.5,
    'N': 0,
    'WE': 0.5, 'ME': 1, 'SE': 1.5, 'VSE': 2,
})

In [159]:
# Put the per-year means next to the ONI score, matching rows on the index
# ('From' on the ONI side -- presumably the starting year; verify). The inner
# join keeps only index values present in both frames.
d = pd.concat(
    [c, oci.set_index('From')[['index']]],
    axis=1,
    join='inner',
)

## Precipitation and El Niño (a higher index means a stronger El Niño)

In [163]:
# Scatter of precipitation against the ONI score, one marker per row of `d`,
# labelled with that row's index value.
traces = [
    go.Scatter(x=d['index'], y=d['PRCP'], text=d.index, mode='markers'),
]
ply.iplot(traces, filename='WeatherData/test')

There is a positive correlation between precipitation and El Niño strength (at least in LA).

## Temperature and El Niño

In [169]:
# Daily LA temperature up to and including 2014, indexed by date.
a = la.set_index('YEARMODA')[['TEMP']][:'2014']
# Keep only the November-February observations.
is_winter = a.index.month.isin([11, 12, 1, 2])
# Monthly means of those observations. The explicit .mean() is needed because
# a bare .resample('M') is only a deferred Resampler in pandas >= 0.18;
# dropna() then removes the months that had no winter data.
# Shifting the month-end index back six months moves Nov/Dec of one year and
# Jan/Feb of the next into the same calendar year, so each winter can be
# grouped under a single year.
b = a[is_winter].resample('M').mean().dropna().shift(-6, freq='M')
# One mean value per (shifted) year.
c = b.groupby(b.index.year).mean()
# Match the yearly means with the ONI score on the index; the inner join keeps
# only index values present in both frames.
d = pd.concat([c, oci.set_index('From')[['index']]], join='inner', axis=1)
# Scatter of temperature against the ONI score, one marker per row of `d`.
traces = [go.Scatter(x=d['index'], y=d['TEMP'], text=d.index, mode='markers')]
ply.iplot(traces, filename='WeatherData/test')

There is no obvious correlation between temperature and El Niño strength.