# Annualise PDO data for mean and variance
Take monthly indices and generate annual averages

## Load data from `.csv`

In [1]:
import pandas as pd
import glob

In [2]:
dataname = 'pdo'

In [3]:
fname, = glob.glob(f"../data/{dataname}.csv")
df = pd.read_csv(fname)
df.head()

Unnamed: 0,date,pdo
0,1854-01-01,0.315
1,1854-02-01,0.165
2,1854-03-01,0.161
3,1854-04-01,0.181
4,1854-05-01,0.199


In [4]:
df.tail()

Unnamed: 0,date,pdo
1999,2020-08-01,-9999.0
2000,2020-09-01,-9999.0
2001,2020-10-01,-9999.0
2002,2020-11-01,-9999.0
2003,2020-12-01,-9999.0


## Remove null values

In [5]:
dfl = df[df[dataname] != -9999.00]

## Timey Wimey stuff

In [6]:
dfl['date'] = pd.to_datetime(dfl['date'])
dfl['month']=dfl['date'].dt.month
dfl['year']=dfl['date'].dt.year

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [7]:
dfl

Unnamed: 0,date,pdo,month,year
0,1854-01-01,0.315,1,1854
1,1854-02-01,0.165,2,1854
2,1854-03-01,0.161,3,1854
3,1854-04-01,0.181,4,1854
4,1854-05-01,0.199,5,1854
...,...,...,...,...
1989,2019-10-01,-0.553,10,2019
1990,2019-11-01,0.012,11,2019
1991,2019-12-01,0.204,12,2019
1992,2020-01-01,-1.439,1,2020


In [8]:
dfl['north'] = dfl['date'].dt.to_period('A-NOV')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [9]:
dfl['south'] = dfl['date'].dt.to_period('A-MAY')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [10]:
dfl['tropic'] = dfl['date'].dt.to_period('A-APR')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [11]:
dfl

Unnamed: 0,date,pdo,month,year,north,south,tropic
0,1854-01-01,0.315,1,1854,1854,1854,1854
1,1854-02-01,0.165,2,1854,1854,1854,1854
2,1854-03-01,0.161,3,1854,1854,1854,1854
3,1854-04-01,0.181,4,1854,1854,1854,1854
4,1854-05-01,0.199,5,1854,1854,1854,1855
...,...,...,...,...,...,...,...
1989,2019-10-01,-0.553,10,2019,2019,2020,2020
1990,2019-11-01,0.012,11,2019,2019,2020,2020
1991,2019-12-01,0.204,12,2019,2020,2020,2020
1992,2020-01-01,-1.439,1,2020,2020,2020,2020


## Aggregate - Mean

In [12]:
def mean_to_csv(var, dataname=dataname):
    (dfl[[var, dataname]]
     .rename(columns={var: 'year', dataname: f"{dataname}_mean_{var}"})
     .groupby('year')
     .mean()
     .to_csv(f'../data/{dataname}_annual_mean_{var}.csv'))

Northern Hemisphere (Dec-Dec)

In [13]:
mean_to_csv('north')

Southern Hemisphere (Jun-Jun)

In [14]:
mean_to_csv('south')

Tropical year (May-May)

In [15]:
mean_to_csv('tropic')

Hsiang 2011 - "Civil conflicts are associated with the global climate" (Mar-Dec) <br>
https://www.nature.com/articles/nature10311

In [16]:
(dfl[dfl.month > 4]
 .groupby('year')
 .mean()[dataname]
 .to_csv(f'../data/{dataname}_annual_mean_hsiang2011.csv')
)

## Aggregate - Variance

In [17]:
def variance_to_csv(var, dataname=dataname):
    (dfl[[var, dataname]]
     .rename(columns={var: 'year', dataname: f"{dataname}_variance_{var}"})
     .groupby('year')
     .var()
     .to_csv(f'../data/{dataname}_annual_variance_{var}.csv'))

Northern Hemisphere (Dec-Dec)

In [18]:
variance_to_csv('north')

Southern Hemisphere (Jun-Jun)

In [19]:
variance_to_csv('south')

Tropical year (May-May)

In [20]:
variance_to_csv('tropic')

Hsiang 2011 - "Civil conflicts are associated with the global climate" (Mar-Dec) <br>
https://www.nature.com/articles/nature10311

In [21]:
(dfl[dfl.month > 4]
 .groupby('year')
 .var()[dataname]
 .to_csv(f'../data/{dataname}_annual_variance_hsiang2011.csv')
)