# Annualise NAO data for mean and variance
Take the monthly index values and generate annual means and variances

## Load data from `.csv`

In [1]:
import pandas as pd
import glob

In [2]:
# Base name of the dataset: it selects the input CSV (../data/<dataname>.csv),
# names the value column, and prefixes every output file written below.
dataname = 'nao'

In [3]:
# Resolve the input path. The single-element unpacking raises ValueError
# unless the glob yields exactly one match.
[fname] = glob.glob(f"../data/{dataname}.csv")
# Load the monthly table and preview its first rows.
df = pd.read_csv(fname)
df.head()

Unnamed: 0,year,month,nao
0,1950,1,0.92
1,1950,2,0.4
2,1950,3,-0.36
3,1950,4,0.73
4,1950,5,-0.59


In [4]:
# Preview the last rows to see where the monthly record ends.
df.tail()

Unnamed: 0,year,month,nao
852,2021,1,-1.11
853,2021,2,0.14
854,2021,3,0.73
855,2021,4,-1.43
856,2021,5,-1.24


In [5]:
# NOTE: this binds a second name to the same DataFrame (no copy is made), so
# columns added to `dfl` below also appear on `df`.
dfl = df

In [6]:
# Rename a column called 'Value' (if one exists) to the dataset name.
# The columns displayed above are year/month/nao, so for this file there is
# no 'Value' column and the call leaves the frame unchanged.
dfl.rename(columns={'Value': dataname}, inplace=True)

## Timey Wimey stuff

In [8]:
# Build a first-of-month timestamp for every row directly from the numeric
# year/month columns. Assembling from components avoids concatenating
# unpadded strings (e.g. "1950" + "1" -> "19501") and relying on '%Y%m'
# to split them correctly.
dfl['date'] = pd.to_datetime(dfl[['year', 'month']].assign(day=1))

In [10]:
# Display the frame to check the new 'date' column.
dfl

Unnamed: 0,year,month,nao,date
0,1950,1,0.92,1950-01-01
1,1950,2,0.40,1950-02-01
2,1950,3,-0.36,1950-03-01
3,1950,4,0.73,1950-04-01
4,1950,5,-0.59,1950-05-01
...,...,...,...,...
852,2021,1,-1.11,2021-01-01
853,2021,2,0.14,2021-02-01
854,2021,3,0.73,2021-03-01
855,2021,4,-1.43,2021-04-01


In [11]:
# Label each month with the annual period that ends in November,
# i.e. a December-to-November year.
dfl['north'] = dfl['date'].dt.to_period('A-NOV')

In [12]:
# Label each month with the annual period that ends in May,
# i.e. a June-to-May year.
dfl['south'] = dfl['date'].dt.to_period('A-MAY')

In [13]:
# Label each month with the annual period that ends in April,
# i.e. a May-to-April year.
dfl['tropic'] = dfl['date'].dt.to_period('A-APR')

In [14]:
# Display the frame to check the three period-label columns.
dfl

Unnamed: 0,year,month,nao,date,north,south,tropic
0,1950,1,0.92,1950-01-01,1950,1950,1950
1,1950,2,0.40,1950-02-01,1950,1950,1950
2,1950,3,-0.36,1950-03-01,1950,1950,1950
3,1950,4,0.73,1950-04-01,1950,1950,1950
4,1950,5,-0.59,1950-05-01,1950,1950,1951
...,...,...,...,...,...,...,...
852,2021,1,-1.11,2021-01-01,2021,2021,2021
853,2021,2,0.14,2021-02-01,2021,2021,2021
854,2021,3,0.73,2021-03-01,2021,2021,2021
855,2021,4,-1.43,2021-04-01,2021,2021,2021


## Aggregate - Mean

In [15]:
def mean_to_csv(var, dataname=dataname):
    """Write the per-year mean of the index to a CSV file.

    Reads the notebook-level frame ``dfl``, groups its ``dataname`` column by
    the year labels stored in column ``var`` and saves the group means to
    ``../data/{dataname}_annual_mean_{var}.csv``.

    Parameters
    ----------
    var : str
        Name of the ``dfl`` column holding the year label of each row
        (e.g. ``'north'``, ``'south'`` or ``'tropic'``).
    dataname : str
        Name of the value column; also used in the output column and file
        names. Defaults to the notebook-level ``dataname`` at definition time.

    Notes
    -----
    Each group is averaged over whichever months it contains, so years at the
    edges of the record may be based on fewer than twelve months.
    """
    column_names = {var: 'year', dataname: f"{dataname}_mean_{var}"}
    # Keep only the grouping labels and the values, under their output names.
    monthly = dfl[[var, dataname]].rename(columns=column_names)
    annual = monthly.groupby('year').mean()
    annual.to_csv(f'../data/{dataname}_annual_mean_{var}.csv')

Northern Hemisphere (Dec-Nov)

In [16]:
# Annual means over the 'north' year labels (periods ending in November).
mean_to_csv('north')

Southern Hemisphere (Jun-May)

In [17]:
# Annual means over the 'south' year labels (periods ending in May).
mean_to_csv('south')

Tropical year (May-Apr)

In [18]:
# Annual means over the 'tropic' year labels (periods ending in April).
mean_to_csv('tropic')

Hsiang 2011 - "Civil conflicts are associated with the global climate" (May-Dec) <br>
https://www.nature.com/articles/nature10311

In [19]:
# Calendar-year mean over the months May-December only (month > 4).
# The value column is selected *before* aggregating so that the helper
# columns (the datetime 'date' and the Period columns) are never passed to
# mean(); on pandas >= 2.0 aggregating such columns can raise a TypeError
# instead of being silently dropped.
(dfl[dfl.month > 4]
 .groupby('year')[dataname]
 .mean()
 .to_csv(f'../data/{dataname}_annual_mean_hsiang2011.csv')
)

## Aggregate - Variance

In [20]:
def variance_to_csv(var, dataname=dataname):
    """Write the per-year variance of the index to a CSV file.

    Reads the notebook-level frame ``dfl``, groups its ``dataname`` column by
    the year labels stored in column ``var`` and saves the group variances
    (as computed by the pandas groupby ``var()`` method) to
    ``../data/{dataname}_annual_variance_{var}.csv``.

    Parameters
    ----------
    var : str
        Name of the ``dfl`` column holding the year label of each row
        (e.g. ``'north'``, ``'south'`` or ``'tropic'``).
    dataname : str
        Name of the value column; also used in the output column and file
        names. Defaults to the notebook-level ``dataname`` at definition time.
    """
    target_path = f'../data/{dataname}_annual_variance_{var}.csv'
    output_column = f"{dataname}_variance_{var}"
    # Keep only the grouping labels and the values, under their output names.
    selected = dfl[[var, dataname]]
    selected = selected.rename(columns={var: 'year', dataname: output_column})
    selected.groupby('year').var().to_csv(target_path)

Northern Hemisphere (Dec-Nov)

In [21]:
# Annual variances over the 'north' year labels (periods ending in November).
variance_to_csv('north')

Southern Hemisphere (Jun-May)

In [22]:
# Annual variances over the 'south' year labels (periods ending in May).
variance_to_csv('south')

Tropical year (May-Apr)

In [23]:
# Annual variances over the 'tropic' year labels (periods ending in April).
variance_to_csv('tropic')

Hsiang 2011 - "Civil conflicts are associated with the global climate" (May-Dec) <br>
https://www.nature.com/articles/nature10311

In [24]:
# Calendar-year variance over the months May-December only (month > 4).
# The value column is selected *before* aggregating so that the helper
# columns (the datetime 'date' and the Period columns) are never passed to
# var(); on pandas >= 2.0 aggregating such columns can raise a TypeError
# instead of being silently dropped.
(dfl[dfl.month > 4]
 .groupby('year')[dataname]
 .var()
 .to_csv(f'../data/{dataname}_annual_variance_hsiang2011.csv')
)