# Annualise NAO for mean and variance

## Load data from `.csv`

In [1]:
import pandas as pd
import glob

In [3]:
# Short name of the index: it selects the input CSV, becomes the name of the
# value column, and prefixes every output file written below.
dataname = 'nao'

In [7]:
# Unpacking into `fname,` requires glob to return exactly one path; zero or
# several matches raise ValueError instead of silently picking a file.
fname, = glob.glob(f"datadrive/TeamG/data/{dataname}.csv")
df = pd.read_csv(fname)
# Preview the first rows.
df.head()

Unnamed: 0,Date,Value
0,195001,0.92
1,195002,0.4
2,195003,-0.36
3,195004,0.73
4,195005,-0.59


In [8]:
# Preview the last rows to see where the record ends.
df.tail()

Unnamed: 0,Date,Value
852,202101,-1.11
853,202102,0.14
854,202103,0.73
855,202104,-1.43
856,202105,-1.24


In [10]:
# NOTE: this binds a second name to the same DataFrame (no copy is made), so
# every column added to or renamed on `dfl` below also changes `df`.
dfl = df

In [23]:
# Name the value column after the index; the aggregation cells below select
# the column by `dataname`.
dfl.rename(columns={'Value': dataname}, inplace=True)

## Timey Wimey stuff

In [11]:
# `Date` holds year-month stamps such as 195001; parse them into timestamps
# (shown as the first day of the month, e.g. 1950-01-01).
dfl['date'] = pd.to_datetime(dfl['Date'], format='%Y%m')

In [28]:
# Calendar year of each row; grouping key for the `month > 4` aggregates below.
dfl['year'] = dfl['date'].dt.year

In [12]:
# Calendar month (1-12) of each row; used below to filter rows with `month > 4`.
dfl['month'] = dfl['date'].dt.month

In [13]:
# Display the frame to check the derived date columns.
dfl

Unnamed: 0,Date,Value,date,month
0,195001,0.92,1950-01-01,1
1,195002,0.40,1950-02-01,2
2,195003,-0.36,1950-03-01,3
3,195004,0.73,1950-04-01,4
4,195005,-0.59,1950-05-01,5
...,...,...,...,...
852,202101,-1.11,2021-01-01,1
853,202102,0.14,2021-02-01,2
854,202103,0.73,2021-03-01,3
855,202104,-1.43,2021-04-01,4


In [14]:
# Annual period ending in November: each year runs December-November, so a
# December row is labelled with the following year.
dfl['north'] = dfl['date'].dt.to_period('A-NOV')

In [15]:
# Annual period ending in May: each year runs June-May, so rows from June
# onward are labelled with the following year.
dfl['south'] = dfl['date'].dt.to_period('A-MAY')

In [16]:
# Annual period ending in April: each year runs May-April, so rows from May
# onward are labelled with the following year (May 1950 -> 1951).
dfl['tropic'] = dfl['date'].dt.to_period('A-APR')

In [17]:
# Display the frame to check the three period-label columns.
dfl

Unnamed: 0,Date,Value,date,month,north,south,tropic
0,195001,0.92,1950-01-01,1,1950,1950,1950
1,195002,0.40,1950-02-01,2,1950,1950,1950
2,195003,-0.36,1950-03-01,3,1950,1950,1950
3,195004,0.73,1950-04-01,4,1950,1950,1950
4,195005,-0.59,1950-05-01,5,1950,1950,1951
...,...,...,...,...,...,...,...
852,202101,-1.11,2021-01-01,1,2021,2021,2021
853,202102,0.14,2021-02-01,2,2021,2021,2021
854,202103,0.73,2021-03-01,3,2021,2021,2021
855,202104,-1.43,2021-04-01,4,2021,2021,2021


## Aggregate - Mean

In [21]:
def mean_to_csv(var, dataname=dataname):
    """Write the per-period mean of the index to a CSV file.

    Reads the module-level ``dfl`` frame, groups the ``dataname`` column by
    the period labels stored in column ``var`` and averages each group.

    Parameters
    ----------
    var : str
        Name of the ``dfl`` column holding the period label of each row
        (e.g. ``'north'``); it becomes the ``year`` index of the output.
    dataname : str
        Name of the value column in ``dfl``; also used in the output column
        name and file name. Defaults to the module-level ``dataname`` as it
        was when this function was defined.
    """
    value_column = f"{dataname}_mean_{var}"
    out_path = f'datadrive/TeamG/data/{dataname}_annual_mean_{var}.csv'

    # Keep only the label and value columns so nothing else is aggregated.
    pairs = dfl[[var, dataname]]
    pairs = pairs.rename(columns={var: 'year', dataname: value_column})
    annual = pairs.groupby('year').mean()
    annual.to_csv(out_path)

Northern Hemisphere (Dec-Nov)

In [24]:
# Mean per 'north' period label -> {dataname}_annual_mean_north.csv
mean_to_csv('north')

Southern Hemisphere (Jun-May)

In [25]:
# Mean per 'south' period label -> {dataname}_annual_mean_south.csv
mean_to_csv('south')

Tropical year (May-Apr)

In [26]:
# Mean per 'tropic' period label -> {dataname}_annual_mean_tropic.csv
mean_to_csv('tropic')

Hsiang 2011 - "Civil conflicts are associated with the global climate" (May-Dec) <br>
https://www.nature.com/articles/nature10311

In [29]:
# May-December mean: keep the rows with month 5-12 and average the index per
# calendar year.
# The value column is selected *before* aggregating, so the datetime and
# period columns of `dfl` are never averaged (recent pandas versions raise on
# such columns instead of dropping them). The series written is unchanged.
(dfl[dfl.month > 4]
 .groupby('year')[dataname]
 .mean()
 .to_csv(f'datadrive/TeamG/data/{dataname}_annual_mean_hsiang2011.csv')
)

## Aggregate - Variance

In [35]:
def variance_to_csv(var, dataname=dataname):
    """Write the per-period variance of the index to a CSV file.

    Reads the module-level ``dfl`` frame, groups the ``dataname`` column by
    the period labels stored in column ``var`` and takes the variance of
    each group with ``GroupBy.var()``.

    Parameters
    ----------
    var : str
        Name of the ``dfl`` column holding the period label of each row
        (e.g. ``'north'``); it becomes the ``year`` index of the output.
    dataname : str
        Name of the value column in ``dfl``; also used in the output column
        name and file name. Defaults to the module-level ``dataname`` as it
        was when this function was defined.
    """
    new_names = {var: 'year', dataname: f"{dataname}_variance_{var}"}
    target = f'datadrive/TeamG/data/{dataname}_annual_variance_{var}.csv'

    # Restrict to the label and value columns before grouping.
    labelled = dfl[[var, dataname]].rename(columns=new_names)
    by_period = labelled.groupby('year')
    by_period.var().to_csv(target)

Northern Hemisphere (Dec-Nov)

In [36]:
# Variance per 'north' period label -> {dataname}_annual_variance_north.csv
variance_to_csv('north')

Southern Hemisphere (Jun-May)

In [37]:
# Variance per 'south' period label -> {dataname}_annual_variance_south.csv
variance_to_csv('south')

Tropical year (May-Apr)

In [38]:
# Variance per 'tropic' period label -> {dataname}_annual_variance_tropic.csv
variance_to_csv('tropic')

Hsiang 2011 - "Civil conflicts are associated with the global climate" (May-Dec) <br>
https://www.nature.com/articles/nature10311

In [39]:
# May-December variance: keep the rows with month 5-12 and take the variance
# of the index per calendar year.
# The value column is selected *before* aggregating, so the datetime and
# period columns of `dfl` are never passed to `var()` (recent pandas versions
# raise on such columns instead of dropping them). The series written is
# unchanged.
(dfl[dfl.month > 4]
 .groupby('year')[dataname]
 .var()
 .to_csv(f'datadrive/TeamG/data/{dataname}_annual_variance_hsiang2011.csv')
)