# Annualise DMI for mean and variance

## Load data from `.txt`

In [4]:
import pandas as pd
import glob

In [2]:
# Dataset key: selects the input file (`{dataname}.*.txt`), names the value
# column of the long table, and is embedded in every output CSV name.
dataname = 'dmi'

In [7]:
# Exactly one file is expected to match the pattern; the single-element
# unpacking raises ValueError if the glob finds none or more than one.
[fname] = glob.glob(f"datadrive/TeamG/data/{dataname}.*.txt")
# `sep=r"\s+"` is the documented replacement for `delim_whitespace=True`
# (deprecated since pandas 2.1) and splits on the same whitespace runs.
df = pd.read_csv(fname, sep=r"\s+")
df.head()

Unnamed: 0,year,1,2,3,4,5,6,7,8,9,10,11,12
0,1870,-0.373,-0.256,0.277,0.027,-0.4,-0.434,-0.554,-0.409,-0.622,-0.476,-0.278,-0.306
1,1871,-0.208,-0.09,-0.112,-0.073,-0.035,-0.049,-0.347,-0.263,-0.23,-0.368,-0.094,-0.159
2,1872,0.028,0.121,0.024,-0.009,-0.069,0.03,-0.189,-0.213,-0.227,-0.111,0.017,-0.041
3,1873,0.127,-0.239,-0.304,-0.196,-0.331,-0.473,-0.593,-0.688,-0.588,-0.319,-0.229,-0.233
4,1874,-0.316,-0.308,-0.486,-0.678,-0.361,-0.351,-0.242,-0.232,-0.708,-0.999,-0.48,-0.72


In [8]:
# Inspect the end of the table: months without an observation hold the
# -9999.0 placeholder (see the final year in the output below).
df.tail()

Unnamed: 0,year,1,2,3,4,5,6,7,8,9,10,11,12
147,2017,-0.02,0.181,0.457,0.574,0.616,0.537,0.617,0.461,0.14,0.175,0.412,0.179
148,2018,-0.135,0.295,-0.02,-0.008,0.202,0.269,0.15,0.234,0.71,0.844,0.623,0.379
149,2019,0.452,0.496,0.325,0.333,0.619,0.719,0.693,0.548,0.999,1.123,0.958,0.312
150,2020,0.238,0.134,0.119,0.064,0.378,0.568,0.417,-0.07,-0.084,0.233,0.143,0.1
151,2021,0.116,0.323,0.367,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0


## Save to CSV

In [9]:
# Save the wide (one row per year, one column per month) table as CSV,
# without the row index. `df` is written unfiltered, so the -9999.0
# placeholders are still present in this file.
df.to_csv(f"datadrive/TeamG/data/{dataname}.csv", index=False)

## Longify

In [10]:
# Reshape wide -> long: one row per (year, month). The month labels are the
# original column names '1'..'12' (strings) and the values go into a column
# named after `dataname`.
dfl = df.melt(
    id_vars=['year'],
    value_vars=[str(month_number) for month_number in range(1, 13)],
    var_name='month',
    value_name=dataname,
)

## Remove null values

In [11]:
# Drop rows carrying the -9999.0 missing-value placeholder. `.copy()` makes
# `dfl` an independent frame, so the column assignments made on it later do
# not operate on a slice of the melted table (avoids pandas'
# SettingWithCopyWarning).
dfl = dfl[dfl[dataname] != -9999.00].copy()

## Timey Wimey stuff

In [27]:
# `melt` left the month labels as strings (the former column names
# '1'..'12'); convert them to numbers so that comparisons such as
# `dfl.month > 4` work.
dfl['month'] = pd.to_numeric(dfl['month'])

In [12]:
# Timestamp for the first day of each (year, month), assembled directly
# from the component columns rather than from a concatenated string.
dfl['date'] = pd.to_datetime(dfl[['year', 'month']].assign(day=1))

In [13]:
# Inspect the long table, now including the parsed `date` column.
dfl

Unnamed: 0,year,month,dmi,date
0,1870,1,-0.373,1870-01-01
1,1871,1,-0.208,1871-01-01
2,1872,1,0.028,1872-01-01
3,1873,1,0.127,1873-01-01
4,1874,1,-0.316,1874-01-01
...,...,...,...,...
1818,2016,12,-0.241,2016-12-01
1819,2017,12,0.179,2017-12-01
1820,2018,12,0.379,2018-12-01
1821,2019,12,0.312,2019-12-01


In [14]:
# 'A-NOV' = annual periods ending in November: label Y covers December of
# Y-1 through November of Y (e.g. 2016-12 is labelled 2017).
dfl['north'] = dfl['date'].dt.to_period('A-NOV')

In [15]:
# 'A-MAY' = annual periods ending in May: label Y covers June of Y-1
# through May of Y.
dfl['south'] = dfl['date'].dt.to_period('A-MAY')

In [16]:
# 'A-APR' = annual periods ending in April: label Y covers May of Y-1
# through April of Y.
dfl['tropic'] = dfl['date'].dt.to_period('A-APR')

In [17]:
# Inspect the period labels: December rows are labelled with the following
# year in all three schemes (see the last rows of the output below).
dfl

Unnamed: 0,year,month,dmi,date,north,south,tropic
0,1870,1,-0.373,1870-01-01,1870,1870,1870
1,1871,1,-0.208,1871-01-01,1871,1871,1871
2,1872,1,0.028,1872-01-01,1872,1872,1872
3,1873,1,0.127,1873-01-01,1873,1873,1873
4,1874,1,-0.316,1874-01-01,1874,1874,1874
...,...,...,...,...,...,...,...
1818,2016,12,-0.241,2016-12-01,2017,2017,2017
1819,2017,12,0.179,2017-12-01,2018,2018,2018
1820,2018,12,0.379,2018-12-01,2019,2019,2019
1821,2019,12,0.312,2019-12-01,2020,2020,2020


## Aggregate - Mean

In [22]:
def mean_to_csv(var, dataname=dataname):
    """Write the per-period mean of the `dataname` column to a CSV file.

    Rows of the module-level `dfl` are grouped by the labels in column
    `var` and averaged. The output file
    ``datadrive/TeamG/data/{dataname}_annual_mean_{var}.csv`` has a ``year``
    index column and a single value column ``{dataname}_mean_{var}``.

    Args:
        var: Name of the `dfl` column holding the year labels to group by.
        dataname: Name of the value column in `dfl`; also used to build the
            output column name and file name.

    NOTE(review): periods at either end of the record may contain fewer
    than twelve months and are averaged as-is; confirm that is intended.
    """
    per_period = dfl.groupby(var)[dataname].mean()
    annual = (per_period
              .rename(f"{dataname}_mean_{var}")
              .rename_axis('year')
              .to_frame())
    annual.to_csv(f'datadrive/TeamG/data/{dataname}_annual_mean_{var}.csv')

Northern Hemisphere (Dec-Nov)

In [44]:
# Writes {dataname}_annual_mean_north.csv, grouped by the 'north' labels.
mean_to_csv('north')

Southern Hemisphere (Jun-May)

In [20]:
# Writes {dataname}_annual_mean_south.csv, grouped by the 'south' labels.
mean_to_csv('south')

Tropical year (May-Apr)

In [21]:
# Writes {dataname}_annual_mean_tropic.csv, grouped by the 'tropic' labels.
mean_to_csv('tropic')

Hsiang 2011 - "Civil conflicts are associated with the global climate" (May-Dec) <br>
https://www.nature.com/articles/nature10311

In [37]:
# Calendar-year mean over May-December only (month > 4), written under the
# "hsiang2011" name. The value column is selected *before* aggregating so
# that only it is averaged; aggregating the whole frame also attempts the
# date/period columns, which pandas >= 2.0 (numeric_only=False by default)
# may reject.
(dfl[dfl.month > 4]
 .groupby('year')[dataname]
 .mean()
 .to_csv(f'datadrive/TeamG/data/{dataname}_annual_mean_hsiang2011.csv')
)

## Aggregate - Variance

In [48]:
def variance_to_csv(var, dataname=dataname):
    """Write the per-period variance of the `dataname` column to a CSV file.

    Rows of the module-level `dfl` are grouped by the labels in column
    `var` and their variance is taken with pandas' default settings. The
    output file ``datadrive/TeamG/data/{dataname}_annual_variance_{var}.csv``
    has a ``year`` index column and a single value column
    ``{dataname}_variance_{var}``.

    Args:
        var: Name of the `dfl` column holding the year labels to group by.
        dataname: Name of the value column in `dfl`; also used to build the
            output column name and file name.

    NOTE(review): periods at either end of the record may contain fewer
    than twelve months and are aggregated as-is; confirm that is intended.
    """
    per_period = dfl.groupby(var)[dataname].var()
    annual = (per_period
              .rename(f"{dataname}_variance_{var}")
              .rename_axis('year')
              .to_frame())
    annual.to_csv(f'datadrive/TeamG/data/{dataname}_annual_variance_{var}.csv')

Northern Hemisphere (Dec-Nov)

In [49]:
# Writes {dataname}_annual_variance_north.csv, grouped by the 'north' labels.
variance_to_csv('north')

Southern Hemisphere (Jun-May)

In [50]:
# Writes {dataname}_annual_variance_south.csv, grouped by the 'south' labels.
variance_to_csv('south')

Tropical year (May-Apr)

In [51]:
# Writes {dataname}_annual_variance_tropic.csv, grouped by the 'tropic' labels.
variance_to_csv('tropic')

Hsiang 2011 - "Civil conflicts are associated with the global climate" (May-Dec) <br>
https://www.nature.com/articles/nature10311

In [52]:
# Calendar-year variance over May-December only (month > 4), written under
# the "hsiang2011" name. The value column is selected *before* aggregating
# so that only its variance is computed; aggregating the whole frame also
# attempts the date/period columns, which pandas >= 2.0
# (numeric_only=False by default) may reject.
(dfl[dfl.month > 4]
 .groupby('year')[dataname]
 .var()
 .to_csv(f'datadrive/TeamG/data/{dataname}_annual_variance_hsiang2011.csv')
)