In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats as stat

In [2]:
barley_pd = pd.read_csv('CerealData/barley-production.csv')
maize_pd = pd.read_csv('CerealData/maize-production.csv')
rice_pd = pd.read_csv('CerealData/rice-production.csv')
rye_pd = pd.read_csv('CerealData/rye-production.csv')
wheat_pd = pd.read_csv('CerealData/wheat-production.csv')

In [3]:
barley_pd.head()


Unnamed: 0,Entity,Code,Year,Barley | 00000044 || Production | 005510 || tonnes
0,Afghanistan,AFG,1961,378000
1,Afghanistan,AFG,1962,378000
2,Afghanistan,AFG,1963,378000
3,Afghanistan,AFG,1964,380000
4,Afghanistan,AFG,1965,380000


In [4]:
maize_pd.head()


Unnamed: 0,Entity,Code,Year,Maize | 00000056 || Production | 005510 || tonnes
0,Afghanistan,AFG,1961,700000
1,Afghanistan,AFG,1962,700000
2,Afghanistan,AFG,1963,713000
3,Afghanistan,AFG,1964,720000
4,Afghanistan,AFG,1965,720000


In [5]:
barley_pd = barley_pd.drop('Code', axis=1)
maize_pd = maize_pd.drop('Code', axis=1)
rice_pd = rice_pd.drop('Code', axis=1)
rye_pd = rye_pd.drop('Code', axis=1)
wheat_pd = wheat_pd.drop('Code', axis=1)

In [6]:
barley_pd.head()

Unnamed: 0,Entity,Year,Barley | 00000044 || Production | 005510 || tonnes
0,Afghanistan,1961,378000
1,Afghanistan,1962,378000
2,Afghanistan,1963,378000
3,Afghanistan,1964,380000
4,Afghanistan,1965,380000


In [7]:
barley_pd.columns = ['Country', 'Year', 'BarleyTonnes']
maize_pd.columns = ['Country', 'Year', 'MaizeTonnes']
rice_pd.columns = ['Country', 'Year', 'RiceTonnes']
rye_pd.columns = ['Country', 'Year', 'RyeTonnes']
wheat_pd.columns = ['Country', 'Year', 'WheatTonnes']

In [8]:
barley_pd.head()

Unnamed: 0,Country,Year,BarleyTonnes
0,Afghanistan,1961,378000
1,Afghanistan,1962,378000
2,Afghanistan,1963,378000
3,Afghanistan,1964,380000
4,Afghanistan,1965,380000


In [9]:
barley_pd['Barley_PChange'] = barley_pd['BarleyTonnes'].pct_change()
maize_pd['Maize_PChange'] = maize_pd['MaizeTonnes'].pct_change()
rice_pd['Rice_PChange'] = rice_pd['RiceTonnes'].pct_change()
rye_pd['Rye_PChange'] = rye_pd['RyeTonnes'].pct_change()
wheat_pd['Wheat_PChange'] = wheat_pd['WheatTonnes'].pct_change()

In [10]:
maize_pd.head()

Unnamed: 0,Country,Year,MaizeTonnes,Maize_PChange
0,Afghanistan,1961,700000,
1,Afghanistan,1962,700000,0.0
2,Afghanistan,1963,713000,0.018571
3,Afghanistan,1964,720000,0.009818
4,Afghanistan,1965,720000,0.0


In [11]:
barley_pd = barley_pd.dropna()
maize_pd = maize_pd.dropna()
rice_pd = rice_pd.dropna()
rye_pd = rye_pd.dropna()
wheat_pd = wheat_pd.dropna()

In [12]:
barley_pd.head()

Unnamed: 0,Country,Year,BarleyTonnes,Barley_PChange
1,Afghanistan,1962,378000,0.0
2,Afghanistan,1963,378000,0.0
3,Afghanistan,1964,380000,0.005291
4,Afghanistan,1965,380000,0.0
5,Afghanistan,1966,375000,-0.013158


In [13]:
c1 = pd.merge(barley_pd, maize_pd, on=['Year','Country'])
c2 = pd.merge(rice_pd, rye_pd, on=['Year','Country'])
c3 = pd.merge(c1, c2, on=['Year','Country'])
Cereal = pd.merge(c3, wheat_pd, on=['Year','Country'])


In [14]:
Cereal

Unnamed: 0,Country,Year,BarleyTonnes,Barley_PChange,MaizeTonnes,Maize_PChange,RiceTonnes,Rice_PChange,RyeTonnes,Rye_PChange,WheatTonnes,Wheat_PChange
0,Africa,1962,3438275,0.847205,17556073,0.087734,5580715,0.295458,9100,-0.115646,6852639,0.345841
1,Africa,1963,3854812,0.121147,17625503,0.003955,5813046,0.041631,13800,0.516484,7160090,0.044866
2,Africa,1964,2946788,-0.235556,16145294,-0.083981,5833869,0.003582,16100,0.166667,6812840,-0.048498
3,Africa,1965,3143705,0.066824,16934360,0.048873,5472291,-0.061979,13900,-0.136646,6711683,-0.014848
4,Africa,1966,2021568,-0.356947,17891646,0.056529,5558106,0.015682,8411,-0.394892,5531425,-0.175851
...,...,...,...,...,...,...,...,...,...,...,...,...
3665,Yugoslavia,1987,504000,-0.283636,8863000,-0.292432,49100,0.030949,69000,-0.072643,5345000,0.119087
3666,Yugoslavia,1988,615507,0.221244,7699996,-0.131220,35508,-0.276823,76080,0.102609,6300470,0.178760
3667,Yugoslavia,1989,701601,0.139875,9414034,0.222602,27476,-0.226203,75079,-0.013157,5598583,-0.111402
3668,Yugoslavia,1990,691928,-0.013787,6723975,-0.285750,27586,0.004003,71601,-0.046325,6358604,0.135752


In [15]:
barley_pd.head()


Unnamed: 0,Country,Year,BarleyTonnes,Barley_PChange
1,Afghanistan,1962,378000,0.0
2,Afghanistan,1963,378000,0.0
3,Afghanistan,1964,380000,0.005291
4,Afghanistan,1965,380000,0.0
5,Afghanistan,1966,375000,-0.013158


In [18]:
#merge all data from one year to one row
def makedata(df):
    
    maindf = barley_pd['Year']
    for x in range(1962,2020):
        yeardf = df.loc[df['Year'] == x]
        maindf = pd.merge(maindf, maize_pd, on=['Year'])
        
    return maindf
    
        
        
        

In [19]:
yeardf -makeddata()

Unnamed: 0,Country,Year,BarleyTonnes,Barley_PChange
1,Afghanistan,1962,378000,0.000000
61,Africa,1962,3438275,0.847205
121,Africa (FAO),1962,3438445,0.847167
181,Albania,1962,6934,-0.223516
241,Algeria,1962,818700,2.863269
...,...,...,...,...
7308,Western Europe (FAO),1962,12545956,0.195031
7368,World,1962,88396959,0.220765
7428,Yemen,1962,140000,-0.007092
7488,Yugoslavia,1962,475000,-0.168126
