In [93]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats as stat

In [94]:
barley_pd = pd.read_csv('CerealData/barley-production.csv')
maize_pd = pd.read_csv('CerealData/maize-production.csv')
rice_pd = pd.read_csv('CerealData/rice-production.csv')
rye_pd = pd.read_csv('CerealData/rye-production.csv')
wheat_pd = pd.read_csv('CerealData/wheat-production.csv')

In [95]:
barley_pd.head()


Unnamed: 0,Entity,Code,Year,Barley | 00000044 || Production | 005510 || tonnes
0,Afghanistan,AFG,1961,378000
1,Afghanistan,AFG,1962,378000
2,Afghanistan,AFG,1963,378000
3,Afghanistan,AFG,1964,380000
4,Afghanistan,AFG,1965,380000


In [96]:
maize_pd.head()


Unnamed: 0,Entity,Code,Year,Maize | 00000056 || Production | 005510 || tonnes
0,Afghanistan,AFG,1961,700000
1,Afghanistan,AFG,1962,700000
2,Afghanistan,AFG,1963,713000
3,Afghanistan,AFG,1964,720000
4,Afghanistan,AFG,1965,720000


In [97]:
barley_pd = barley_pd.drop('Code', axis=1)
maize_pd = maize_pd.drop('Code', axis=1)
rice_pd = rice_pd.drop('Code', axis=1)
rye_pd = rye_pd.drop('Code', axis=1)
wheat_pd = wheat_pd.drop('Code', axis=1)

In [98]:
barley_pd.head()

Unnamed: 0,Entity,Year,Barley | 00000044 || Production | 005510 || tonnes
0,Afghanistan,1961,378000
1,Afghanistan,1962,378000
2,Afghanistan,1963,378000
3,Afghanistan,1964,380000
4,Afghanistan,1965,380000


In [99]:
barley_pd.columns = ['Country', 'Year', 'BarleyTonnes']
maize_pd.columns = ['Country', 'Year', 'MaizeTonnes']
rice_pd.columns = ['Country', 'Year', 'RiceTonnes']
rye_pd.columns = ['Country', 'Year', 'RyeTonnes']
wheat_pd.columns = ['Country', 'Year', 'WheatTonnes']

In [100]:
barley_pd.head()

Unnamed: 0,Country,Year,BarleyTonnes
0,Afghanistan,1961,378000
1,Afghanistan,1962,378000
2,Afghanistan,1963,378000
3,Afghanistan,1964,380000
4,Afghanistan,1965,380000


In [101]:
barley_pd['Barley_PChange'] = barley_pd['BarleyTonnes'].pct_change()
maize_pd['Maize_PChange'] = maize_pd['MaizeTonnes'].pct_change()
rice_pd['Rice_PChange'] = rice_pd['RiceTonnes'].pct_change()
rye_pd['Rye_PChange'] = rye_pd['RyeTonnes'].pct_change()
wheat_pd['Wheat_PChange'] = wheat_pd['WheatTonnes'].pct_change()

In [102]:
maize_pd.head()

Unnamed: 0,Country,Year,MaizeTonnes,Maize_PChange
0,Afghanistan,1961,700000,
1,Afghanistan,1962,700000,0.0
2,Afghanistan,1963,713000,0.018571
3,Afghanistan,1964,720000,0.009818
4,Afghanistan,1965,720000,0.0


In [103]:
barley_pd = barley_pd.dropna()
maize_pd = maize_pd.dropna()
rice_pd = rice_pd.dropna()
rye_pd = rye_pd.dropna()
wheat_pd = wheat_pd.dropna()

In [104]:
barley_pd.head()

Unnamed: 0,Country,Year,BarleyTonnes,Barley_PChange
1,Afghanistan,1962,378000,0.0
2,Afghanistan,1963,378000,0.0
3,Afghanistan,1964,380000,0.005291
4,Afghanistan,1965,380000,0.0
5,Afghanistan,1966,375000,-0.013158


In [105]:
c1 = pd.merge(barley_pd, maize_pd, on=['Year','Country'])
c2 = pd.merge(rice_pd, rye_pd, on=['Year','Country'])
c3 = pd.merge(c1, c2, on=['Year','Country'])
Cereal = pd.merge(c3, wheat_pd, on=['Year','Country'])


In [106]:
Cereal

Unnamed: 0,Country,Year,BarleyTonnes,Barley_PChange,MaizeTonnes,Maize_PChange,RiceTonnes,Rice_PChange,RyeTonnes,Rye_PChange,WheatTonnes,Wheat_PChange
0,Africa,1962,3438275,0.847205,17556073,0.087734,5580715,0.295458,9100,-0.115646,6852639,0.345841
1,Africa,1963,3854812,0.121147,17625503,0.003955,5813046,0.041631,13800,0.516484,7160090,0.044866
2,Africa,1964,2946788,-0.235556,16145294,-0.083981,5833869,0.003582,16100,0.166667,6812840,-0.048498
3,Africa,1965,3143705,0.066824,16934360,0.048873,5472291,-0.061979,13900,-0.136646,6711683,-0.014848
4,Africa,1966,2021568,-0.356947,17891646,0.056529,5558106,0.015682,8411,-0.394892,5531425,-0.175851
...,...,...,...,...,...,...,...,...,...,...,...,...
3665,Yugoslavia,1987,504000,-0.283636,8863000,-0.292432,49100,0.030949,69000,-0.072643,5345000,0.119087
3666,Yugoslavia,1988,615507,0.221244,7699996,-0.131220,35508,-0.276823,76080,0.102609,6300470,0.178760
3667,Yugoslavia,1989,701601,0.139875,9414034,0.222602,27476,-0.226203,75079,-0.013157,5598583,-0.111402
3668,Yugoslavia,1990,691928,-0.013787,6723975,-0.285750,27586,0.004003,71601,-0.046325,6358604,0.135752


In [135]:
#merge all data from one year to one row
def makedata(df, name):
    maindf = pd.DataFrame(Cereal['Year'].unique())
    maindf.columns = ['Year']
    Names = Cereal.loc[Cereal['Year'] == 2020]
    Names = Names['Country'].unique()

    for x in Names:
        tempdf = df.loc[df['Country'] == x]
        tempdf = tempdf.drop(['Country'], axis=1)
        tempdf.columns = ['Year', x + name + '_Tonnes', x + name + '_PChange']
        maindf = pd.merge(maindf, tempdf, on=['Year'])
        
    return maindf
    
        
        
        

In [142]:
barley = makedata(barley_pd, 'Barley')
maize = makedata(maize_pd, 'Maize')
rice = makedata(rice_pd, 'Rice')
rye = makedata(rye_pd, 'Rye')
wheat = makedata(wheat_pd, 'Wheat')

In [143]:
barley_maize = pd.merge(barley, maize, on=['Year'])
rice_rye = pd.merge(rice, rye, on=['Year'])
total = pd.merge(rice_rye, barley_maize, on=['Year'])
Cereals = pd.merge(total, wheat, on=['Year'])

In [147]:
Cereals.shape

(25, 671)

In [152]:
import math
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [151]:
Cereals.describe()

Unnamed: 0,Year,AfricaRice_Tonnes,AfricaRice_PChange,Africa (FAO)Rice_Tonnes,Africa (FAO)Rice_PChange,Americas (FAO)Rice_Tonnes,Americas (FAO)Rice_PChange,ArgentinaRice_Tonnes,ArgentinaRice_PChange,AsiaRice_Tonnes,...,Upper-middle-income countriesWheat_Tonnes,Upper-middle-income countriesWheat_PChange,UzbekistanWheat_Tonnes,UzbekistanWheat_PChange,Western Asia (FAO)Wheat_Tonnes,Western Asia (FAO)Wheat_PChange,Western Europe (FAO)Wheat_Tonnes,Western Europe (FAO)Wheat_PChange,WorldWheat_Tonnes,WorldWheat_PChange
count,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,...,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0
mean,2005.92,23235550.0,0.032288,23242440.0,0.032311,33390430.0,0.022819,1164399.0,0.084554,587149400.0,...,242491700.0,0.010425,4847603.0,0.09625,29442810.0,0.01936,62282980.0,0.007728,647136100.0,0.011671
std,9.182955,7991301.0,0.046626,7989254.0,0.046619,4270442.0,0.082212,347066.7,0.324648,72748880.0,...,40558480.0,0.092727,1988516.0,0.198086,2249463.0,0.09023,6973866.0,0.115536,80425180.0,0.051706
min,1992.0,13981970.0,-0.047042,13983170.0,-0.046849,25809620.0,-0.110625,607600.0,-0.455054,482738700.0,...,189780600.0,-0.154449,947400.0,-0.123393,25101640.0,-0.153435,49105250.0,-0.217963,525435300.0,-0.07106
25%,1998.0,16473880.0,-0.000501,16475880.0,-0.000501,29157540.0,-0.025898,903630.0,-0.098274,527834500.0,...,209862000.0,-0.044764,3684200.0,-0.004277,28129540.0,-0.038207,58406480.0,-0.061387,584763400.0,-0.019143
50%,2004.0,18990210.0,0.033418,19026210.0,0.033414,34656150.0,0.000178,1205140.0,-0.001151,555139100.0,...,228627000.0,0.019789,5507600.0,0.016662,29350670.0,0.029789,63750750.0,0.019541,634666000.0,0.006463
75%,2014.0,29836670.0,0.055572,29836670.0,0.055355,36877920.0,0.065004,1404980.0,0.070922,659753700.0,...,270845300.0,0.055062,6612200.0,0.126175,31225850.0,0.093605,67961840.0,0.056288,728757800.0,0.036012
max,2020.0,37889800.0,0.117914,37889800.0,0.117837,38966860.0,0.267245,1748075.0,1.107883,681251500.0,...,316252800.0,0.185551,6964664.0,0.717581,32941530.0,0.175411,74963010.0,0.30229,772290600.0,0.153992


In [153]:
#sclaing the data
#scaler = StandardScaler()
#Cereals = scaler.fit_transform(Cereals)

ValueError: Input contains infinity or a value too large for dtype('float64').

In [None]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=123, shuffle = False, stratify=None)