In [28]:

# importing modules and packages 
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression 
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler 

In [43]:

# importing data 
df = pd.read_csv('indo_plp_vnm_thai_khm_combined.csv') 

df.drop(df.columns[0], axis=1, inplace=True)

display(df)

Unnamed: 0,Country,Year,Average Consumer Price of Rice per kg (USD),Rice Produced (tonnes),GDP per capita (constant 2015 US$),Average Producer Price of Rice per kg (USD),Population Size,No. of Extreme Weather Events
0,Indonesia,2008,0.66,57661000.00,2487.611297,0.28,237936543.0,12.0
1,Indonesia,2009,0.67,59014000.00,2569.874213,0.21,240981299.0,12.0
2,Indonesia,2010,0.65,59283000.00,2695.868170,0.30,244016173.0,12.0
3,Indonesia,2011,0.88,58259000.00,2826.480441,0.88,247099697.0,13.0
4,Indonesia,2012,1.06,59705000.00,2959.514478,0.88,250222695.0,16.0
...,...,...,...,...,...,...,...,...
71,Philippines,2017,0.79,19276346.63,3289.472272,0.36,106738501.0,9.0
72,Philippines,2018,0.75,19066093.94,3439.100528,0.38,108568836.0,16.0
73,Philippines,2019,0.78,18814827.29,3589.613641,0.33,110380804.0,9.0
74,Philippines,2020,0.72,19294855.51,3195.538887,0.34,112190977.0,14.0


In [44]:
# creating feature and target variables 
X = df[['Rice Produced (tonnes)', "GDP per capita (constant 2015 US$)", "Average Producer Price of Rice per kg (USD)", "Population Size", "No. of Extreme Weather Events"]] 
y = df[["Average Consumer Price of Rice per kg (USD)"]]

display(X)
display(y)

Unnamed: 0,Rice Produced (tonnes),GDP per capita (constant 2015 US$),Average Producer Price of Rice per kg (USD),Population Size,No. of Extreme Weather Events
0,57661000.00,2487.611297,0.28,237936543.0,12.0
1,59014000.00,2569.874213,0.21,240981299.0,12.0
2,59283000.00,2695.868170,0.30,244016173.0,12.0
3,58259000.00,2826.480441,0.88,247099697.0,13.0
4,59705000.00,2959.514478,0.88,250222695.0,16.0
...,...,...,...,...,...
71,19276346.63,3289.472272,0.36,106738501.0,9.0
72,19066093.94,3439.100528,0.38,108568836.0,16.0
73,18814827.29,3589.613641,0.33,110380804.0,9.0
74,19294855.51,3195.538887,0.34,112190977.0,14.0


Unnamed: 0,Average Consumer Price of Rice per kg (USD)
0,0.66
1,0.67
2,0.65
3,0.88
4,1.06
...,...
71,0.79
72,0.75
73,0.78
74,0.72


In [45]:

# creating train and test sets 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=100) 

In [46]:
#Standardize
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test) #Dont fit again

In [47]:

# creating a regression model 
model = LinearRegression() 

In [48]:

# fitting the model 
model.fit(X_train,y_train)

LinearRegression()

In [49]:

# making predictions 
predictions = model.predict(X_test)

In [50]:
print(list(zip(list(X.columns), list(model.coef_[0]))))

[('Rice Produced (tonnes)', -0.1432948633897858), ('GDP per capita (constant 2015 US$)', 0.02283618037343807), ('Average Producer Price of Rice per kg (USD)', 0.09677041837119295), ('Population Size', 0.2364576278861069), ('No. of Extreme Weather Events', 0.0034540190581153402)]


In [51]:
# model evaluation 
print( 
  'mean_squared_error : ', mean_squared_error(y_test, predictions)) 
print( 
  'mean_absolute_error : ', mean_absolute_error(y_test, predictions)) 

r2 = r2_score(y_test, predictions)
print(
    'r square : ', r2 
)

n = X_train.shape[0] #size of training dataset
p = X_train.shape[1] #number of independent variables (features)
adjusted_r2 = 1-(1-r2)*(n-1)/(n-p-1)
print(
    'Adjusted r square : ', adjusted_r2 
)


mean_squared_error :  0.011599626278512305
mean_absolute_error :  0.09637552460672195
r square :  0.8686009914273751
Adjusted r square :  0.8546223734941172
