In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

In [37]:
# Load the first dataset (the variant that still carries the Date column).
raw_csv_path = "data.csv"
data = pd.read_csv(raw_csv_path)

In [39]:
# Preview the first two rows to check the column layout.
data.head(n=2)

Unnamed: 0,Date,Mean-value,Interest-rate,Month,Year,Quarter,Week-of-year,Week-of-month,Day-of-week,Day-of-year,election-year,kenya_reserves
0,10/11/2016,101.28,0.69,10,2016,4,42,3,2,285,0,10441.5
1,10/13/2016,101.3,0.66,10,2016,4,42,3,4,287,0,10441.5


In [4]:
# Load the modelling dataset used by every cell below.
modelling_csv_path = "data2.csv"
df = pd.read_csv(modelling_csv_path)

In [6]:
# Flag rows that fall in a US presidential election year (1 = election year, 0 = not).
# Presidential elections are held every four years, in years divisible by 4, so the
# rule below covers 2016, 2020 and 2024 and also any other election year present in
# the data, instead of relying on a hand-maintained list of years.
df["US_election"] = np.where(df["Year"] % 4 == 0, 1, 0)

In [8]:
# Preview the first three rows, including the new US_election flag.
df.head(n=3)

Unnamed: 0,Mean-value,Interest-rate,Month,Year,Quarter,Week-of-year,Week-of-month,Day-of-week,Day-of-year,election-year,kenya_reserves,US_election
0,101.28,0.69,10,2016,4,42,3,2,285,0,10441.5,1
1,101.3,0.66,10,2016,4,42,3,4,287,0,10441.5,1
2,101.3,0.65,10,2016,4,43,4,1,291,0,10441.5,1


In [10]:
# Build the feature matrix and the target.
#   "Mean-value" is the regression target, so it must not appear in X.
#   "Year" is excluded from the predictors, as in the original analysis.
#   "Unnamed: 0" looks like the row index written out by an earlier to_csv();
#   it is a row counter rather than a real predictor, it acts as a proxy for
#   time order (leaking the target's trend), and it will not exist for new data
#   at prediction time. errors="ignore" keeps the cell working if the CSV is
#   later reloaded with index_col=0.
X = (
    df.drop(columns=["Mean-value", "Year"])
    .drop(columns=["Unnamed: 0"], errors="ignore")
)
y = df["Mean-value"]

In [12]:
# Hold out 25% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): train_test_split shuffles rows by default and the rows look
# time-ordered, so a chronological split may give a more honest score - confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [14]:
# Sanity-check the split sizes: (X_train, X_test, y_train, y_test).
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((1441, 10), (481, 10), (1441,), (481,))

In [16]:
# Inspect the first two training rows to confirm which columns reach the models.
X_train.head(n=2)

Unnamed: 0,Interest-rate,Month,Quarter,Week-of-year,Week-of-month,Day-of-week,Day-of-year,election-year,kenya_reserves,US_election
810,0.15,4,2,14,1,5,94,0,12285.7,1
1634,5.37,8,3,35,5,4,243,0,13676.1,0


In [18]:
#Algorithms
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import GradientBoostingRegressor
from lightgbm import LGBMRegressor

In [19]:
import os

# NOTE(review): LGBM_VERBOSITY does not appear to be a documented LightGBM setting;
# the verbose=-1 argument on the estimator is what is expected to silence its logs.
# Kept as-is - confirm before removing.
os.environ["LGBM_VERBOSITY"] = "-1"

# One seed for every stochastic estimator so the scores below are reproducible
# after Restart Kernel -> Run All (same value as the train/test split seed).
RANDOM_STATE = 42

# Candidate regressors, keyed by the label used when reporting their scores.
# KNeighborsRegressor takes no random_state.
algorithms = {
    "Random Forest": RandomForestRegressor(random_state=RANDOM_STATE),
    "KNeighbors": KNeighborsRegressor(),
    "XGBoost": XGBRegressor(random_state=RANDOM_STATE),
    "Gradient Boost Regressor": GradientBoostingRegressor(
        n_estimators=100,
        learning_rate=0.1,
        max_depth=3,
        random_state=RANDOM_STATE,
    ),
    "lightgbm": LGBMRegressor(
        n_estimators=100,
        learning_rate=0.1,
        max_depth=5,
        verbose=-1,
        random_state=RANDOM_STATE,
    ),
}

In [22]:
# Fit every candidate on the training split and report its held-out score
# (the estimator's .score(), i.e. R^2 for these regressors) as a percentage.
for label, estimator in algorithms.items():
    estimator.fit(X_train, y_train)
    test_score = estimator.score(X_test, y_test)
    score_percent = round(test_score * 100, 2)
    print(f"{label}: {score_percent}%")



Random Forest: 99.22%
KNeighbors: 99.83%
XGBoost: 99.29%
Gradient Boost Regressor: 99.28%
lightgbm: 99.69%


In [29]:
# Refit a standalone KNeighbors regressor (default settings) so it can be saved,
# and print its score on the held-out split.
knb = KNeighborsRegressor()
model_knb = knb.fit(X_train, y_train)
knb_test_score = model_knb.score(X_test, y_test)
print(knb_test_score)

0.9982749175581642


#### Save the model

In [35]:
import os
import joblib

# Persist the fitted KNeighbors model to disk in the current working directory.
model_path = "KNeighbors.pkl"
joblib.dump(model_knb, model_path)

['KNeighbors.pkl']