In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from prophet import Prophet

In [2]:
# reading the dataset
data = pd.read_csv('test.csv')
# displaying the data set to get its initial view
data.head()

Unnamed: 0,NAV,Units,Sale Price/Unit,Repurchase Price/Unit,Date Valued,Scheme Name_Bond Fund,Scheme Name_Jikimu Fund,Scheme Name_Liquid Fund,Scheme Name_Umoja Fund,Scheme Name_Watoto Fund,Scheme Name_Wekeza Maisha Fund
0,302291700000.0,344671800.0,877.0422,868.2718,2022-12-30,0,0,0,1,0,0
1,6658728000.0,8978248.0,741.6512,726.8182,2022-12-30,0,0,0,0,0,1
2,8426930000.0,15378320.0,547.9748,542.4951,2022-12-30,0,0,0,0,1,0
3,19122650000.0,120180800.0,159.1157,155.9333,2022-12-30,0,1,0,0,0,0
4,559272100000.0,1632829000.0,342.5173,342.5173,2022-12-30,0,0,1,0,0,0


In [3]:
# Creating a copy of the data as df for easy naming & for future use as reference
df_copy = data.copy()

# Create a new DataFrame where 'Value' column is equal to 1
datat = data[data['Scheme Name_Wekeza Maisha Fund'] == 1]


# List of columns to drop
columns_to_drop = ['Scheme Name_Liquid Fund', 'Scheme Name_Bond Fund','Scheme Name_Watoto Fund','Scheme Name_Jikimu Fund',
'Scheme Name_Umoja Fund']

# Drop the specified columns
datat.drop(columns=columns_to_drop, inplace=True)

# Checking our data now to see if it has the columns we want
datat.head()


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  datat.drop(columns=columns_to_drop, inplace=True)


Unnamed: 0,NAV,Units,Sale Price/Unit,Repurchase Price/Unit,Date Valued,Scheme Name_Wekeza Maisha Fund
1,6658728000.0,8978247.54,741.6512,726.8182,2022-12-30,1
7,6640438000.0,8957835.83,741.2993,726.4733,2022-12-29,1
13,6631795000.0,8948322.05,741.1216,726.2992,2022-12-28,1
19,6598530000.0,8905299.56,740.9666,726.1473,2022-12-27,1
25,6583067000.0,8892450.96,740.2984,725.4924,2022-12-23,1


In [4]:
# Renaming the column to a suitable format that prophet can use
datat.rename(columns={'Date Valued': 'ds'}, inplace=True)
datat.rename(columns={'Sale Price/Unit': 'y'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  datat.rename(columns={'Date Valued': 'ds'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  datat.rename(columns={'Sale Price/Unit': 'y'}, inplace=True)


In [5]:
# Create a Prophet forecasting model with the default settings:
model = Prophet()
# Fit the Prophet model to historical data
model.fit(datat)


15:31:49 - cmdstanpy - INFO - Chain [1] start processing
15:31:49 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x17f1434a4d0>

In [6]:
# Create a new DataFrame to hold future dates for forecasting
# The 'periods' parameter specifies how far into the future you want to forecast (180 periods in this case)
future = model.make_future_dataframe(periods=180)  # Change the number of periods as needed

In [7]:
# Use the fitted Prophet model to make predictions for the future dates
forecast = model.predict(future)

In [8]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [9]:
# Convert start_date and end_date to the same datetime format
start_date = pd.to_datetime('2023-01-01', format='%Y-%m-%d')
end_date = pd.to_datetime('2023-07-01', format='%Y-%m-%d')

# Use df.loc to filter based on date range
filtered_df = forecast.loc[(forecast['ds'] >= start_date) & (forecast['ds'] <= end_date)]

# Sort the DataFrame 'filtered_df' by the 'ds' column in ascending order
filtered_df = filtered_df.sort_values(by='ds', ascending=True)


In [10]:
# Read data from a CSV file 'Bond Fund_test_1.csv' into a DataFrame 'vz_plot'
vz_plot=pd.read_csv('Test data/Wekeza Maisha Fund_test_1.csv')

# Convert the 'date_valued' column in the 'vz_plot' DataFrame to datetime format
vz_plot['date_valued'] = pd.to_datetime(vz_plot['date_valued'])

# Sort by acsending date values
vz_plot = vz_plot.sort_values(by='date_valued', ascending=True)

# Merge two DataFrames, 'filtered_df' and 'vz_plot', using an inner join on the 'ds' and 'date_valued' columns
# The resulting DataFrame 'merged_df' will contain columns 'ds' and 'yhat' from 'filtered_df' and 'date_valued' 
#and 'nav_per_unit' from 'vz_plot'
merged_df = pd.merge(filtered_df[['ds', 'yhat']], vz_plot[['date_valued', 'nav_per_unit']], left_on='ds', right_on='date_valued', how='inner')

# Drop the duplicate 'date_valued' column
merged_df = merged_df.drop(columns='date_valued')

# Rename columns if needed
merged_df = merged_df.rename(columns={'yhat': 'yhat_values', 'nav_per_unit': 'nav_values'})

# # Display the merged dataframe
# merged_df


# Assuming you have a test set with actual values
y_test = merged_df['nav_values']
# Replace with your actual test data

# Extract the forecasted values from the Prophet forecast object
y_pred = merged_df['yhat_values']

# Calculate Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)

# Calculate Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)

# Optionally, calculate Root Mean Squared Error (RMSE)
rmse = np.sqrt(mse)

# Calculate the R2 score
r2 = r2_score(y_test, y_pred)

# Print the evaluation metrics
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Squared Error (MSE): {mse}')
print(f'Root Mean Squared Error (RMSE): {rmse}')
print("R-squared (R2) Score:", r2)

Mean Absolute Error (MAE): 6.169735887704332
Mean Squared Error (MSE): 78.29148014963968
Root Mean Squared Error (RMSE): 8.848247292522949
R-squared (R2) Score: 0.634836242805642


In [12]:
# Rename the 'yhat_values' column to 'Predicted_nav_values' and 'nav_values' column to 'Real_nav_values'
merged_df.rename(columns={'yhat_values': 'Predicted_nav_values', 'nav_values': 'Real_nav_values'}, inplace=True)
# Store the next 6 months predictions
merged_df.to_csv('Next 6 months prediction for Wekeza Maisha Fund.csv', index=False)

In [13]:
import pickle as pkl

In [14]:
# saving the model as a pkl file
pkl.dump(model,open('Wekeza Maisha fund predictor.pkl','wb'))
