In [236]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import numpy as np

In [237]:
# Load the raw wide-format table (one row per Plan/YEAR, one column per month).
df = pd.read_csv(filepath_or_buffer='ProFuturo.csv')

In [238]:
# Preview the first rows of the raw table to check the column layout.
df.head()

Unnamed: 0,Plan,YEAR,January,February,March,April,May,June,July,August,September,October,November,December
0,ProAhorro,2019,0.36%,0.33%,0.36%,0.35%,0.37%,0.36%,0.37%,0.37%,0.36%,0.37%,0.36%,0.37%
1,ProAhorro,2020,0.37%,0.35%,0.37%,0.34%,0.35%,0.35%,0.35%,0.35%,0.34%,0.35%,0.33%,0.35%
2,ProAhorro,2021,0.34%,0.31%,0.34%,0.33%,0.34%,0.33%,0.33%,0.33%,0.32%,0.32%,0.31%,0.32%
3,ProAhorro,2022,0.31%,0.28%,0.29%,0.28%,0.30%,0.27%,0.29%,0.29%,0.28%,0.30%,0.31%,0.34%
4,ProAhorro,2023,0.34%,0.32%,0.36%,0.36%,0.38%,0.37%,0.39%,0.04%,0.39%,0.41%,0.04%,0.42%


In [239]:
# Reshape from wide (one column per month) to long (one row per Plan/YEAR/Month).
# pandas names header-less CSV columns 'Unnamed: N' (here most likely a saved
# index). Exclude them so they are not melted as if they were months.
month_columns = [
    col for col in df.columns
    if col not in ('Plan', 'YEAR') and not str(col).startswith('Unnamed')
]
df_melted = pd.melt(
    df,
    id_vars=['Plan', 'YEAR'],
    value_vars=month_columns,
    var_name='Month',
    value_name='Value',
)

In [240]:
# Turn the literal 'Null' placeholder into a real missing value, strip the
# trailing '%' from the remaining strings, and store the result as floats.
value_without_nulls = df_melted['Value'].replace('Null', np.nan)
df_melted['Value'] = value_without_nulls.str.rstrip('%').astype(float)


In [241]:
# Build the period lookup: one row per distinct (YEAR, Month) pair.
# `merged_df` is not defined in any earlier cell, so it is constructed here
# from df_melted instead of relying on leftover kernel state.
merged_df = df_melted[['YEAR', 'Month']].drop_duplicates().reset_index(drop=True)

# Merge 'YEAR' and 'Month' into a single datetime column. errors='coerce' maps
# any 'Month' label that is not a full month name to NaT so it can be dropped.
merged_df['Year_Month'] = pd.to_datetime(
    merged_df['YEAR'].astype(str) + '-' + merged_df['Month'].astype(str),
    format='%Y-%B',
    errors='coerce',
)
merged_df = (
    merged_df
    .dropna(subset=['Year_Month'])
    .sort_values(by='Year_Month')
    .reset_index(drop=True)
)

# Number the months chronologically: 1 is the earliest month in the data.
merged_df['Sequential_Period'] = range(1, len(merged_df) + 1)

In [242]:
# Attach the period lookup columns to every long-format row (inner join on
# the YEAR/Month pair).
df = pd.merge(df_melted, merged_df, on=['YEAR', 'Month'])

In [243]:
# Discard every row that has a missing value in any column.
df = df.dropna(axis=0, how='any')

In [244]:
# Keep only the columns used for modelling.
df = df.loc[:, ['Plan', 'Sequential_Period', 'Value']]

In [257]:
# Split the long table into one DataFrame per plan.
ProAhorro = df.loc[df['Plan'].eq("ProAhorro")]
ProRenta = df.loc[df['Plan'].eq("ProRenta")]
ProInversion = df.loc[df['Plan'].eq("ProInversion")]
ProVision = df.loc[df['Plan'].eq("ProVision")]
ProCapital = df.loc[df['Plan'].eq("ProCapital")]

# Start ML

In [275]:
def rf_reg(df, future_periods=(61, 62, 63)):
    """Fit a random forest on one plan's series and predict future periods.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Sequential_Period' (the single feature)
        and 'Value' (the target).
    future_periods : iterable of int, optional
        Sequential periods to predict. Defaults to (61, 62, 63), the values
        that were previously hard-coded inside the function.

    Returns
    -------
    tuple
        ``(future_df, mse)``: ``future_df`` has the columns
        'Sequential_Period' and 'Predicted_Value'; ``mse`` is the mean
        squared error on the 20% test split.
    """
    X = df[['Sequential_Period']]
    y = df['Value']

    # No preprocessing needed: the only feature is numeric.

    # Hold out 20% of the rows for evaluation; the fixed seed keeps the split
    # reproducible.
    # NOTE(review): train_test_split shuffles by default, so test rows are
    # interleaved in time with training rows and `mse` is not an out-of-time
    # error. Confirm this is intended for a time series.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train)

    # Score the model on the held-out rows.
    y_pred = rf_model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    # Predict from a DataFrame carrying the same column name used in fit(),
    # so scikit-learn does not warn about missing feature names.
    periods = list(future_periods)
    future_X = pd.DataFrame({'Sequential_Period': periods})
    future_predictions = rf_model.predict(future_X)

    future_df = pd.DataFrame({
        'Sequential_Period': periods,
        'Predicted_Value': future_predictions,
    })

    return future_df, mse

In [276]:
# Fit one model per plan; each result is a (forecast DataFrame, test MSE) tuple.
(
    ProAhorro_pred,
    ProRenta_pred,
    ProCapital_pred,
    ProVision_pred,
    ProInversion_pred,
) = map(rf_reg, (ProAhorro, ProRenta, ProCapital, ProVision, ProInversion))



In [277]:
# Show the (forecast DataFrame, test MSE) tuple returned by rf_reg for ProAhorro.
ProAhorro_pred

(   Sequential_Period  Predicted_Value
 0                 61           0.4115
 1                 62           0.4130
 2                 63           0.4044,
 0.00944183923076926)

In [278]:
# Show the (forecast DataFrame, test MSE) tuple returned by rf_reg for ProRenta.
ProRenta_pred

(   Sequential_Period  Predicted_Value
 0                 61           0.6032
 1                 62           0.7037
 2                 63           0.4414,
 0.14519560000000004)

In [279]:
# Show the (forecast DataFrame, test MSE) tuple returned by rf_reg for ProCapital.
ProCapital_pred

(   Sequential_Period  Predicted_Value
 0                 61           1.9344
 1                 62           2.4864
 2                 63           1.7081,
 6.271034426923079)

In [280]:
# Show the (forecast DataFrame, test MSE) tuple returned by rf_reg for ProVision.
ProVision_pred

(   Sequential_Period  Predicted_Value
 0                 61           1.4582
 1                 62           1.6412
 2                 63           1.4133,
 0.7078253023076931)

In [281]:
# Show the (forecast DataFrame, test MSE) tuple returned by rf_reg for ProInversion.
ProInversion_pred

(   Sequential_Period  Predicted_Value
 0                 61           1.3436
 1                 62           1.5611
 2                 63           0.8946,
 1.046691403846154)