In [None]:
import pandas as pd
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from math import sqrt
import matplotlib.pyplot as plt

In [None]:
# Read the modelling table from the CSV stored under the relative data/ directory.
model_df = pd.read_csv(filepath_or_buffer='data/model_df.csv')

In [None]:
# Fit a shallow random forest on a train/test split and report hold-out RMSE and R2.

# Separate the target column ('anm_tot_fh') from the remaining columns, which act as features.
X = model_df.drop(columns='anm_tot_fh')
y = model_df['anm_tot_fh']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Seed the forest too: without random_state the fitted model (and therefore the
# printed metrics) would differ on every run of the notebook.
max_depth = 3
regressor = RandomForestRegressor(max_depth=max_depth, random_state=42)
regressor.fit(X_train, y_train)

# Predict once on the held-out rows and score those predictions.
y_pred = regressor.predict(X_test)
rmse = sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print("Root Mean Squared Error: ", rmse)
print("R-squared (R2) Score: ", r2)


In [None]:
# Sweep max_depth over 1..5, refit a seeded random forest for each value, and
# plot the hold-out RMSE against depth.
depths = range(1, 6)

# NOTE: these names are rebound here as per-depth lists (index i corresponds to depths[i]).
rmse = []
r2 = []

for depth in depths:
    regressor = RandomForestRegressor(max_depth=depth, random_state=42)
    regressor.fit(X_train, y_train)
    predictions = regressor.predict(X_test)
    # RMSE is an error measure (lower is better), so it is appended directly
    # rather than going through a temporary named "accuracy".
    rmse.append(sqrt(mean_squared_error(y_test, predictions)))
    # R2 per depth is collected alongside RMSE; it is not plotted in this cell.
    r2.append(r2_score(y_test, predictions))

# After the loop, `regressor` refers to the model fitted with the largest depth.

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(depths, rmse, marker='o', linestyle='-', color='b')
# The model swept above is a random forest, so the title names it as such.
ax.set_title('Depth vs. RMSE for Random Forest Regressor')
ax.set_xlabel('Max Depth')
ax.set_ylabel('RMSE')
ax.set_xticks(depths)
ax.grid(True)
plt.show()