In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    f1_score,
    mean_absolute_error,
    mean_squared_error,
    precision_score,
    r2_score,
    recall_score,
    roc_auc_score,
)

In [19]:
# Load the preprocessed dataset from CSV (path is relative to the notebook's
# working directory). The regression cell below uses the 'Happiness Score'
# column of this frame as the target and every other column as a feature.
data = pd.read_csv('resources/preprocessed_data.csv')

In [None]:
# --- Regression: predict the raw 'Happiness Score' from all other columns ---

# Target first, then the feature matrix (everything except the target).
y = data['Happiness Score']
X = data.drop('Happiness Score', axis=1)

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Random forest regressor; `fit` returns the estimator itself, so the
# construction and training can be chained.
model = RandomForestRegressor(
    n_estimators=600,
    max_depth=10,
    min_samples_split=5,
    random_state=42,
).fit(X_train, y_train)

# Score the held-out rows.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')
print(f'Mean Absolute Error: {mean_absolute_error(y_test, y_pred)}')


Mean Squared Error: 6.921898934296993
R^2 Score: -0.02253608447735722
Mean Absolute Error: 2.273864757189741
Accuracy Score: 0.09333333333333334


In [None]:
# --- Classification: predict whether 'Happiness Score' is above 5 ---

# Reload the dataset so this cell does not depend on (or mutate) the frame
# used by the regression cell.
data_classification = pd.read_csv('resources/preprocessed_data.csv')

# Binarise the target: 1 when the score is strictly greater than 5, else 0.
# Vectorised comparison; gives the same labels as the element-wise lambda.
data_classification['Happiness Score'] = (
    data_classification['Happiness Score'] > 5
).astype(int)

X_class = data_classification.drop(columns=['Happiness Score'])
y_class = data_classification['Happiness Score']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X_class, y_class, test_size=0.2, random_state=42
)

model_class = RandomForestClassifier(n_estimators=600, random_state=42, max_depth=10, min_samples_split=5)
model_class.fit(X_train_class, y_train_class)

# Hard labels feed the threshold-based metrics; the probability of the
# positive class (label 1) feeds ROC AUC, which needs a ranking score.
# Looking the column up through `classes_` raises ValueError if label 1 never
# appeared in the training split, instead of silently scoring the wrong class.
y_pred_class = model_class.predict(X_test_class)
positive_column = list(model_class.classes_).index(1)
y_score_class = model_class.predict_proba(X_test_class)[:, positive_column]

# Each line reports the metric its label names. Previously every line printed
# `model_class.score(...)`, i.e. plain accuracy, under six different names.
# `zero_division=0` reports 0 (rather than warning) when a class is never predicted.
print(f'Classification Report:\n{classification_report(y_test_class, y_pred_class, zero_division=0)}')
print(f'Confusion Matrix:\n{pd.crosstab(y_test_class, y_pred_class, rownames=["Actual"], colnames=["Predicted"])}')
print(f'Accuracy: {accuracy_score(y_test_class, y_pred_class)}')
print(f'F1 Score: {f1_score(y_test_class, y_pred_class, zero_division=0)}')
print(f'Precision: {precision_score(y_test_class, y_pred_class, zero_division=0)}')
print(f'Recall: {recall_score(y_test_class, y_pred_class, zero_division=0)}')
print(f'ROC AUC: {roc_auc_score(y_test_class, y_score_class)}')