In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

Load Datasets

In [23]:
# Read both student CSV files with ';' as the field separator.
# The generator yields the frames in path order, so tuple-unpacking binds
# the first file to `mat` and the second to `por`.
mat, por = (
    pd.read_csv(csv_path, sep=";")
    for csv_path in ("dataset/student-mat.csv", "dataset/student-por.csv")
)

In [25]:
# --- Join the two tables ----------------------------------------------------
# Key columns used to line up rows of `mat` with rows of `por`.
merge_cols = [
    "school", "sex", "age", "address", "famsize", "Pstatus",
    "Medu", "Fedu", "Mjob", "Fjob", "reason", "nursery", "internet",
]
# Inner join (the pandas default). Any non-key column present in both frames
# appears twice in the result, tagged with the _mat / _por suffix.
data = mat.merge(por, on=merge_cols, suffixes=('_mat', '_por'))

print(data.head())

# --- Build the target -------------------------------------------------------
# G3 is the mean of the two final grades; the two source columns are removed
# afterwards so they are not available as features.
data = (
    data
    .assign(G3=data['G3_mat'].add(data['G3_por']).div(2))
    .drop(columns=['G3_mat', 'G3_por'])
)

# --- Encode -----------------------------------------------------------------
# One-hot encode the non-numeric columns, dropping the first level of each.
data = pd.get_dummies(data, drop_first=True)

# --- Features / target ------------------------------------------------------
y = data.loc[:, 'G3']
X = data.drop('G3', axis=1)

# --- Hold out 20% of the rows for evaluation (fixed seed) -------------------
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Fit --------------------------------------------------------------------
# fit() returns the estimator itself, so `model` is the fitted forest.
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# --- Predict on the held-out rows -------------------------------------------
y_pred = model.predict(X_test)

# --- Score ------------------------------------------------------------------
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R² Score: {r2:.2f}")
print("\nSample Predictions:")
# Show the first ten (actual, predicted) pairs; slicing both inputs first
# avoids materialising the full zipped list.
for true, pred in zip(y_test.iloc[:10], y_pred[:10]):
    print(f"Actual: {true:.1f}  |  Predicted: {pred:.1f}")


  school sex  age address famsize Pstatus  Medu  Fedu     Mjob      Fjob  ... famrel_por freetime_por  goout_por  Dalc_por  Walc_por health_por absences_por G1_por G2_por G3_por
0     GP   F   18       U     GT3       A     4     4  at_home   teacher  ...          4            3          4         1         1          3            4      0     11     11
1     GP   F   17       U     GT3       T     1     1  at_home     other  ...          5            3          3         1         1          3            2      9     11     11
2     GP   F   15       U     LE3       T     1     1  at_home     other  ...          4            3          2         2         3          3            6     12     13     12
3     GP   F   15       U     GT3       T     4     2   health  services  ...          3            2          2         1         1          5            0     14     14     14
4     GP   F   16       U     GT3       T     3     3    other     other  ...          4            3         