In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import StackingRegressor
from sklearn.preprocessing import StandardScaler

# Load training data
x_train = pd.read_csv("./pc_X_train.csv")
y_train = pd.read_csv("./pc_Y_train.csv")['score']

# Convert 'score' column to object type
y_train = y_train.astype(object)

# Train-test split
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_val_scaled = scaler.transform(x_val)

# Base models
base_models = [
    ('random_forest', RandomForestRegressor(random_state=42)),
    ('adaboost', AdaBoostRegressor(random_state=42))
]

# Meta-model
meta_model = LinearRegression()

# StackingRegressor
stacking_regressor = StackingRegressor(estimators=base_models, final_estimator=meta_model)

# Train the stacking model
stacking_regressor.fit(x_train_scaled, y_train)

# Make predictions on the validation set
y_val_pred = stacking_regressor.predict(x_val_scaled)

# Calculate RMSE on the validation set
rmse_val = np.sqrt(mean_squared_error(y_val, y_val_pred))
print("Root Mean Squared Error on Validation Set:", rmse_val)

# Load the test data
x_test = pd.read_csv("./pc_X_test.csv")

# Standardize the test features using the same scaler from the training set
x_test_scaled = scaler.transform(x_test)

# Make predictions on the test set
y_test_pred = stacking_regressor.predict(x_test_scaled)

# Save predictions to a CSV file
predictions_df = pd.DataFrame({'Id': x_test['id'], 'score': y_test_pred})
predictions_df.to_csv('stacking_predictions.csv', index=False)

print("Predictions saved to stacking_predictions.csv")


Root Mean Squared Error on Validation Set: 0.6259367164932004
Predictions saved to stacking_predictions.csv
