In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [2]:
# Load the raw observations, discard the two columns this analysis does not
# use, and only then remove rows that still contain missing values (so gaps
# in the discarded columns do not cost us rows).
df = (
    pd.read_csv('mars-weather.csv')
    .drop(columns=['wind_speed', 'atmo_opacity'])
    .dropna()
)

In [3]:
# Parse 'terrestrial_date', expand it into numeric year / month / day
# columns, and drop the original date column once its parts are extracted.
df = df.assign(
    terrestrial_date=pd.to_datetime(df['terrestrial_date']),
    terrestrial_date_year=lambda frame: frame['terrestrial_date'].dt.year,
    terrestrial_date_month=lambda frame: frame['terrestrial_date'].dt.month,
    terrestrial_date_day=lambda frame: frame['terrestrial_date'].dt.day,
).drop(columns='terrestrial_date')

# Replace the categorical 'month' column with integer codes.
# NOTE(review): LabelEncoder numbers the labels in sorted order; if the labels
# are strings ending in a number, that order is lexicographic rather than
# numeric -- confirm this is acceptable for the models below.
le = LabelEncoder()
df['month'] = le.fit_transform(df['month'])

# Target is 'max_temp'; every remaining column is used as a predictor.
target = df['max_temp']
features = df.drop(columns='max_temp')

In [4]:
# Split the data into train and test sets (80% train / 20% test).
# random_state pins the shuffle so the same rows land in each split on every
# run; without it the metrics reported below change after each kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

## Bagging

In [5]:
# Bagging Regressor Ensemble of RandomForestRegressor and DecisionTreeRegressor
def print_regression_metrics(title, y_true, y_pred):
    """Print MAE, MSE and R2 for one set of predictions.

    Parameters
    ----------
    title : str
        Heading printed (preceded by a blank line) above the three metrics.
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values, aligned with ``y_true``.
    """
    print(f"\n{title}:")
    print("Mean Absolute Error (MAE):", mean_absolute_error(y_true, y_pred))
    print("Mean Squared Error (MSE):", mean_squared_error(y_true, y_pred))
    print("R2 Score:", r2_score(y_true, y_pred))


# Base learners that each bagging ensemble will clone and refit
random_forest_model = RandomForestRegressor(max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=500)
decision_tree_model = DecisionTreeRegressor(max_depth=10, min_samples_leaf=5, min_samples_split=15)

# Bagging Regressors
# The base learner is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` (deprecated in scikit-learn 1.2, removed in
# 1.4), but it has always been the first positional parameter, so this form
# runs on both old and new releases.
# random_state fixes the bootstrap resampling and, per the scikit-learn docs,
# also seeds each cloned base learner, making the metrics reproducible.
bagging_random_forest_model = BaggingRegressor(random_forest_model, n_estimators=10, random_state=42)
bagging_decision_tree_model = BaggingRegressor(decision_tree_model, n_estimators=10, random_state=42)

# Fit the models
bagging_random_forest_model.fit(X_train, y_train)
bagging_decision_tree_model.fit(X_train, y_train)

# Predict
brf_predictions_train = bagging_random_forest_model.predict(X_train)
brf_predictions_test = bagging_random_forest_model.predict(X_test)
bdt_predictions_train = bagging_decision_tree_model.predict(X_train)
bdt_predictions_test = bagging_decision_tree_model.predict(X_test)

# Evaluate: train vs. test for each ensemble, to expose any overfitting gap
print_regression_metrics("Bagging Random Forest Regression Train", y_train, brf_predictions_train)
print_regression_metrics("Bagging Random Forest Regression Test", y_test, brf_predictions_test)
print_regression_metrics("Bagging Decision Tree Regression Train", y_train, bdt_predictions_train)
print_regression_metrics("Bagging Decision Tree Regression Test", y_test, bdt_predictions_test)



Bagging Random Forest Regression Train:
Mean Absolute Error (MAE): 1.6143890201582702
Mean Squared Error (MSE): 4.319229108742608
R2 Score: 0.9622912419754356

Bagging Random Forest Regression Test:
Mean Absolute Error (MAE): 2.297130652743435
Mean Squared Error (MSE): 8.921980735368676
R2 Score: 0.9216723138178569

Bagging Decision Tree Regression Train:
Mean Absolute Error (MAE): 1.8571012243893763
Mean Squared Error (MSE): 5.9142010057063175
R2 Score: 0.9483664401637312

Bagging Decision Tree Regression Test:
Mean Absolute Error (MAE): 2.3105219375937676
Mean Squared Error (MSE): 9.041579508858351
R2 Score: 0.920622334505468


## Stacking 

In [6]:
# Base models
# Each base learner gets a fixed random_state so that refitting the stack on
# the same data yields the same predictions and metrics.
base_models = [
    ("rf_model", RandomForestRegressor(max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=500, random_state=42)),
    ("dt_model", DecisionTreeRegressor(max_depth=10, min_samples_leaf=5, min_samples_split=15, random_state=42)),
]

# Final model or meta model: a linear regression that combines the base
# models' predictions
final_model = LinearRegression()

# Stacked model
stacked_model = StackingRegressor(estimators=base_models, final_estimator=final_model)

# Fit the stacked model
stacked_model.fit(X_train, y_train)

# Predict
stacked_predictions_train = stacked_model.predict(X_train)
stacked_predictions_test = stacked_model.predict(X_test)

# Evaluate: the same three metrics on the train split, then the test split
for split_name, y_true, y_pred in (
    ("Train", y_train, stacked_predictions_train),
    ("Test", y_test, stacked_predictions_test),
):
    print(f"\nStacking Model {split_name}:")
    print("Mean Absolute Error (MAE):", mean_absolute_error(y_true, y_pred))
    print("Mean Squared Error (MSE):", mean_squared_error(y_true, y_pred))
    print("R2 Score:", r2_score(y_true, y_pred))



Stacking Model Train:
Mean Absolute Error (MAE): 1.3022687308887433
Mean Squared Error (MSE): 2.835012995696876
R2 Score: 0.975249097382947

Stacking Model Test:
Mean Absolute Error (MAE): 2.306631883175745
Mean Squared Error (MSE): 9.074220314617973
R2 Score: 0.9203357749548363
