In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Extra regression metrics used in the evaluation step at the bottom of this cell.
from sklearn.metrics import r2_score, mean_absolute_error

# --- Data -------------------------------------------------------------------
# Column holding the regression target in the CSV.
target_column = 'energy_kWh after normalisation'

df = pd.read_csv('finaldata1.csv')

# Features are every column except the target and the 'date' column.
# NOTE(review): the original cell carried a disabled one-hot encoding step for
# 'DayOfWeek' and 'Seasons'; it stays disabled here, so the remaining columns
# are handed to the model exactly as they are stored in the CSV.
X = df.drop(columns=[target_column, 'date'])
y = df[target_column]

# --- Train / test split -----------------------------------------------------
# 80 % train / 20 % test, shuffled with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Model ------------------------------------------------------------------
# Random Forest regressor with hand-picked hyperparameters.
rf = RandomForestRegressor(
    n_estimators=200,
    max_depth=25,
    max_features=7,
    min_samples_split=2,
    random_state=42,
)
rf.fit(X_train, y_train)

# --- Evaluation on the held-out test split ----------------------------------
y_pred = rf.predict(X_test)
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Report the three metrics, one per line.
print('Random Forest Model Accuracy:')
for metric_label, metric_value in (
    ('R-squared:', r2),
    ('MAE:', mae),
    ('Mean Squared Error:', mse),
):
    print(metric_label, metric_value)

Random Forest Model Accuracy:
R-squared: 0.9889393439673493
MAE: 0.00012491269588097147
Mean Squared Error: 3.4920929828032784e-05


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
# Hyperparameter grid to search: 3 values for each of 4 parameters,
# i.e. 3 * 3 * 3 * 3 = 81 candidate settings.
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [5, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# Base estimator whose hyperparameters are tuned; seeded so every candidate
# is trained reproducibly.
rf_model = RandomForestRegressor(random_state=42)

# Exhaustive grid search with 5-fold cross-validation on the training split.
# Cost: 81 candidates x 5 folds = 405 forest fits, plus one final refit of the
# best setting on the whole training split (GridSearchCV's default refit=True).
# verbose=1 makes GridSearchCV announce the number of candidates and total fits
# before it starts, so the size of the job is visible up front instead of the
# cell running silently.
grid_search = GridSearchCV(
    estimator=rf_model,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,  # use all available CPU cores for the fits
    verbose=1,
)
grid_search.fit(X_train, y_train)

# Report the winning setting and its mean cross-validated score. With no
# explicit `scoring`, the estimator's own `score` method is used, which is
# R-squared for a regressor.
print('Best hyperparameters:', grid_search.best_params_)
print('Best mean cross-validated score:', grid_search.best_score_)

KeyboardInterrupt: ignored

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Linear baseline: standardise the features, then fit a Ridge regression with
# a small regularisation strength (alpha=0.005).
ridge_model = Ridge(alpha=0.005, random_state=42)
pipeline = make_pipeline(StandardScaler(), ridge_model)

# Fit scaler and regressor together on the training split only; the pipeline
# then applies the training-set scaling to whatever it predicts on.
pipeline.fit(X_train, y_train)

# Evaluate on the held-out test split.
# NOTE: y_pred, mse and r2 are rebound here, replacing the values computed for
# the Random Forest model in the first cell.
y_pred = pipeline.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print('MSE:', mse)
print('R2 score:', r2)


MSE: 0.03061947856205446
R2 score: 0.09554666001930112


In [None]:
#from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
def _fit_and_score(model):
    """Fit `model` on the training split and score it on the test split.

    Uses the notebook-level X_train / y_train / X_test / y_test.

    Returns:
        A (predictions, mse, r2) tuple, where `predictions` are the model's
        outputs for X_test and the two metrics compare them with y_test.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return (
        predictions,
        mean_squared_error(y_test, predictions),
        r2_score(y_test, predictions),
    )


# Random Forest with 200 trees.
rf_model = RandomForestRegressor(n_estimators=200, random_state=42)
rf_pred, rf_mse, rf_r2 = _fit_and_score(rf_model)

# Gradient Boosting with 200 boosting stages.
gb_model = GradientBoostingRegressor(n_estimators=200, random_state=42)
gb_pred, gb_mse, gb_r2 = _fit_and_score(gb_model)

# Ensemble: unweighted mean of the two models' test-set predictions.
ensemble_pred = (rf_pred + gb_pred) / 2
ensemble_mse = mean_squared_error(y_test, ensemble_pred)
ensemble_r2 = r2_score(y_test, ensemble_pred)

# Print one section per model, separated by a blank line.
report = [
    ('Random Forest:', rf_mse, rf_r2),
    ('Gradient Boosting:', gb_mse, gb_r2),
    ('Ensemble:', ensemble_mse, ensemble_r2),
]
for position, (heading, mse_value, r2_value) in enumerate(report):
    if position:
        print('')
    print(heading)
    print('MSE:', mse_value)
    print('R2 score:', r2_value)


Random Forest:
MSE: 0.013022985267184251
R2 score: 0.6153206039236377

Gradient Boosting:
MSE: 0.021585497642963018
R2 score: 0.36239686777299995

Ensemble:
MSE: 0.014994606317982703
R2 score: 0.55708188372607
