In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from mlxtend.regressor import StackingCVRegressor

In [2]:
# Reading data from CSV file
data = pd.read_csv("mumbai.csv")

In [3]:
data['Date']=pd.to_datetime(data[['Year','Month','Day','Hour','Minute']])
data = data.drop(['Year','Month','Day','Hour','Minute'], axis=1)
first_column = data.pop('Date')
data.insert(0, 'Date', first_column)

In [4]:
# Separating features and target variables
X = data.drop(["DNI"], axis=1)
y = data["DNI"]

In [5]:
# Split data into training and test sets
X = X.drop('Date',axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [6]:
# Define the individual models
lr = LinearRegression()
svr = SVR()
dtr = DecisionTreeRegressor()
rfr = RandomForestRegressor()
gbr = GradientBoostingRegressor()

In [7]:
# Train the individual models
lr.fit(X_train, y_train)
svr.fit(X_train, y_train)
dtr.fit(X_train, y_train)
rfr.fit(X_train, y_train)
gbr.fit(X_train, y_train)

In [8]:
# Evaluate the individual models
lr_pred = lr.predict(X_test)
svr_pred = svr.predict(X_test)
dtr_pred = dtr.predict(X_test)
rfr_pred = rfr.predict(X_test)
gbr_pred = gbr.predict(X_test)

In [9]:
print("Linear Regression MSE:", mean_squared_error(y_test, lr_pred, squared=False))
print("SVR MSE:", mean_squared_error(y_test, svr_pred, squared=False))
print("Decision Tree MSE:", mean_squared_error(y_test, dtr_pred, squared=False))
print("Random Forest MSE:", mean_squared_error(y_test, rfr_pred, squared=False))
print("Gradient Boosting MSE:", mean_squared_error(y_test, gbr_pred, squared=False))

Linear Regression MSE: 141.8970732067446
SVR MSE: 276.87327610074647
Decision Tree MSE: 65.74849021074469
Random Forest MSE: 45.82598124283146
Gradient Boosting MSE: 55.96979494857441


In [10]:
# Define the stacked model
stacked = StackingCVRegressor(regressors=[svr, dtr, rfr, gbr],meta_regressor=lr, cv=5)

In [11]:
# Train the stacked model
stacked.fit(X_train, y_train)

In [12]:
# Evaluate the stacked model
stacked_pred = stacked.predict(X_test)
print("Stacked Model MSE:", mean_squared_error(y_test, stacked_pred, squared=False))



Stacked Model MSE: 45.77699921956501




In [13]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.metrics import mean_squared_error

# Train individual models
estimators = [('lr', LinearRegression()), ('dt', DecisionTreeRegressor()), ('rf', RandomForestRegressor())]
reg = StackingRegressor(estimators=estimators, final_estimator=LinearRegression())
reg.fit(X_train, y_train)

# Use the trained model to make predictions on the testing set
y_pred = reg.predict(X_test)

# Evaluate the performance of the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error: ", mse)

Mean Squared Error:  2104.4660616300566


In [14]:
import pickle

In [15]:
filename = 'reg.pkl'
with open(filename, 'wb') as file:
    pickle.dump(reg, file)

In [16]:
filename = 'stack.pkl'
with open(filename, 'wb') as file:
    pickle.dump(stacked, file)