In [None]:
#Lasso

from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
from sklearn import metrics

# --- Data -------------------------------------------------------------
# Read the PCA-transformed feature table from the Excel workbook.
file_path = 'pca_transformed_dataset.xlsx'
data = pd.read_excel(file_path)

# The target is selected by column name; every remaining column is a feature.
target_column = 'Average store monthly revenue'
y = data[target_column]
X = data.drop(columns=[target_column])

# --- Train/test split -------------------------------------------------
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows; the fixed seed keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=45,
)

# --- Model ------------------------------------------------------------
alpha = 0.003  # L1 regularization strength handed to Lasso
model = Lasso(alpha=alpha).fit(X_train, y_train)

# Predictions for the held-out rows.
y_pred = model.predict(X_test)

# Calculate NDCG
def get_dcg(y_pred, y_true, k):
    """Discounted cumulative gain of the top-``k`` items ranked by ``y_pred``.

    Items are ordered by predicted score, highest first. The true value at
    1-based rank ``i`` contributes ``y_true / log2(i + 1)`` to the total.

    Both inputs are treated as plain positional sequences: element ``j`` of
    ``y_pred`` is the score of element ``j`` of ``y_true``. Any pandas index
    is discarded on purpose, so a Series whose labels differ from the other
    input (for example a shuffled hold-out target paired with freshly
    indexed predictions) is not re-aligned by label into NaN-filled rows.

    Parameters
    ----------
    y_pred : array-like
        Predicted scores; used only to rank the items.
    y_true : array-like
        True gains, same length as ``y_pred``.
    k : int or None
        Number of top-ranked items to keep (``None`` keeps all of them).

    Returns
    -------
    numpy scalar
        DCG accumulated over the retained items (0 for empty input).

    Raises
    ------
    ValueError
        If ``y_pred`` and ``y_true`` have different lengths.
    """
    # Strip pandas indexes so pairing is by position, never by label.
    scores = np.asarray(y_pred).ravel()
    gains = np.asarray(y_true).ravel()
    if scores.shape[0] != gains.shape[0]:
        raise ValueError(
            "y_pred and y_true must have the same length, got "
            f"{scores.shape[0]} and {gains.shape[0]}"
        )

    ranked = pd.DataFrame({"y_pred": scores, "y_true": gains})
    ranked = ranked.sort_values(by="y_pred", ascending=False).iloc[0:k, :]

    # Ranks are 1-based, so the discount at rank i is log2(i + 1). The
    # discount vector is sized by the number of rows actually kept.
    discounts = np.log2(np.arange(1, len(ranked) + 1) + 1)
    return np.sum(ranked["y_true"].to_numpy() / discounts)

def get_ndcg(y_pred, y_true, k):
    """Normalized DCG at ``k``: DCG of the predicted ranking over the ideal DCG.

    The ideal DCG is computed by ranking the items by their own true values.

    Parameters
    ----------
    y_pred : array-like
        Predicted scores used to rank the items.
    y_true : array-like
        True gains for the same items.
    k : int or None
        Cut-off forwarded to ``get_dcg``.

    Returns
    -------
    float or numpy scalar
        ``dcg / idcg``, or ``0.0`` when the ideal DCG is zero.
    """
    dcg = get_dcg(y_pred, y_true, k)
    idcg = get_dcg(y_true, y_true, k)
    # A zero ideal DCG (all retained gains are zero, or nothing is retained)
    # would make the ratio 0/0; report 0.0 instead of NaN.
    if idcg == 0:
        return 0.0
    return dcg / idcg

# Ranking quality over the 30 highest-predicted test rows.
ndcg = get_ndcg(y_pred, y_test, k=30)

# Error metrics on the same hold-out predictions.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mape = metrics.mean_absolute_percentage_error(y_test, y_pred)

# Report in the order RMSE, MAPE, NDCG.
for label, value in (("RMSE:", rmse), ("MAPE:", mape), ("NDCG:", ndcg)):
    print(label, value)



RMSE: 0.10458871271265284
MAPE: 0.6460013765255291
NDCG: 0.8302418629451971


In [None]:
#Parallel Lasso + RF
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import ndcg_score
from sklearn.preprocessing import StandardScaler

# Read the feature table from the Excel workbook
data = pd.read_excel('pca_transformed_dataset.xlsx')

# The target is selected by column name; every other column is a feature
target_column = 'Average store monthly revenue'
y = data[target_column]
X = data.drop(columns=[target_column])

# Hold out 20% of the rows for evaluation
# NOTE(review): this cell splits with random_state=49 while the other cells in
# this notebook use 45 or 42, so their scores come from different test rows --
# confirm that is intended before comparing the printed metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=49,
)

# Base learner 1: Lasso regression
lasso_model = Lasso(alpha=0.1).fit(X_train, y_train)

# Base learner 2: Random Forest
rf_model = RandomForestRegressor(
    n_estimators=100,
    max_depth=10,
    random_state=42,
).fit(X_train, y_train)

# Score the held-out rows with each base learner
lasso_predictions = lasso_model.predict(X_test)
rf_predictions = rf_model.predict(X_test)

# Unweighted average of the two prediction vectors
ensemble_predictions = (lasso_predictions + rf_predictions) / 2

# Ensemble metrics; ndcg_score expects 2-D (n_queries, n_items) inputs, so the
# whole test set is passed as a single row
rmse_ensemble = np.sqrt(mean_squared_error(y_test, ensemble_predictions))
mape_ensemble = mean_absolute_percentage_error(y_test, ensemble_predictions)
ndcg_ensemble = ndcg_score(
    y_test.to_numpy()[np.newaxis, :],
    ensemble_predictions[np.newaxis, :],
)

# Report the ensemble metrics
print("Ensemble Performance Metrics:")
for label, value in (
    ("Root Mean Squared Error (RMSE):", rmse_ensemble),
    ("Mean Absolute Percentage Error (MAPE):", mape_ensemble),
    ("Normalized Discounted Cumulative Gain (NDCG):", ndcg_ensemble),
):
    print(label, value)


Ensemble Performance Metrics:
Root Mean Squared Error (RMSE): 0.12643991380962158
Mean Absolute Percentage Error (MAPE): 0.8566053111366179
Normalized Discounted Cumulative Gain (NDCG): 0.8351859885214562


In [None]:
#Parallel Lasso + NN + RF
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import ndcg_score
from sklearn.preprocessing import StandardScaler

# Read the feature table from the Excel workbook
data = pd.read_excel('pca_transformed_dataset.xlsx')

# The target is selected by column name; every other column is a feature
target_column = 'Average store monthly revenue'
y = data[target_column]
X = data.drop(columns=[target_column])

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Base learner 1: Lasso regression on the unscaled features
lasso_model = Lasso(alpha=0.001).fit(X_train, y_train)

# Base learner 2: MLP on standardized features.
# The scaler is fitted on the training rows only and then applied to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
nn_model = MLPRegressor(
    hidden_layer_sizes=(100,),
    activation='relu',
    solver='adam',
    random_state=46,
).fit(X_train_scaled, y_train)

# Base learner 3: Random Forest on the unscaled features
rf_model = RandomForestRegressor(
    n_estimators=100,
    max_depth=10,
    random_state=42,
).fit(X_train, y_train)

# Score the held-out rows with each base learner (the MLP gets the scaled copy)
lasso_predictions = lasso_model.predict(X_test)
nn_predictions = nn_model.predict(X_test_scaled)
rf_predictions = rf_model.predict(X_test)

# Unweighted average of the three prediction vectors
ensemble_predictions = (lasso_predictions + nn_predictions + rf_predictions) / 3

# Ensemble metrics; ndcg_score expects 2-D (n_queries, n_items) inputs, so the
# whole test set is passed as a single row
rmse_ensemble = np.sqrt(mean_squared_error(y_test, ensemble_predictions))
mape_ensemble = mean_absolute_percentage_error(y_test, ensemble_predictions)
ndcg_ensemble = ndcg_score(
    y_test.to_numpy()[np.newaxis, :],
    ensemble_predictions[np.newaxis, :],
)

# Report the ensemble metrics
print("Ensemble Performance Metrics:")
for label, value in (
    ("Root Mean Squared Error (RMSE):", rmse_ensemble),
    ("Mean Absolute Percentage Error (MAPE):", mape_ensemble),
    ("Normalized Discounted Cumulative Gain (NDCG):", ndcg_ensemble),
):
    print(label, value)

Ensemble Performance Metrics:
Root Mean Squared Error (RMSE): 0.15479075416808225
Mean Absolute Percentage Error (MAPE): 0.9561147304209078
Normalized Discounted Cumulative Gain (NDCG): 0.8391169549414823


In [None]:
#Parallel Lasso + NN + RF + PCR
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import ndcg_score
from sklearn.preprocessing import StandardScaler

# Read the feature table from the Excel workbook
data = pd.read_excel('pca_transformed_dataset.xlsx')

# The target is selected by column name; every other column is a feature
target_column = 'Average store monthly revenue'
y = data[target_column]
X = data.drop(columns=[target_column])

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Base learner 1: Lasso regression on the unscaled features
lasso_model = Lasso(alpha=0.001).fit(X_train, y_train)

# Base learner 2: MLP on standardized features.
# The scaler is fitted on the training rows only and then applied to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
nn_model = MLPRegressor(
    hidden_layer_sizes=(100,),
    activation='relu',
    solver='adam',
    random_state=48,
).fit(X_train_scaled, y_train)

# Base learner 3: Random Forest on the unscaled features
rf_model = RandomForestRegressor(
    n_estimators=100,
    max_depth=10,
    random_state=42,
).fit(X_train, y_train)

# Project the standardized features onto 5 principal components
# (the component count may need adjusting for the data at hand)
pca = PCA(n_components=5)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Base learner 4: "PCR" -- a regression fitted on the principal components
# (alpha may need adjusting for the data at hand).
# NOTE(review): the regressor here is Lasso(alpha=0.1), not ordinary least
# squares -- confirm that an L1-penalized fit is intended for this stage.
pcr_model = Lasso(alpha=0.1).fit(X_train_pca, y_train)

# Score the held-out rows, feeding each learner the representation it was trained on
lasso_predictions = lasso_model.predict(X_test)
nn_predictions = nn_model.predict(X_test_scaled)
rf_predictions = rf_model.predict(X_test)
pcr_predictions = pcr_model.predict(X_test_pca)

# Unweighted average of the four prediction vectors
ensemble_predictions = (
    lasso_predictions + nn_predictions + rf_predictions + pcr_predictions
) / 4

# Ensemble metrics; ndcg_score expects 2-D (n_queries, n_items) inputs, so the
# whole test set is passed as a single row
rmse_ensemble = np.sqrt(mean_squared_error(y_test, ensemble_predictions))
mape_ensemble = mean_absolute_percentage_error(y_test, ensemble_predictions)
ndcg_ensemble = ndcg_score(
    y_test.to_numpy()[np.newaxis, :],
    ensemble_predictions[np.newaxis, :],
)

# Report the ensemble metrics
print("Ensemble Performance Metrics:")
for label, value in (
    ("Root Mean Squared Error (RMSE):", rmse_ensemble),
    ("Mean Absolute Percentage Error (MAPE):", mape_ensemble),
    ("Normalized Discounted Cumulative Gain (NDCG):", ndcg_ensemble),
):
    print(label, value)


Ensemble Performance Metrics:
Root Mean Squared Error (RMSE): 0.1485126092577503
Mean Absolute Percentage Error (MAPE): 0.9595439240640877
Normalized Discounted Cumulative Gain (NDCG): 0.840405995983411


In [None]:
#Parallel Lasso + NN + RF + PCR + GBDT
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import ndcg_score
from sklearn.preprocessing import StandardScaler

# Read the feature table from the Excel workbook
data = pd.read_excel('pca_transformed_dataset.xlsx')

# The target is selected by column name; every other column is a feature
target_column = 'Average store monthly revenue'
y = data[target_column]
X = data.drop(columns=[target_column])

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Base learner 1: Lasso regression on the unscaled features
lasso_model = Lasso(alpha=0.001).fit(X_train, y_train)

# Base learner 2: MLP on standardized features.
# The scaler is fitted on the training rows only and then applied to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
nn_model = MLPRegressor(
    hidden_layer_sizes=(100,),
    activation='relu',
    solver='adam',
    random_state=46,
).fit(X_train_scaled, y_train)

# Base learner 3: Random Forest on the unscaled features
rf_model = RandomForestRegressor(
    n_estimators=100,
    max_depth=10,
    random_state=45,
).fit(X_train, y_train)

# Project the standardized features onto 5 principal components
# (the component count may need adjusting for the data at hand)
pca = PCA(n_components=5)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Base learner 4: "PCR" -- a Lasso regression fitted on the principal components
pcr_model = Lasso(alpha=0.1).fit(X_train_pca, y_train)

# Base learner 5: gradient-boosted decision trees on the unscaled features
gbdt_model = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
).fit(X_train, y_train)

# Score the held-out rows, feeding each learner the representation it was trained on
lasso_predictions = lasso_model.predict(X_test)
nn_predictions = nn_model.predict(X_test_scaled)
rf_predictions = rf_model.predict(X_test)
pcr_predictions = pcr_model.predict(X_test_pca)
gbdt_predictions = gbdt_model.predict(X_test)

# Unweighted average of the five prediction vectors
ensemble_predictions = (
    lasso_predictions
    + nn_predictions
    + rf_predictions
    + pcr_predictions
    + gbdt_predictions
) / 5

# Ensemble metrics; ndcg_score expects 2-D (n_queries, n_items) inputs, so the
# whole test set is passed as a single row
rmse_ensemble = np.sqrt(mean_squared_error(y_test, ensemble_predictions))
mape_ensemble = mean_absolute_percentage_error(y_test, ensemble_predictions)
ndcg_ensemble = ndcg_score(
    y_test.to_numpy()[np.newaxis, :],
    ensemble_predictions[np.newaxis, :],
)

# Report the ensemble metrics
print("Ensemble Performance Metrics:")
for label, value in (
    ("Root Mean Squared Error (RMSE):", rmse_ensemble),
    ("Mean Absolute Percentage Error (MAPE):", mape_ensemble),
    ("Normalized Discounted Cumulative Gain (NDCG):", ndcg_ensemble),
):
    print(label, value)


Ensemble Performance Metrics:
Root Mean Squared Error (RMSE): 0.15059364787000587
Mean Absolute Percentage Error (MAPE): 0.9530614543554169
Normalized Discounted Cumulative Gain (NDCG): 0.8417048866229397
