In [None]:
#Lasso First and PCR after
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import ndcg_score

# ---------------------------------------------------------------------------
# Data: the target is selected by column name; every other column is a feature.
# ---------------------------------------------------------------------------
TARGET_COLUMN = 'Average store monthly revenue'
data = pd.read_excel('pca_transformed_dataset.xlsx')
y = data[TARGET_COLUMN]
X = data.drop(columns=[TARGET_COLUMN])

# 80/20 hold-out split with a fixed seed.
# NOTE(review): the other cells in this notebook seed their split differently
# (45, 47, 50), so their metrics are computed on different test rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=49
)

# ---------------------------------------------------------------------------
# Stage 1: Lasso fitted on the raw features.
# NOTE(review): neither `lasso_model` nor `lasso_predictions` is read again in
# this cell -- the PCA stage below is fitted on X_train directly, so the two
# stages are independent. Confirm whether Lasso was meant to feed stage 2.
# ---------------------------------------------------------------------------
lasso_model = Lasso(alpha=0.1).fit(X_train, y_train)
lasso_predictions = lasso_model.predict(X_test)

# ---------------------------------------------------------------------------
# Stage 2: PCA (all components kept) fitted on the training features only,
# then an L1-regularised linear model on the component scores.
# ---------------------------------------------------------------------------
pca_model = PCA()
X_train_pca = pca_model.fit_transform(X_train)
X_test_pca = pca_model.transform(X_test)

pcr_model = Lasso(alpha=0.001).fit(X_train_pca, y_train)
pcr_predictions = pcr_model.predict(X_test_pca)

# ---------------------------------------------------------------------------
# Hold-out metrics for the stage-2 model.
# ---------------------------------------------------------------------------
mse = mean_squared_error(y_test, pcr_predictions)
rmse = np.sqrt(mse)
mape = mean_absolute_percentage_error(y_test, pcr_predictions)
# Both vectors are reshaped to a single row, so the whole test set is scored
# as one ranking.
# NOTE(review): ndcg_score is documented for non-negative relevance values --
# confirm the target column is non-negative.
ndcg = ndcg_score(
    np.reshape(y_test.values, (1, -1)),
    np.reshape(pcr_predictions, (1, -1)),
)

print("Model Performance Metrics:")
for metric_label, metric_value in (
    ("Root Mean Squared Error (RMSE):", rmse),
    ("Mean Absolute Percentage Error (MAPE):", mape),
    ("Normalized Discounted Cumulative Gain (NDCG):", ndcg),
):
    print(metric_label, metric_value)


Model Performance Metrics:
Root Mean Squared Error (RMSE): 0.11646832854341554
Mean Absolute Percentage Error (MAPE): 0.6372632916003068
Normalized Discounted Cumulative Gain (NDCG): 0.8497396184826069


In [None]:
#Lasso first and NN after
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import ndcg_score
from sklearn.neural_network import MLPRegressor

# ---------------------------------------------------------------------------
# Data: the target is selected by column name; every other column is a feature.
# ---------------------------------------------------------------------------
TARGET_COLUMN = 'Average store monthly revenue'
data = pd.read_excel('pca_transformed_dataset.xlsx')
y = data[TARGET_COLUMN]
X = data.drop(columns=[TARGET_COLUMN])

# 80/20 hold-out split with a fixed seed.
# NOTE(review): the other cells in this notebook seed their split differently
# (49, 47, 50), so their metrics are computed on different test rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=45
)

# ---------------------------------------------------------------------------
# Stage 1: Lasso fitted on the raw features.
# NOTE(review): neither `lasso_model` nor `lasso_predictions` is read again in
# this cell -- the network below is trained on the scaled raw features, so the
# two stages are independent. Confirm whether Lasso was meant to feed stage 2.
# ---------------------------------------------------------------------------
lasso_model = Lasso(alpha=0.001).fit(X_train, y_train)
lasso_predictions = lasso_model.predict(X_test)

# ---------------------------------------------------------------------------
# Stage 2: standardise the features (statistics learned on the training split
# only), then train a two-hidden-layer MLP regressor.
# ---------------------------------------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

nn_model = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    random_state=45,
)
nn_model.fit(X_train_scaled, y_train)
nn_predictions = nn_model.predict(X_test_scaled)

# ---------------------------------------------------------------------------
# Hold-out metrics for the neural network.
# ---------------------------------------------------------------------------
mse_nn = mean_squared_error(y_test, nn_predictions)
rmse_nn = np.sqrt(mse_nn)
mape_nn = mean_absolute_percentage_error(y_test, nn_predictions)
# Both vectors are reshaped to a single row, so the whole test set is scored
# as one ranking.
# NOTE(review): ndcg_score is documented for non-negative relevance values --
# confirm the target column is non-negative.
ndcg_nn = ndcg_score(
    np.reshape(y_test.values, (1, -1)),
    np.reshape(nn_predictions, (1, -1)),
)

print("Model Performance Metrics:")
for metric_label, metric_value in (
    ("Root Mean Squared Error (RMSE):", rmse_nn),
    ("Mean Absolute Percentage Error (MAPE):", mape_nn),
    ("Normalized Discounted Cumulative Gain (NDCG):", ndcg_nn),
):
    print(metric_label, metric_value)


Model Performance Metrics:
Root Mean Squared Error (RMSE): 0.17544530999074168
Mean Absolute Percentage Error (MAPE): 1.132835154035199
Normalized Discounted Cumulative Gain (NDCG): 0.8274047650047966


In [None]:
#Lasso first and GBDT after
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import ndcg_score
from sklearn.preprocessing import StandardScaler

# ---------------------------------------------------------------------------
# Data: the target is selected by column name; every other column is a feature.
# ---------------------------------------------------------------------------
TARGET_COLUMN = 'Average store monthly revenue'
data = pd.read_excel('pca_transformed_dataset.xlsx')
y = data[TARGET_COLUMN]
X = data.drop(columns=[TARGET_COLUMN])

# 80/20 hold-out split with a fixed seed.
# NOTE(review): the other cells in this notebook seed their split differently
# (49, 45, 50), so their metrics are computed on different test rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=47
)

# ---------------------------------------------------------------------------
# Stage 1: Lasso fitted on the raw features (alpha is a tunable choice).
# NOTE(review): neither `lasso_model` nor `lasso_predictions` is read again in
# this cell -- the boosted trees below are trained on the scaled raw features,
# so the two stages are independent. Confirm whether Lasso was meant to feed
# stage 2.
# ---------------------------------------------------------------------------
lasso_model = Lasso(alpha=0.001).fit(X_train, y_train)
lasso_predictions = lasso_model.predict(X_test)

# ---------------------------------------------------------------------------
# Stage 2: standardise the features (statistics learned on the training split
# only), then fit gradient-boosted regression trees.
# NOTE(review): tree ensembles are generally insensitive to per-feature
# rescaling, so the scaler is probably redundant here -- confirm before
# removing.
# ---------------------------------------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

gbdt_model = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=50,
)
gbdt_model.fit(X_train_scaled, y_train)
gbdt_predictions = gbdt_model.predict(X_test_scaled)

# ---------------------------------------------------------------------------
# Hold-out metrics for the gradient-boosted model.
# ---------------------------------------------------------------------------
mse_gbdt = mean_squared_error(y_test, gbdt_predictions)
rmse_gbdt = np.sqrt(mse_gbdt)
mape_gbdt = mean_absolute_percentage_error(y_test, gbdt_predictions)
# Both vectors are reshaped to a single row, so the whole test set is scored
# as one ranking.
# NOTE(review): ndcg_score is documented for non-negative relevance values --
# confirm the target column is non-negative.
ndcg_gbdt = ndcg_score(
    np.reshape(y_test.values, (1, -1)),
    np.reshape(gbdt_predictions, (1, -1)),
)

print("Model Performance Metrics:")
for metric_label, metric_value in (
    ("Root Mean Squared Error (RMSE):", rmse_gbdt),
    ("Mean Absolute Percentage Error (MAPE):", mape_gbdt),
    ("Normalized Discounted Cumulative Gain (NDCG):", ndcg_gbdt),
):
    print(metric_label, metric_value)


Model Performance Metrics:
Root Mean Squared Error (RMSE): 0.16337627221873477
Mean Absolute Percentage Error (MAPE): 0.5911110919088558
Normalized Discounted Cumulative Gain (NDCG): 0.8388871155120183


In [None]:
#Lasso first and RF after
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import ndcg_score
from sklearn.preprocessing import StandardScaler

# ---------------------------------------------------------------------------
# Data: the target is selected by column name; every other column is a feature.
# ---------------------------------------------------------------------------
TARGET_COLUMN = 'Average store monthly revenue'
data = pd.read_excel('pca_transformed_dataset.xlsx')
y = data[TARGET_COLUMN]
X = data.drop(columns=[TARGET_COLUMN])

# 80/20 hold-out split with a fixed seed.
# NOTE(review): the other cells in this notebook seed their split differently
# (49, 45, 47), so their metrics are computed on different test rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=50
)

# ---------------------------------------------------------------------------
# Stage 1: Lasso fitted on the raw features (alpha is a tunable choice).
# NOTE(review): neither `lasso_model` nor `lasso_predictions` is read again in
# this cell -- the forest below is trained on the scaled raw features, so the
# two stages are independent. Confirm whether Lasso was meant to feed stage 2.
# ---------------------------------------------------------------------------
lasso_model = Lasso(alpha=0.1).fit(X_train, y_train)
lasso_predictions = lasso_model.predict(X_test)

# ---------------------------------------------------------------------------
# Stage 2: standardise the features (statistics learned on the training split
# only), then fit a random forest regressor.
# NOTE(review): tree ensembles are generally insensitive to per-feature
# rescaling, so the scaler is probably redundant here -- confirm before
# removing.
# ---------------------------------------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

rf_model = RandomForestRegressor(
    n_estimators=100,
    max_depth=10,
    random_state=47,
)
rf_model.fit(X_train_scaled, y_train)
rf_predictions = rf_model.predict(X_test_scaled)

# ---------------------------------------------------------------------------
# Hold-out metrics for the random forest.
# ---------------------------------------------------------------------------
mse_rf = mean_squared_error(y_test, rf_predictions)
rmse_rf = np.sqrt(mse_rf)
mape_rf = mean_absolute_percentage_error(y_test, rf_predictions)
# Both vectors are reshaped to a single row, so the whole test set is scored
# as one ranking.
# NOTE(review): ndcg_score is documented for non-negative relevance values --
# confirm the target column is non-negative.
ndcg_rf = ndcg_score(
    np.reshape(y_test.values, (1, -1)),
    np.reshape(rf_predictions, (1, -1)),
)

print("Model Performance Metrics:")
for metric_label, metric_value in (
    ("Root Mean Squared Error (RMSE):", rmse_rf),
    ("Mean Absolute Percentage Error (MAPE):", mape_rf),
    ("Normalized Discounted Cumulative Gain (NDCG):", ndcg_rf),
):
    print(metric_label, metric_value)


Model Performance Metrics:
Root Mean Squared Error (RMSE): 0.12201056740626642
Mean Absolute Percentage Error (MAPE): 0.946430197349634
Normalized Discounted Cumulative Gain (NDCG): 0.8512233655576584
