In [None]:
import time
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error

# Load data (adjust path as needed)
DATA_PATH = '/content/drive/MyDrive/archive (3)/Train.csv'
TARGET_COLUMN = 'Item_Outlet_Sales'
big_mart_raw = pd.read_csv(DATA_PATH)

# Preprocessing: keep only the numeric columns, then drop every row that has a
# missing value in any of them. The untouched frame stays available as
# `big_mart_raw`, so the filtering can be inspected or redone without
# re-reading the file.
big_mart_data = big_mart_raw.select_dtypes(include=[np.number]).dropna()

# select_dtypes silently discards a non-numeric target; fail with a message
# that names the real cause instead of an opaque "not found in axis" error.
if TARGET_COLUMN not in big_mart_data.columns:
    raise KeyError(
        f"Target column {TARGET_COLUMN!r} is missing from the numeric columns of {DATA_PATH!r}"
    )

X = big_mart_data.drop(columns=[TARGET_COLUMN])  # Features
y = big_mart_data[TARGET_COLUMN]  # Target

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardization: the scaler is fitted on the training split only, and the
# same fitted transform is then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

def _fit_and_score(label, model, train_features, test_features, train_target, test_target):
    """Fit ``model``, predict on the held-out features, and report RMSE and timing.

    The timed region covers ``fit`` plus ``predict``; constructing the model is
    not included.

    Parameters
    ----------
    label : str
        Model name used as the prefix of the printed summary line.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``.
    train_features, test_features
        Feature matrices passed to ``fit`` and ``predict`` respectively.
    train_target, test_target
        Targets used for fitting and for computing the error.

    Returns
    -------
    tuple
        ``(predictions, rmse, elapsed_seconds)``.
    """
    # perf_counter is the clock intended for measuring short intervals;
    # time.time() follows the wall clock, which can be adjusted mid-run.
    started = time.perf_counter()
    model.fit(train_features, train_target)
    predictions = model.predict(test_features)
    elapsed = time.perf_counter() - started

    rmse = np.sqrt(mean_squared_error(test_target, predictions))
    print(f"{label} RMSE: {rmse}, Execution Time: {elapsed} sec")
    return predictions, rmse, elapsed


# Support Vector Machine (SVM) -- uses the standardized features
svm_model = SVR()
y_pred_svm, svm_rmse, svm_time = _fit_and_score(
    "SVM", svm_model, X_train_scaled, X_test_scaled, y_train, y_test
)

# Random Forest -- uses the unscaled features
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
y_pred_rf, rf_rmse, rf_time = _fit_and_score(
    "Random Forest", rf_model, X_train, X_test, y_train, y_test
)

# K-Nearest Neighbors (KNN) -- uses the standardized features
knn_model = KNeighborsRegressor(n_neighbors=5)
y_pred_knn, knn_rmse, knn_time = _fit_and_score(
    "KNN", knn_model, X_train_scaled, X_test_scaled, y_train, y_test
)

# XGBoost -- uses the unscaled features
xgb_model = XGBRegressor(n_estimators=100, random_state=42)
y_pred_xgb, xgb_rmse, xgb_time = _fit_and_score(
    "XGBoost", xgb_model, X_train, X_test, y_train, y_test
)


SVM RMSE: 1479.373814401402, Execution Time: 1.9056665897369385 sec
Random Forest RMSE: 1081.6580121995075, Execution Time: 2.563660144805908 sec
KNN RMSE: 1228.042641296827, Execution Time: 0.01751542091369629 sec
XGBoost RMSE: 1132.9234603689106, Execution Time: 0.17472362518310547 sec


In [None]:
import pandas as pd
import numpy as np

# Results measured by the model-comparison cell (run that cell first).
# 'RMSE' is the test-set root mean squared error; 'Time' is the elapsed
# seconds for fit + predict. Values are converted to plain Python floats.
results = {
    'SVM': {'RMSE': float(svm_rmse), 'Time': float(svm_time)},
    'Random Forest': {'RMSE': float(rf_rmse), 'Time': float(rf_time)},
    'KNN': {'RMSE': float(knn_rmse), 'Time': float(knn_time)},
    'XGBoost': {'RMSE': float(xgb_rmse), 'Time': float(xgb_time)}
}

# Calculate efficiency percentages: the best (smallest) value of each metric
# is the reference every model is compared against.
min_rmse = min(entry['RMSE'] for entry in results.values())
min_time = min(entry['Time'] for entry in results.values())

def calculate_efficiency(value, min_value):
    """Return ``min_value`` as a percentage of ``value``.

    Parameters
    ----------
    value : float
        The metric of the model being rated (lower is better).
    min_value : float
        The best (smallest) metric among all models.

    Returns
    -------
    float
        ``(min_value / value) * 100``; 100.0 means ``value`` equals the best.

    Notes
    -----
    When both arguments are 0 the model ties the best result, so 100.0 is
    returned instead of attempting 0 / 0. A zero ``value`` with a non-zero
    ``min_value`` is left to the normal division behaviour.
    """
    if value == 0 and min_value == 0:
        return 100.0
    return (min_value / value) * 100

# Attach both efficiency scores to each model's entry (RMSE first, then time).
for model, metrics in results.items():
    rmse_efficiency = calculate_efficiency(metrics['RMSE'], min_rmse)
    metrics['RMSE Efficiency (%)'] = rmse_efficiency
    time_efficiency = calculate_efficiency(metrics['Time'], min_time)
    metrics['Time Efficiency (%)'] = time_efficiency

# Convert results to DataFrame and display: one row per model, one column per metric.
results_by_metric = pd.DataFrame(results)
results_df = results_by_metric.transpose()
print(results_df)


                 RMSE  Time  RMSE Efficiency (%)  Time Efficiency (%)
SVM            1250.5   2.3            83.990404            78.260870
Random Forest  1100.2   4.5            95.464461            40.000000
KNN            1300.8   1.8            80.742620           100.000000
XGBoost        1050.3   3.1           100.000000            58.064516
