In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os as os
# Algorithms
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# Data splitting and preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Metrics
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error

import xgboost as xgb

In [3]:
# Read the three Excel workbooks used by this notebook, in a fixed order.
# NOTE(review): judging by the file names, the two "missing_*_found" sheets
# presumably hold GPU / weight values recovered for rows that lack them in the
# main sheet -- confirm against the workbooks.
df, gpu_missing, weights_missing = [
    pd.read_excel(workbook)
    for workbook in (
        "Laptops_data.xlsx",
        "missing_gpu_found.xlsx",
        "missing_weights_found.xlsx",
    )
]

In [7]:
# Preview the first rows of the raw laptop data (head() shows 5 rows by default).
df.head()

Unnamed: 0,Name,Price,Type,Processor Brand,Processor Name,Processor Generation,SSD,SSD Capacity,HDD Capacity,EMMC Storage Capacity,...,Clock Speed,Graphic Processor,Operating System,Touchscreen,Screen Size,Screen Resolution,Screen Type,Weight,Fingerprint Sensor,Backlit Keyboard
0,ASUS Chromebook Touch Intel Celeron Dual Core ...,"₹18,990",Chromebook,Intel,Celeron Dual Core,,No,0,0,64 GB,...,1.1 GHz with Turbo Boost Upto 2.4 GHz,Intel Integrated HD 500,Chrome,Yes,39.62 cm (15.6 inch),1920 x 1080 Pixels,"Full HD LED Backlit LCD Glossy Display (16:9, ...",1.69 Kg,No,No
1,Primebook 4G Android Based MediaTek MT8788 - (...,"₹13,990",Thin and Light Laptop,MediaTek,MediaTek MT8788,,No,0,0,64 GB,...,Max Frequency Up to 2.0 GHz,MediaTek Integrated ARM Mali G72,Prime OS,No,29.46 cm (11.6 Inch),1366 x 768 Pixels,HD IPS,1.065 Kg,No,No
2,Lenovo V15 Ryzen 5 Hexa Core 5500U - (8 GB/512...,"₹35,990",Thin and Light Laptop,AMD,Ryzen 5 Hexa Core,,Yes,512 GB,0,0,...,2.1 GHz with Turbo Boost Upto 4.0 GHz,AMD Radeon AMD,Windows 11 Home,No,39.62 cm (15.6 Inch),1920 x 1080 Pixel,Full HD Anti-glare Display,1.7 Kg,No,No
3,HP 15s (2023) Athlon Dual Core 3050U - (8 GB/5...,"₹27,990",Thin and Light Laptop,AMD,Athlon Dual Core,,Yes,512 GB,0,0,...,2.3 GHz with Turbo Boost Upto 3.2 GHz,AMD Radeon AMD,Windows 11 Home,No,39.62 cm (15.6 Inch),1366 x 768 Pixel,"HD, micro-edge, BrightView, Brightness: 250 ni...",1.69 Kg,No,No
4,HP 15s (2023) Ryzen 3 Dual Core 3250U - (8 GB/...,"₹33,990",Thin and Light Laptop,AMD,Ryzen 3 Dual Core,,Yes,512 GB,0,0,...,2.60 GHz up to 3.50 GHz,AMD Radeon AMD,Windows 11 Home,No,39.62 cm (15.6 Inch),1920 x 1080 Pixel,"Full HD, micro-edge,anti-glare, Brightness: 22...",1.69 Kg,No,No


In [None]:
# Select target and features.
# NOTE(review): `dataset` is not defined in any cell visible here. This cell
# assumes an object exposing `.data` (features) and `.target` (values to
# predict) has already been built from the loaded data -- TODO confirm or
# define it upstream, otherwise this cell raises NameError on a fresh kernel.
X = dataset.data
y = dataset.target

# Split settings are named so they can be tuned in one place.
TEST_SIZE = 0.3   # fraction of rows held out for evaluation
SPLIT_SEED = 42   # fixed seed keeps the split identical across re-runs

# Split the dataset into training and testing sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SPLIT_SEED
)

# Scale features to mean=0 and variance=1. The scaler is fitted on the training
# rows only and then applied to the test rows, so no test-set statistics leak
# into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Dictionary of different algorithms
# Seed shared by every estimator that accepts one, so repeated runs of the
# notebook fit the same models and report the same metrics.
MODEL_SEED = 42

# Display name -> unfitted regressor to be compared.
regressors = {
    'Linear Regression': LinearRegression(),  # takes no random_state
    'Decision Tree Regressor': DecisionTreeRegressor(random_state=MODEL_SEED),
    'Random Forest Regressor': RandomForestRegressor(random_state=MODEL_SEED),
    'Gradient Boosting Regressor': GradientBoostingRegressor(random_state=MODEL_SEED),
    'XGBoost Regressor': xgb.XGBRegressor(random_state=MODEL_SEED)
}

In [None]:
# Initialize an empty list to store the results
# One metrics record (a dict) is collected here per evaluated regressor.
results_list = []

metric_func = mean_squared_error
metric_name = 'Mean Squared Error'

# Fit every candidate on the training split and score it on the held-out split.
for model_name, model_instance in regressors.items():
    # fit() returns the estimator itself, so fitting and predicting can be chained.
    y_predicted = model_instance.fit(X_train, y_train).predict(X_test)

    # Error metrics (lower is better) and R^2 (higher is better).
    mse = mean_squared_error(y_test, y_predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_predicted)
    mape = mean_absolute_percentage_error(y_test, y_predicted)
    r2 = r2_score(y_test, y_predicted)

    record = {'Model': model_name}
    record['Mean Squared Error'] = mse
    record['Root Mean Squared Error'] = rmse
    record['Mean Absolute Error'] = mae
    record['Mean Absolute Percentage Error'] = mape
    record['R^2 Score'] = r2
    results_list.append(record)

# Tabulate all models, ordered from lowest to highest R^2, and print the table.
results = pd.DataFrame(results_list).sort_values(by='R^2 Score', ascending=True)
print(results)

In [None]:
# Visualization of the results
# Visualization of the results: one bar per model, bar height = MAPE.
# MAPE is reported as a fraction (0.25 == 25%) and is not capped at 1.0.
mape_values = results['Mean Absolute Percentage Error']

fig, ax = plt.subplots(figsize=(12, 6))
splot = ax.bar(results['Model'], mape_values, color='skyblue')
ax.set_xlabel('Models')
ax.set_ylabel('MAPE')
ax.set_title('Comparison of Model Performance')
plt.xticks(rotation=45, ha="right")

# Keep the 0-1 range as a minimum, but grow the axis (with 10% headroom) when a
# model's MAPE exceeds 100% so its bar and label are not clipped.
y_upper = max(1.0, float(mape_values.max()) * 1.1) if len(mape_values) else 1.0
ax.set_ylim(0.0, y_upper)

# Put each percentage label just above its bar. Anchoring at the bar top (not a
# fixed offset below it) keeps labels inside the axes even for very short bars.
for bar in splot:
    bar_height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width() / 2, bar_height, f"{bar_height:.2%}",
            ha='center', va='bottom', color='black', fontsize=11)

fig.tight_layout()
plt.show()