# **Laptop Price Prediction**

# **Import necessary libraries**

In [3]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# Load the dataset
df = pd.read_csv('laptopPrice.csv')

# **Data Exploration**

In [4]:
# 1. Data Exploration

# Check for missing values
print("Missing values in the dataset:\n", df.isnull().sum())

# View dataset info
print("\nDataset Information:\n", df.info())

# View summary statistics
print("\nSummary statistics:\n", df.describe())

Missing values in the dataset:
 brand                0
processor_brand      0
processor_name       0
processor_gnrtn      0
ram_gb               0
ram_type             0
ssd                  0
hdd                  0
os                   0
os_bit               0
graphic_card_gb      0
weight               0
warranty             0
Touchscreen          0
msoffice             0
Price                0
rating               0
Number of Ratings    0
Number of Reviews    0
dtype: int64
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 823 entries, 0 to 822
Data columns (total 19 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   brand              823 non-null    object
 1   processor_brand    823 non-null    object
 2   processor_name     823 non-null    object
 3   processor_gnrtn    823 non-null    object
 4   ram_gb             823 non-null    object
 5   ram_type           823 non-null    object
 6   ssd                823 non-null 

# **Feature Engineering**

In [5]:
# 2. Feature Engineering

# Cleaning columns
df['rating'] = df['rating'].str.extract('(\d)').astype(float) # Extract numeric from rating
df['ram_gb'] = df['ram_gb'].str.replace(' GB', '').astype(int) # Convert RAM to integer
df['ssd'] = df['ssd'].str.replace(' GB', '').astype(int) # Convert SSD to integer
df['hdd'] = df['hdd'].str.replace(' GB', '').astype(int) # Convert HDD to integer
df['graphic_card_gb'] = df['graphic_card_gb'].str.replace(' GB', '').astype(int) # Convert graphic card to integer

# Dropping rows with missing target value (Price)
df = df.dropna(subset=['Price'])

# Convert categorical variables to dummy variables
df = pd.get_dummies(df, columns=['brand', 'processor_brand', 'processor_name', 'processor_gnrtn', 'ram_type', 'os', 'os_bit',
                                 'weight', 'warranty', 'Touchscreen', 'msoffice'], drop_first=True)

# **Model Selection and Training**

In [6]:
# 3. Model Selection and Training

# Define features (X) and target (y)
X = df.drop(columns=['Price'])
y = df['Price']

# Split the data into training and test sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize models
models = {
    'Linear Regression': LinearRegression(),
    'Random Forest': RandomForestRegressor(random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42)
}

# Train the models and store results
results = {}
for model_name, model in models.items():
    # Train the model
    model.fit(X_train, y_train)

    # Predict on test set
    y_pred = model.predict(X_test)
        # Model evaluation
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    # Store evaluation results
    results[model_name] = {'MAE': mae, 'RMSE': rmse}

# **Results**

In [7]:
# Display evaluation results
print("\nModel Evaluation Results:")
for model_name, metrics in results.items():
    print(f"{model_name} - MAE: {metrics['MAE']:.2f}, RMSE: {metrics['RMSE']:.2f}")




Model Evaluation Results:
Linear Regression - MAE: 15981.39, RMSE: 25271.94
Random Forest - MAE: 12563.86, RMSE: 22460.72
Gradient Boosting - MAE: 12276.25, RMSE: 22430.57


# **Cross Validations**

In [8]:
# Pro Tip: Cross-validation for model robustness
for model_name, model in models.items():
    cv_scores = cross_val_score(model, X, y, cv=5, scoring='neg_mean_absolute_error')
    print(f"\n{model_name} - Cross-validated MAE: {abs(cv_scores.mean()):.2f}")


Linear Regression - Cross-validated MAE: 23953.91

Random Forest - Cross-validated MAE: 19039.12

Gradient Boosting - Cross-validated MAE: 18809.68
