In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib
import matplotlib.pyplot as plt

In [None]:
# Read the laptop price table from disk into a DataFrame
file_path = 'laptop_prices.csv'  # Point this at your own copy of the CSV
data = pd.read_csv(filepath_or_buffer=file_path)


In [None]:
# Feature Engineering: Convert categorical data using One-Hot Encoding.
# drop='first' leaves out the first category of every feature, so each feature
# contributes (n_categories - 1) indicator columns.
categorical_columns = ['Brand', 'Processor', 'Storage', 'GPU', 'Resolution', 'Operating_System']
try:
    # scikit-learn >= 1.2 names the dense-output switch `sparse_output`;
    # the older `sparse` keyword was removed in 1.4 and raises TypeError there.
    ohe = OneHotEncoder(drop='first', sparse_output=False)
except TypeError:
    # Older scikit-learn releases only understand the original `sparse` keyword.
    ohe = OneHotEncoder(drop='first', sparse=False)
categorical_data = ohe.fit_transform(data[categorical_columns])
# Carry over data's index so a column-wise concat with columns taken from `data`
# lines up row by row even when `data` does not have a default RangeIndex.
categorical_df = pd.DataFrame(
    categorical_data,
    columns=ohe.get_feature_names_out(),
    index=data.index,
)


In [None]:
# Numerical features that get standardized; the remaining columns of X are the
# one-hot indicators held in `categorical_df`.
numerical_columns = ['RAM_GB', 'Screen_Size_inch', 'Battery_Life_Hours', 'Weight_Kg']

# Combine Processed Data.
# `data` itself is never overwritten: re-running this cell therefore always
# starts from the unscaled values, and the fitted scaler keeps the statistics
# of the raw columns (which is what is needed to transform new raw inputs).
features_raw = pd.concat([data[numerical_columns], categorical_df], axis=1)
y = data['Price_Dollars']

# Train-Test Split -- done before scaling so that the test rows do not
# contribute to the scaler's mean / standard deviation.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features_raw, y, test_size=0.2, random_state=42
)

# Standardize Numerical Features using statistics of the training rows only
scaler = StandardScaler()
scaler.fit(X_train_raw[numerical_columns])


def _standardize_numeric(frame):
    """Return a copy of `frame` with `numerical_columns` transformed by `scaler`.

    The input frame is not modified; non-numerical columns are passed through
    unchanged and the column order is preserved.
    """
    scaled = frame.copy()
    scaled[numerical_columns] = scaler.transform(frame[numerical_columns])
    return scaled


X_train = _standardize_numeric(X_train_raw)
X_test = _standardize_numeric(X_test_raw)
# Full feature matrix, scaled with the same train-fitted scaler; kept so that
# any cell referring to `X` still finds it.
X = _standardize_numeric(features_raw)

# Train Improved Model: Random Forest Regressor
model = RandomForestRegressor(n_estimators=200, max_depth=15, random_state=42)
model.fit(X_train, y_train)


In [None]:
# Model Evaluation: predict on the test split, then score those predictions
# with each regression metric in turn.
y_pred = model.predict(X_test)
mae, mse, r2 = (
    score(y_test, y_pred)
    for score in (mean_absolute_error, mean_squared_error, r2_score)
)
rmse = np.sqrt(mse)  # square root of the MSE computed above


In [None]:
# Report all four metrics, one per line, in a single print call
print(
    f"Mean Absolute Error (MAE): {mae}",
    f"Mean Squared Error (MSE): {mse}",
    f"Root Mean Squared Error (RMSE): {rmse}",
    f"R-squared (R²): {r2}",
    sep="\n",
)

In [None]:
# Persist the fitted estimator together with both preprocessing objects
joblib.dump(value=model, filename='laptop_price_model.pkl')
joblib.dump(value=scaler, filename='scaler.pkl')
joblib.dump(value=ohe, filename='one_hot_encoder.pkl')


In [None]:
# Plot Actual vs Predicted Prices; the dashed red diagonal is the y = x line
# spanning the range of actual prices.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, color='blue', alpha=0.5)
price_min, price_max = min(y_test), max(y_test)
ax.plot([price_min, price_max], [price_min, price_max], color='red', linestyle='--')
ax.set_xlabel('Actual Price')
ax.set_ylabel('Predicted Price')
ax.set_title('Actual vs Predicted Laptop Prices')
plt.show()