In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, accuracy_score, precision_score, recall_score
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.cluster import KMeans

In [3]:
try:
    df_laptop = pd.read_csv('laptop_price.csv')
    temp = df_laptop['Cpu'].str.extract(r'(\d+\.\d+)', expand=False)
    df_laptop['Ghz'] = pd.to_numeric(temp, errors='coerce')
    df_laptop['RAM'] = df_laptop['Ram'].str.replace('GB', '', regex=False).astype(float)
    df_laptop['Weight'] = df_laptop['Weight'].str.replace('kg', '', regex=False).astype(float)
    feature_cols = ['RAM', 'Weight', 'Ghz']
    price_col = 'Price_euros'
    df_q9 = df_laptop[feature_cols + [price_col]].dropna()
    X_q9_unscaled = df_q9[feature_cols]
    y_q9 = df_q9[price_col]
    model_unscaled = LinearRegression()
    model_unscaled.fit(X_q9_unscaled, y_q9)
    r2_unscaled = model_unscaled.score(X_q9_unscaled, y_q9)
    scaler = StandardScaler()
    X_q9_scaled = scaler.fit_transform(X_q9_unscaled)
    model_scaled = LinearRegression()
    model_scaled.fit(X_q9_scaled, y_q9)
    r2_scaled = model_scaled.score(X_q9_scaled, y_q9)
    print("WITHOUT SCALING:")
    print("R2:", round(r2_unscaled, 4))
    print("WITH SCALING:")
    print("R2:", round(r2_scaled, 4))
    print("Feature importance:")
    for feat, coef in zip(feature_cols, model_scaled.coef_):
        print(feat, round(coef, 2))
    most_significant = feature_cols[np.argmax(np.abs(model_scaled.coef_))]
    print("Most significant predictor:", most_significant)
except FileNotFoundError:
    print("Laptop dataset not found.")
except Exception as e:
    print("Error:", e)

WITHOUT SCALING:
R2: 0.7088
WITH SCALING:
R2: 0.7088
Feature importance:
RAM 442.76
Weight -133.81
Ghz 83.02
Most significant predictor: RAM
