In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [2]:
# Load the pre-cleaned laptop price dataset; the path is relative to the notebook's working directory.
df = pd.read_csv("../input/laptop_price_cleaned.csv")

In [3]:
# Name of the column the model will learn to predict.
target = "Price_euros"

# Separate the label from the remaining columns. This first X is used for exploration only;
# it is reassigned from the scaled/encoded features before the model is trained.
X = df.drop(columns=[target])
y = df[target]

In [4]:
# Partition the feature names by dtype: string-like columns vs. 64-bit numeric columns.
categorical_columns = list(X.select_dtypes(include=["category", "object"]).columns)
continuous_columns = list(X.select_dtypes(include=["float64", "int64"]).columns)

In [5]:
# Report how many distinct values each categorical feature has, to judge how wide
# a one-hot encoding of that feature would be.
print("Categorical features and their unique values")
for column_name in categorical_columns:
    distinct_count = X[column_name].nunique()
    print(f"{column_name}:", distinct_count)

Categorical features and their unique values
Company: 9
Product: 618
TypeName: 6
Ram: 9
Memory: 39
OpSys: 9
Model: 225
DisplayType: 23
CpuBrand: 3
CpuProduct: 93
PrimaryMemory: 30
SecondaryMemory: 7
Gpu_Brand: 4
Gpu_Product: 110


In [6]:
# Display the numeric feature names collected from X.
continuous_columns

['Inches',
 'Weight_kg',
 'LogPrice',
 'Inches_Binned',
 'Touchscreen',
 'IPS Panel',
 'RetinaDisplay',
 'ResolutionWidth',
 'ResolutionHeight',
 'ResolutionWidthCat',
 'CpuClockSpeed_GHz',
 'Ram_GB']

#### Dropping Columns

In [7]:
# Columns excluded from the modelling frame:
#   - Product, Model, CpuProduct: high-cardinality text columns (618 / 225 / 93 distinct values)
#   - Price_euros: the prediction target itself
#   - LogPrice: presumably a log transform of the target, so it would leak the label — confirm
#   - ResolutionWidth: removed while ResolutionWidthCat is kept
# TODO(review): the original note listed "Weight" and "Gpu" as further candidates — confirm.
cols = [
    "Product",
    "Model",
    "CpuProduct",
    "LogPrice",
    "Price_euros",
    "ResolutionWidth",
]

df_ = df.drop(columns=cols)

In [8]:
# List the columns that remain after the drop.
df_.columns

Index(['Company', 'TypeName', 'Inches', 'Ram', 'Memory', 'OpSys', 'Weight_kg',
       'Inches_Binned', 'DisplayType', 'Touchscreen', 'IPS Panel',
       'RetinaDisplay', 'ResolutionHeight', 'ResolutionWidthCat', 'CpuBrand',
       'CpuClockSpeed_GHz', 'Ram_GB', 'PrimaryMemory', 'SecondaryMemory',
       'Gpu_Brand', 'Gpu_Product'],
      dtype='object')

In [9]:
# Recompute the dtype-based feature groups, this time on the reduced frame.
categorical_columns = list(df_.select_dtypes(include=["category", "object"]).columns)
continuous_columns = list(df_.select_dtypes(include=["float64", "int64"]).columns)

In [10]:
# Indicator columns: they are encoded separately and removed from the list of
# features that get standard-scaled.
binary_cols = [
    "Touchscreen",
    "IPS Panel",
    "RetinaDisplay",
]

In [11]:
# Preview the numeric columns; at this point the list still contains the indicator columns.
df_[continuous_columns]

Unnamed: 0,Inches,Weight_kg,Inches_Binned,Touchscreen,IPS Panel,RetinaDisplay,ResolutionHeight,ResolutionWidthCat,CpuClockSpeed_GHz,Ram_GB
0,13.3,1.37,2,0,1,1,1600,2560,2.3,8
1,13.3,1.34,2,0,0,0,900,1440,1.8,8
2,15.6,1.86,4,0,0,0,1080,1920,2.5,8
3,15.4,1.83,4,0,1,1,1800,2880,2.7,16
4,13.3,1.37,2,0,1,1,1600,2560,3.1,8
...,...,...,...,...,...,...,...,...,...,...
1298,14.0,1.80,3,1,1,0,1080,1920,2.5,4
1299,13.3,1.30,2,1,1,0,1800,3200,2.5,16
1300,14.0,1.50,3,0,0,0,768,1366,1.6,2
1301,15.6,2.19,4,0,0,0,768,1366,2.5,6


#### Dummy Encoding

In [12]:
# One-hot encoder that drops the first category of each feature (avoids a redundant,
# perfectly collinear dummy) and returns a dense array instead of a sparse matrix.
encoder = OneHotEncoder(drop='first', sparse_output=False)

In [13]:
# Fit the encoder on the categorical features, then wrap the dense result in a DataFrame
# labelled with the feature names the encoder generated.
encoded_values = encoder.fit_transform(df_[categorical_columns])
df_encoded = pd.DataFrame(
    encoded_values,
    columns=encoder.get_feature_names_out(categorical_columns),
)

In [14]:
# Encode the indicator columns with their own encoder instance. Re-fitting the shared
# `encoder` here would overwrite the categories it learned from the categorical features,
# leaving it unable to transform new rows or reproduce those feature names on a re-run.
binary_encoder = OneHotEncoder(drop='first', sparse_output=False)
df_encoded2 = pd.DataFrame(
    binary_encoder.fit_transform(df_[binary_cols]),
    columns=binary_encoder.get_feature_names_out(binary_cols),
)

In [15]:
# Place the encoded indicator columns next to the encoded categorical columns.
# NOTE(review): this rebinds df_encoded, so running the cell twice appends the
# indicator columns a second time — re-run the encoding cell first.
df_encoded = pd.concat((df_encoded, df_encoded2), axis="columns")

#### Scaling

In [16]:
# Scaler used to standardise the non-indicator numeric features.
scaler = StandardScaler()

In [17]:
# Take the indicator columns out of the list of features to be standardised.
# The list object is updated in place, and repeating the cell changes nothing further.
continuous_columns[:] = [
    name for name in continuous_columns if name not in binary_cols
]

In [18]:
# Standardise the remaining numeric features.
# NOTE(review): the scaler is fit on every row, before the train/test split performed
# later in the notebook, so held-out rows contribute to the statistics used for scaling.
# Consider fitting on the training rows only (e.g. inside a Pipeline) — confirm intent.
df_scaled = scaler.fit_transform(df_[continuous_columns])

In [19]:
# Re-wrap the scaled values as a DataFrame carrying the original column names.
df_scaled = pd.DataFrame(df_scaled, columns=continuous_columns)

#### Merge DataFrames

In [29]:
# Build the final feature matrix by joining the scaled numeric features with the
# encoded features on their row index, then display it.
X = df_scaled.merge(df_encoded, left_index=True, right_index=True)
X

#### Train/test split

In [30]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

#### Modelling

In [31]:
# Fit a linear regression on the training split and predict the held-out rows.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Compute both hold-out metrics up front; the reporting order below is unchanged.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)

# NOTE(review): coef_ should hold one value per column of X — after a Restart & Run All,
# check that the printed length matches X.shape[1].
print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)

print("R^2 Score:", r2)

Mean Squared Error: 174635.29538522955
Coefficients: [  -6.97224924   56.90920525 -118.54217418  165.89933373   17.03066071
  149.6562291   384.24242892]
Intercept: 1128.2863174733209
R^2 Score: 0.6561800046468299
