# **Analysing the factors of Laptop pricing**

## **Import Packages**

In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score

## **Load Data**

In [14]:
# Read the raw laptop price dataset from its CSV file (path is relative to the notebook).
dataset_path = "../data/laptop_price - dataset.csv"
df = pd.read_csv(dataset_path)

## **Data Preparation**

### **Checking Null Values**

In [None]:
# Count the missing values in every column and print the per-column totals.
null_counts = df.isna().sum()
print(null_counts)

### **Simplify 'Memory' to numeric**

In [None]:
# Convert the free-text 'Memory' column into a numeric capacity in gigabytes.
#
# Only the FIRST capacity mentioned in each value is used (same as before), but
# the unit is now honoured: previously the unit was stripped/ignored, so a
# terabyte value such as "1TB" became 1.0 and ranked below "128GB".
# NOTE(review): assumes values look like "256GB SSD" / "1TB HDD" -- confirm
# against the dataset, and confirm that 1 TB = 1000 GB is the wanted convention.
GB_PER_TB = 1000.0

# astype(str) keeps this cell re-runnable: on a second run the column is already
# float, and the `.str` accessor would otherwise raise on a non-string column.
# Raw string avoids the invalid "\d" escape warning of a plain string literal.
memory_parts = df['Memory'].astype(str).str.extract(r'(?i)(\d+(?:\.\d+)?)\s*(TB|GB)?')

# Group 0 is the number, group 1 the optional unit; a missing unit is read as GB.
memory_value = memory_parts[0].astype(float)
unit_factor = memory_parts[1].str.upper().map({'TB': GB_PER_TB}).fillna(1.0).astype(float)

# Values with no number at all stay NaN, exactly as with the original extract.
df['Memory'] = memory_value * unit_factor

### **Extract screen resolution width and height (in pixels)**

In [18]:
# Pull the "<width>x<height>" pair out of the 'ScreenResolution' text and store
# each side as its own float column. Each pattern captures one side of the "x".
resolution_patterns = {
    'Resolution_Width': r'(\d+)x\d+',
    'Resolution_Height': r'\d+x(\d+)',
}
for resolution_column, resolution_pattern in resolution_patterns.items():
    extracted_side = df['ScreenResolution'].str.extract(resolution_pattern)
    df[resolution_column] = extracted_side.astype(float)

### **Drop unused columns**

In [19]:
# Remove the columns that are not used as model features (modifies df in place).
unused_columns = ['Product', 'ScreenResolution', 'GPU_Type', 'CPU_Type']
df.drop(columns=unused_columns, inplace=True)

### **Encode categorical data**

In [20]:
# Replace each categorical text column with integer codes.
# The one encoder is re-fitted for every column, so after the loop `le` only
# holds the classes of the last column in the list.
categorical_cols = ['Company', 'TypeName', 'CPU_Company', 'GPU_Company', 'OpSys']
le = LabelEncoder()
for col in categorical_cols:
    le.fit(df[col])
    df[col] = le.transform(df[col])

### **Data Separation into X and y**

In [22]:
# Separate the regression target (the price column) from the feature matrix
# (every remaining column).
target_column = "Price (Euro)"
x = df.drop(columns=[target_column])
y = df[target_column]

### **Data splitting**

In [23]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# reproducible between runs.
split_parts = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)
x_train, x_test, y_train, y_test = split_parts

## **Model Building**

### **Linear Regression**

In [24]:
# Fit a linear regression on the training split, then predict prices for the
# held-out test split. `fit` returns the fitted estimator, so it can be chained.
lr = LinearRegression().fit(x_train, y_train)
y_pred_lr = lr.predict(x_test)

### **Linear Regression metrics**

In [None]:
# Report how well the linear regression predicts the held-out test prices:
# R^2 and the root of the mean squared error (same units as the target).
print("Linear Regression:")

r2_lr = r2_score(y_test, y_pred_lr)
print("R^2 Score:", r2_lr)

mse_lr = mean_squared_error(y_test, y_pred_lr)
rmse_lr = np.sqrt(mse_lr)
print("RMSE:", rmse_lr)