In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error

In [None]:
# Load the dataset
csv_file_path = '/mnt/data/laptopData.csv'
data = pd.read_csv(csv_file_path)

# Replace missing CPU descriptions with a placeholder, then make every value a string.
# The fill has to happen before the cast: on pandas versions where astype(str)
# renders NaN as the literal string 'nan', a later fillna has nothing left to fill.
data['Cpu'] = data['Cpu'].fillna('Unknown').astype(str)

# Function to extract CPU brand and speed
def extract_cpu_features(cpu_info):
    """Split a CPU description string into a brand label and a clock speed.

    Parameters
    ----------
    cpu_info : str
        Whitespace-separated CPU description, e.g. 'Intel Core i5 2.3GHz'.

    Returns
    -------
    pd.Series
        Two elements: the brand (the first two tokens joined by a space, or the
        only token when there is just one, or 'Unknown' when there are none)
        and the speed as a float taken from the last token when that token
        contains 'GHz'. The speed is 0.0 when no speed can be parsed.
    """
    parts = cpu_info.split()
    if not parts:
        # Empty or whitespace-only description: nothing to extract.
        return pd.Series(['Unknown', 0.0])

    # Slicing instead of indexing parts[1] keeps single-token descriptions
    # (such as a placeholder value) from raising IndexError.
    brand = ' '.join(parts[:2])

    speed = 0.0
    last_token = parts[-1]
    if 'GHz' in last_token:
        try:
            speed = float(last_token.replace('GHz', ''))
        except ValueError:
            # The token mentions GHz but carries no parsable number.
            speed = 0.0
    return pd.Series([brand, speed])

# Function to extract GPU brand
def extract_gpu_brand(gpu_info):
    """Return the first whitespace-separated token of a GPU description.

    Parameters
    ----------
    gpu_info : str
        GPU description, e.g. 'Intel HD Graphics 620'.

    Returns
    -------
    str
        The first token, or 'Unknown' when the description is empty or
        contains only whitespace.
    """
    tokens = gpu_info.split()
    # Guard the index so a blank description does not raise IndexError.
    return tokens[0] if tokens else 'Unknown'

# Function to extract screen width and height
def extract_screen_resolution(screen_info):
    """Parse the trailing '<width>x<height>' token of a screen description.

    Parameters
    ----------
    screen_info : str
        Screen description whose last whitespace-separated token is expected
        to look like '1920x1080'.

    Returns
    -------
    pd.Series
        Two integers, width then height. When the description is empty or its
        last token is not of the form '<int>x<int>', both values are 0.
    """
    tokens = screen_info.split()
    if not tokens:
        return pd.Series([0, 0])

    try:
        # Both a wrong number of 'x'-separated pieces (unpacking) and a
        # non-integer piece (int) raise ValueError, so one handler covers both.
        width, height = tokens[-1].split('x')
        return pd.Series([int(width), int(height)])
    except ValueError:
        return pd.Series([0, 0])

# Apply the functions to the respective columns
cpu_features = data['Cpu'].apply(extract_cpu_features)
cpu_features.columns = ['Cpu_Brand', 'Cpu_Speed']

# Fill missing values before casting to str: on pandas versions where astype(str)
# renders NaN as the literal string 'nan', a later fillna never finds anything to fill.
data['Gpu'] = data['Gpu'].fillna('Unknown').astype(str)
data['Gpu_Brand'] = data['Gpu'].apply(extract_gpu_brand)

# '0x0' is the placeholder for a missing resolution; it parses to width 0, height 0.
data['ScreenResolution'] = data['ScreenResolution'].fillna('0x0').astype(str)
screen_resolution_features = data['ScreenResolution'].apply(extract_screen_resolution)
screen_resolution_features.columns = ['Screen_Width', 'Screen_Height']

# Add these new features to the original dataframe
data = pd.concat([data, cpu_features, screen_resolution_features], axis=1)

# Selecting relevant features
features = ['Company', 'TypeName', 'Ram', 'Weight', 'Cpu_Brand', 'Cpu_Speed', 'Gpu_Brand', 'Screen_Width', 'Screen_Height']
target = 'Price'

# Label encoding for categorical features
# Each fitted encoder is kept in label_encoders, keyed by the column it encoded.
label_encoders = {}
for feature in features:
    # Encode every non-numeric column. Checking "not numeric" rather than
    # dtype == 'object' also covers pandas' dedicated string dtypes, which
    # do not compare equal to 'object' and would otherwise be left as text.
    if not pd.api.types.is_numeric_dtype(data[feature]):
        le = LabelEncoder()
        data[feature] = le.fit_transform(data[feature])
        label_encoders[feature] = le

# Keep only the rows that have a value for the target variable 'Price'
data_cleaned = data.dropna(subset=[target])

# Build the feature matrix and target vector, then hold out 20% of the rows for testing
X, y = data_cleaned[features], data_cleaned[target]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features: the scaler is fitted on the training split only and
# the same fitted scaler is then applied to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Linear Regression: fit on the training split, score on the held-out split
linear_reg = LinearRegression().fit(X_train, y_train)
y_pred_linear = linear_reg.predict(X_test)
mse_linear = mean_squared_error(y_true=y_test, y_pred=y_pred_linear)

# Ridge Regression with the estimator's default settings, evaluated the same way
ridge_reg = Ridge().fit(X_train, y_train)
y_pred_ridge = ridge_reg.predict(X_test)
mse_ridge = mean_squared_error(y_true=y_test, y_pred=y_pred_ridge)

# Last expression of the cell: display both test-set mean squared errors
mse_linear, mse_ridge