In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures

In [2]:
# Load data
data = pd.read_csv('Computer_Data.csv')

In [3]:
# Drop unnecessary index column if exists
data = data.drop(columns=['Unnamed: 0'], errors='ignore')

In [4]:
# Encode categorical variables (e.g. 'yes'/'no', brand names, etc.)
data_encoded = pd.get_dummies(data, drop_first=True)

In [5]:
# Define features and target
X = data_encoded.drop(columns=["price"])
y = data_encoded["price"]

In [6]:
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Train the multiple linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

In [8]:
# Make predictions
y_pred = model.predict(X_test)

In [9]:
# Evaluate model performance
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [10]:
print("Linear Regression:")
print("MSE:", mse)
print("R² Score:", r2)

Linear Regression:
MSE: 80333.72043387186
R² Score: 0.7540861281178491


In [11]:
# Feature importance (coefficients)
feature_importance = pd.DataFrame({
    'Feature': X.columns,
    'Coefficient': model.coef_,
    'Abs_Coefficient': np.abs(model.coef_)
}).sort_values(by='Abs_Coefficient', ascending=False)

In [12]:
print("\nTop Features:\n", feature_importance.head())


Top Features:
        Feature  Coefficient  Abs_Coefficient
8  premium_yes  -529.924010       529.924010
3       screen   126.231665       126.231665
7    multi_yes   107.436537       107.436537
6       cd_yes    64.320447        64.320447
5        trend   -51.704344        51.704344


In [13]:
# Refine: Drop less important features (you can decide based on importance or domain knowledge)
X_refined = X.drop(columns=["hd", "ads"], errors='ignore')  # Only if they exist


In [14]:
# Split again
X_train_refined, X_test_refined, y_train, y_test = train_test_split(X_refined, y, test_size=0.2, random_state=42)

In [15]:
# Train new model
model_refined = LinearRegression()
model_refined.fit(X_train_refined, y_train)


In [16]:
# Predict & evaluate
y_pred_refined = model_refined.predict(X_test_refined)
mse_refined = mean_squared_error(y_test, y_pred_refined)
r2_refined = r2_score(y_test, y_pred_refined)

In [17]:
print("\nRefined Linear Regression:")
print("MSE:", mse_refined)
print("R² Score:", r2_refined)


Refined Linear Regression:
MSE: 92829.56129755286
R² Score: 0.7158344376370067


In [18]:
# Polynomial Regression (Try degrees 1, 2, 3)
degrees = [1, 2, 3]
mse_values = []
r2_values = []

In [19]:
for d in degrees:
    poly = PolynomialFeatures(degree=d, include_bias=False)
    X_poly = poly.fit_transform(X)

    X_train_poly, X_test_poly, y_train, y_test = train_test_split(X_poly, y, test_size=0.2, random_state=42)

    model_poly = LinearRegression()
    model_poly.fit(X_train_poly, y_train)

    y_pred_poly = model_poly.predict(X_test_poly)

    mse_values.append(mean_squared_error(y_test, y_pred_poly))
    r2_values.append(r2_score(y_test, y_pred_poly))

In [20]:
# Find the best degree
best_degree = degrees[r2_values.index(max(r2_values))]

In [21]:
print("\nPolynomial Regression Results:")
for i, d in enumerate(degrees):
    print(f"Degree {d}: MSE = {mse_values[i]:.2f}, R² = {r2_values[i]:.4f}")

print(f"\nBest Degree: {best_degree}")


Polynomial Regression Results:
Degree 1: MSE = 80333.72, R² = 0.7541
Degree 2: MSE = 44617.09, R² = 0.8634
Degree 3: MSE = 39302.69, R² = 0.8797

Best Degree: 3
