In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
import pickle

# Load the raw laptop listings.
df = pd.read_csv('laptopprice.csv')

# Derive an integer 'rate' from the textual 'rating' column by keeping the
# token before the first space, then discard the original text column along
# with the two count columns.
df['rate'] = df['rating'].map(lambda rating_text: int(rating_text.split(' ')[0]))
df = df.drop(columns=['Number of Ratings', 'rating', 'Number of Reviews'])

# 'Price' is the regression target; every remaining column is a feature.
y = df['Price']
X = df.drop(columns='Price')

# Hold out a third of the rows for evaluation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Model definition: one-hot encode the object-dtype columns and fit a Ridge
# regression on the encoded matrix.
categorical_features = X.select_dtypes(include=['object'])

# Categories not seen during fit are encoded as all zeros instead of raising.
cp = Pipeline(
    steps=[
        ("onehot", OneHotEncoder(handle_unknown='ignore')),
    ]
)

# NOTE: only the object-dtype columns are routed through the transformer.
# With remainder='drop' (the scikit-learn default, spelled out here) every
# other column -- including the integer 'rate' -- is discarded and never
# reaches the regressor.
pr = ColumnTransformer(
    transformers=[
        ("categorical", cp, categorical_features.columns),
    ],
    remainder='drop',
)

pipe = Pipeline(
    steps=[
        ('preprocessor', pr),
        ('regressor', Ridge()),
    ]
)

# Train on the training split only.
pipe.fit(X_train, y_train)

# Pipeline.score delegates to the final estimator, which for Ridge reports
# the coefficient of determination (R^2).
train_score = pipe.score(X_train, y_train)
test_score = pipe.score(X_test, y_test)

print("Training R^2:", train_score)
print("Test R^2:", test_score)

# Persist the fitted pipeline (preprocessing + regressor) to disk so it can
# be reloaded later without retraining.
with open('pipemodel.pkl', 'wb') as model_file:
    pickle.dump(pipe, model_file)

# A single laptop specification to price, written as column -> value so each
# value sits next to the feature it belongs to. No 'rate' value is supplied.
sample_specs = {
    'brand': 'Lenovo',
    'processor_brand': 'Intel',
    'processor_name': 'Core i3',
    'processor_gnrtn': '10th',
    'ram_gb': '4 GB',
    'ram_type': 'DDR4',
    'ssd': '0 GB',
    'hdd': '1024 GB',
    'os': 'Windows',
    'os_bit': '64-bit',
    'graphic_card_gb': '0 GB',
    'weight': 'Casual',
    'warranty': 'No warranty',
    'Touchscreen': 'No',
    'msoffice': 'No',
}
new_data = pd.DataFrame([sample_specs])

# Run the row through the full pipeline so it is encoded by the fitted
# preprocessor before reaching the regressor.
predicted_price = pipe.predict(new_data)

print("Predicted Price: {:.2f}".format(predicted_price[0]))


Training R^2: 0.8065490207111958
Test R^2: 0.7360616993085507
Predicted Price: 44473.67
