In [10]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.tree import DecisionTreeClassifier

In [2]:
#Simple Linear Regression
data = pd.read_csv('mobile phone price prediction.csv')

data['Price'] = data['Price'].str.replace(',', '').astype(int)

X = data[['Spec_score']]
y = data['Price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

Mean Squared Error: 496563953.95930034
R^2 Score: 0.4492052287143623


In [3]:
# Multiple Linear Regression
X = data[['Spec_score', 'Rating']]

model_multi = LinearRegression()

model_multi.fit(X_train, y_train)

y_pred_multi = model_multi.predict(X_test)

mse_multi = mean_squared_error(y_test, y_pred_multi)
r2_multi = r2_score(y_test, y_pred_multi)

print(f'Mean Squared Error (Multiple): {mse_multi}')
print(f'R^2 Score (Multiple): {r2_multi}')

Mean Squared Error (Multiple): 496563953.95930034
R^2 Score (Multiple): 0.4492052287143623


In [5]:
# Regression Tree Model
tree_model = DecisionTreeRegressor()

tree_model.fit(X_train, y_train)

y_pred_tree = tree_model.predict(X_test)

mse_tree = mean_squared_error(y_test, y_pred_tree)
r2_tree = r2_score(y_test, y_pred_tree)

print(f'Mean Squared Error (Tree): {mse_tree}')
print(f'R^2 Score (Tree): {r2_tree}')

Mean Squared Error (Tree): 338408343.77111655
R^2 Score (Tree): 0.6246333532219278


In [8]:
# Logistic Regression Model
median_price = data['Price'].median()
data['Price_Category'] = (data['Price'] > median_price).astype(int)

X = data[['Spec_score', 'Rating']]
y = data['Price_Category']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

logistic_model = LogisticRegression()

logistic_model.fit(X_train, y_train)

y_pred_logistic = logistic_model.predict(X_test)

print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_logistic))
print("\nClassification Report:\n", classification_report(y_test, y_pred_logistic))

Confusion Matrix:
 [[108  16]
 [ 19 131]]

Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.87      0.86       124
           1       0.89      0.87      0.88       150

    accuracy                           0.87       274
   macro avg       0.87      0.87      0.87       274
weighted avg       0.87      0.87      0.87       274



In [11]:
# Classification Tree Model
tree_classifier = DecisionTreeClassifier()

tree_classifier.fit(X_train, y_train)

y_pred_tree_class = tree_classifier.predict(X_test)

print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_tree_class))
print("\nClassification Report:\n", classification_report(y_test, y_pred_tree_class))

Confusion Matrix:
 [[113  11]
 [ 30 120]]

Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.91      0.85       124
           1       0.92      0.80      0.85       150

    accuracy                           0.85       274
   macro avg       0.85      0.86      0.85       274
weighted avg       0.86      0.85      0.85       274

