In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


In [2]:
# Load the phone spec sheet from the workbook in the working directory.
df = pd.read_excel(io="mobile_data.xlsx")

In [4]:

# Inspect the column labels of the loaded sheet.
df.columns

Index(['Mobile Name', 'Camera Quality (MP)', 'Processor Speed (GHz)',
       'RAM (GB)', 'Storage (GB)', 'Build Cost ($)',
       'Display Refresh Rate (Hz)', 'Battery Capacity (mAh)', 'Price ($)'],
      dtype='object')

In [5]:
from sklearn.preprocessing import LabelEncoder

# Turn the textual 'Mobile Name' column into integer codes: learn the
# distinct names first, then overwrite the column with their codes.
# NOTE(review): 'Mobile Name' is dropped a few cells later and is not part
# of the `features` list, so this encoding looks unused by the model -- confirm.
label_encoder = LabelEncoder()
label_encoder.fit(df['Mobile Name'])
df['Mobile Name'] = label_encoder.transform(df['Mobile Name'])


In [6]:
# Report how many missing values each column contains.
print(df.isnull().sum())

# Impute any gaps with the per-column mean.
# - numeric_only=True restricts the mean to numeric columns, so the call does
#   not raise if a non-numeric column is present (pandas >= 2.0 no longer
#   silently skips such columns).
# - Rebinding `df` instead of using inplace=True avoids mutating the frame
#   behind the back of earlier cells and keeps the cell safe to re-run.
df = df.fillna(df.mean(numeric_only=True))

Mobile Name                  0
Camera Quality (MP)          0
Processor Speed (GHz)        0
RAM (GB)                     0
Storage (GB)                 0
Build Cost ($)               0
Display Refresh Rate (Hz)    0
Battery Capacity (mAh)       0
Price ($)                    0
dtype: int64


In [7]:
from sklearn.preprocessing import StandardScaler

# Columns fed to the model as inputs; the order here is the column order of X.
features = [
    'Camera Quality (MP)',
    'RAM (GB)',
    'Storage (GB)',
    'Processor Speed (GHz)',
    'Build Cost ($)',
    'Display Refresh Rate (Hz)',
    'Battery Capacity (mAh)',
]

# Standardize the feature columns: learn the scaling statistics, then
# overwrite the columns with their scaled values.
# NOTE(review): the scaler is fitted on every row, before the train/test split
# performed later in the notebook, so held-out rows contribute to the scaling
# statistics -- consider fitting on the training rows only.
scaler = StandardScaler()
scaler.fit(df[features])
df[features] = scaler.transform(df[features])
df

Unnamed: 0,Mobile Name,Camera Quality (MP),Processor Speed (GHz),RAM (GB),Storage (GB),Build Cost ($),Display Refresh Rate (Hz),Battery Capacity (mAh),Price ($)
0,0,-0.876656,-1.767767,-0.520306,-0.717741,-0.365365,-0.980285,-0.501291,599
1,1,-0.317088,0.3535534,0.222988,0.12666,0.33726,-0.120386,0.427026,799
2,2,1.485963,1.06066,1.709577,1.815461,1.742508,1.427432,1.355342,999
3,3,-0.876656,-0.3535534,-0.520306,-0.717741,-0.646414,-0.120386,-0.129964,649
4,4,-0.317088,0.7071068,0.222988,0.12666,-0.084315,-0.980285,0.055699,749
5,5,1.485963,1.414214,1.709577,1.815461,1.461459,1.427432,1.726669,1099
6,6,-0.876656,-1.414214,-1.2636,-1.139941,-1.349039,-0.980285,-1.429608,399
7,7,-0.317088,-1.570092e-15,-0.520306,-0.717741,-0.786939,-0.120386,-0.872618,549
8,8,1.485963,0.7071068,0.222988,0.12666,0.758834,1.427432,0.612689,899
9,9,-0.876656,-0.7071068,-1.2636,-0.717741,-1.067989,-0.980285,-1.243944,449


In [8]:
# Display the working frame again; the columns listed in `features` hold standardized values at this point.
df

Unnamed: 0,Mobile Name,Camera Quality (MP),Processor Speed (GHz),RAM (GB),Storage (GB),Build Cost ($),Display Refresh Rate (Hz),Battery Capacity (mAh),Price ($)
0,0,-0.876656,-1.767767,-0.520306,-0.717741,-0.365365,-0.980285,-0.501291,599
1,1,-0.317088,0.3535534,0.222988,0.12666,0.33726,-0.120386,0.427026,799
2,2,1.485963,1.06066,1.709577,1.815461,1.742508,1.427432,1.355342,999
3,3,-0.876656,-0.3535534,-0.520306,-0.717741,-0.646414,-0.120386,-0.129964,649
4,4,-0.317088,0.7071068,0.222988,0.12666,-0.084315,-0.980285,0.055699,749
5,5,1.485963,1.414214,1.709577,1.815461,1.461459,1.427432,1.726669,1099
6,6,-0.876656,-1.414214,-1.2636,-1.139941,-1.349039,-0.980285,-1.429608,399
7,7,-0.317088,-1.570092e-15,-0.520306,-0.717741,-0.786939,-0.120386,-0.872618,549
8,8,1.485963,0.7071068,0.222988,0.12666,0.758834,1.427432,0.612689,899
9,9,-0.876656,-0.7071068,-1.2636,-0.717741,-1.067989,-0.980285,-1.243944,449


In [9]:
# 'Mobile Name' is not part of the `features` list, so remove it from the frame.
# errors='ignore' keeps the cell idempotent: re-running it after the column is
# already gone no longer raises KeyError.
df = df.drop(columns=['Mobile Name'], errors='ignore')
df

Unnamed: 0,Camera Quality (MP),Processor Speed (GHz),RAM (GB),Storage (GB),Build Cost ($),Display Refresh Rate (Hz),Battery Capacity (mAh),Price ($)
0,-0.876656,-1.767767,-0.520306,-0.717741,-0.365365,-0.980285,-0.501291,599
1,-0.317088,0.3535534,0.222988,0.12666,0.33726,-0.120386,0.427026,799
2,1.485963,1.06066,1.709577,1.815461,1.742508,1.427432,1.355342,999
3,-0.876656,-0.3535534,-0.520306,-0.717741,-0.646414,-0.120386,-0.129964,649
4,-0.317088,0.7071068,0.222988,0.12666,-0.084315,-0.980285,0.055699,749
5,1.485963,1.414214,1.709577,1.815461,1.461459,1.427432,1.726669,1099
6,-0.876656,-1.414214,-1.2636,-1.139941,-1.349039,-0.980285,-1.429608,399
7,-0.317088,-1.570092e-15,-0.520306,-0.717741,-0.786939,-0.120386,-0.872618,549
8,1.485963,0.7071068,0.222988,0.12666,0.758834,1.427432,0.612689,899
9,-0.876656,-0.7071068,-1.2636,-0.717741,-1.067989,-0.980285,-1.243944,449


In [10]:
# Design matrix: every row, only the columns named in `features`.
X = df.loc[:, features]
# Regression target: the price column.
y = df.loc[:, 'Price ($)']

In [11]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Echo the four pieces one after another, each on its own line.
print(X_train, X_test, y_train, y_test, sep='\n')

   Camera Quality (MP)  RAM (GB)  Storage (GB)  Processor Speed (GHz)  \
5             1.485963  1.709577      1.815461           1.414214e+00   
0            -0.876656 -0.520306     -0.717741          -1.767767e+00   
7            -0.317088 -0.520306     -0.717741          -1.570092e-15   
2             1.485963  1.709577      1.815461           1.060660e+00   
9            -0.876656 -1.263600     -0.717741          -7.071068e-01   
4            -0.317088  0.222988      0.126660           7.071068e-01   
3            -0.876656 -0.520306     -0.717741          -3.535534e-01   
6            -0.876656 -1.263600     -1.139941          -1.414214e+00   

   Build Cost ($)  Display Refresh Rate (Hz)  Battery Capacity (mAh)  
5        1.461459                   1.427432                1.726669  
0       -0.365365                  -0.980285               -0.501291  
7       -0.786939                  -0.120386               -0.872618  
2        1.742508                   1.427432              

In [12]:
# Ordinary least-squares regression of price on the training features.
# NOTE(review): `features` has 7 columns; with a very small training split the
# fit can reproduce the training rows almost exactly and generalize poorly.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [13]:
# Predict prices for the held-out rows and show the inputs followed by the predictions.
y_pred = model.predict(X_test)
print(X_test, y_pred, sep='\n')

# Score the predictions against the true held-out prices.
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}', f'R-squared: {r2}', sep='\n')

   Camera Quality (MP)  RAM (GB)  Storage (GB)  Processor Speed (GHz)  \
8             1.485963  0.222988       0.12666               0.707107   
1            -0.317088  0.222988       0.12666               0.353553   

   Build Cost ($)  Display Refresh Rate (Hz)  Battery Capacity (mAh)  
8        0.758834                   1.427432                0.612689  
1        0.337260                  -0.120386                0.427026  
[1399.03012048  838.0060241 ]
Mean Squared Error: 125775.79565248983
R-squared: -49.310318260995935


In [14]:
# Print summary statistics (count, mean, std, quartiles, extremes) of the training features.
print(X_train.describe())
# Show the full working frame as the cell's rendered output.
df

       Camera Quality (MP)  RAM (GB)  Storage (GB)  Processor Speed (GHz)  \
count             8.000000  8.000000      8.000000               8.000000   
mean             -0.146109 -0.055747     -0.031665              -0.132583   
std               1.036518  1.187776      1.192829               1.148563   
min              -0.876656 -1.263600     -1.139941              -1.767767   
25%              -0.876656 -0.706129     -0.717741              -0.883883   
50%              -0.596872 -0.520306     -0.717741              -0.176777   
75%               0.133675  0.594635      0.548860               0.795495   
max               1.485963  1.709577      1.815461               1.414214   

       Build Cost ($)  Display Refresh Rate (Hz)  Battery Capacity (mAh)  
count        8.000000                   8.000000                8.000000  
mean        -0.137012                  -0.163381               -0.129964  
std          1.143944                   1.051149                1.153079  
min   

Unnamed: 0,Camera Quality (MP),Processor Speed (GHz),RAM (GB),Storage (GB),Build Cost ($),Display Refresh Rate (Hz),Battery Capacity (mAh),Price ($)
0,-0.876656,-1.767767,-0.520306,-0.717741,-0.365365,-0.980285,-0.501291,599
1,-0.317088,0.3535534,0.222988,0.12666,0.33726,-0.120386,0.427026,799
2,1.485963,1.06066,1.709577,1.815461,1.742508,1.427432,1.355342,999
3,-0.876656,-0.3535534,-0.520306,-0.717741,-0.646414,-0.120386,-0.129964,649
4,-0.317088,0.7071068,0.222988,0.12666,-0.084315,-0.980285,0.055699,749
5,1.485963,1.414214,1.709577,1.815461,1.461459,1.427432,1.726669,1099
6,-0.876656,-1.414214,-1.2636,-1.139941,-1.349039,-0.980285,-1.429608,399
7,-0.317088,-1.570092e-15,-0.520306,-0.717741,-0.786939,-0.120386,-0.872618,549
8,1.485963,0.7071068,0.222988,0.12666,0.758834,1.427432,0.612689,899
9,-0.876656,-0.7071068,-1.2636,-0.717741,-1.067989,-0.980285,-1.243944,449


In [15]:
# Read the workbook again into a separate frame, leaving `df` untouched.
temp = pd.read_excel(io="mobile_data.xlsx")

In [16]:
# Display the frame that was just read from the workbook.
temp

Unnamed: 0,Mobile Name,Camera Quality (MP),Processor Speed (GHz),RAM (GB),Storage (GB),Build Cost ($),Display Refresh Rate (Hz),Battery Capacity (mAh),Price ($)
0,Model A,32,2.4,6,128,250,60,4000,599
1,Model B,50,3.0,8,256,300,90,4500,799
2,Model C,108,3.2,12,512,400,144,5000,999
3,Model D,32,2.8,6,128,230,90,4200,649
4,Model E,50,3.1,8,256,270,60,4300,749
5,Model F,108,3.3,12,512,380,144,5200,1099
6,Model G,32,2.5,4,64,180,60,3500,399
7,Model H,50,2.9,6,128,220,90,3800,549
8,Model I,108,3.1,8,256,330,144,4600,899
9,Model J,32,2.7,4,128,200,60,3600,449
