<a href="https://colab.research.google.com/github/SwatiMishra01/Smart-watch-price-predictor/blob/main/Smart_watch_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Data manipulation
import pandas as pd
import numpy as np

# Data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Machine learning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score


In [3]:
import io
import zipfile
from pathlib import Path

from google.colab import files

# Open Colab's upload dialog; the result maps each uploaded file name to its bytes.
uploaded = files.upload()

# The dataset arrives as a zip archive, while the loading cell reads
# 'data/Smart watch prices.csv'. Unpack every uploaded archive into data/ so
# that path exists after a fresh "Restart and run all".
# NOTE(review): assumes the CSV sits at the top level of the archive - confirm.
data_dir = Path('data')
data_dir.mkdir(exist_ok=True)
for upload_name, payload in uploaded.items():
    # Non-zip uploads are left alone; Colab has already saved them to the working directory.
    if zipfile.is_zipfile(io.BytesIO(payload)):
        with zipfile.ZipFile(io.BytesIO(payload)) as archive:
            archive.extractall(data_dir)


Saving archive (1).zip to archive (1).zip


In [7]:

# Path to the smart-watch price dataset, relative to the working directory.
# The file name contains spaces and must match exactly.
csv_file = 'data/Smart watch prices.csv'

# Read the CSV into a DataFrame (pandas is imported in the first cell).
df = pd.read_csv(csv_file)

# Preview the first five rows.
df.head()


Unnamed: 0,Brand,Model,Operating System,Connectivity,Display Type,Display Size (inches),Resolution,Water Resistance (meters),Battery Life (days),Heart Rate Monitor,GPS,NFC,Price (USD)
0,Apple,Watch Series 7,watchOS,"Bluetooth, Wi-Fi, Cellular",Retina,1.9,396 x 484,50,18,Yes,Yes,Yes,$399
1,Samsung,Galaxy Watch 4,Wear OS,"Bluetooth, Wi-Fi, Cellular",AMOLED,1.4,450 x 450,50,40,Yes,Yes,Yes,$249
2,Garmin,Venu 2,Garmin OS,"Bluetooth, Wi-Fi",AMOLED,1.3,416 x 416,50,11,Yes,Yes,No,$399
3,Fitbit,Versa 3,Fitbit OS,"Bluetooth, Wi-Fi",AMOLED,1.58,336 x 336,50,6,Yes,Yes,Yes,$229
4,Fossil,Gen 6,Wear OS,"Bluetooth, Wi-Fi",AMOLED,1.28,416 x 416,30,24,Yes,Yes,Yes,$299


In [15]:
# Show the column labels currently present on df.
# NOTE(review): the recorded output below lists 'Price', 'Res_Width' and
# 'Res_Height' and omits 'Resolution', although no cell above this one creates
# or removes those columns - the output appears to come from an out-of-order
# run; confirm with Restart & Run All.
df.columns


Index(['Brand', 'Model', 'Operating System', 'Connectivity', 'Display Type',
       'Display Size (inches)', 'Water Resistance (meters)',
       'Battery Life (days)', 'Heart Rate Monitor', 'GPS', 'NFC',
       'Price (USD)', 'Price', 'Res_Width', 'Res_Height'],
      dtype='object')

In [17]:
# 1. Clean the price column: strip the currency symbol, thousands separators
#    and surrounding whitespace, then convert the text to float.
df['Price'] = (
    df['Price (USD)']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .str.strip()
    .astype(float)
)

# 2. Derive numeric resolution features when they are not present yet.
#    The raw CSV only carries a text column such as "396 x 484", but the later
#    cells expect 'Res_Width' and 'Res_Height'. Unparseable values become NaN,
#    which the numeric clean-up cell further down fills with the column median.
#    The guard keeps this cell idempotent when it is re-run.
if 'Resolution' in df.columns and not {'Res_Width', 'Res_Height'} <= set(df.columns):
    resolution_parts = df['Resolution'].astype(str).str.extract(r'(\d+)\s*[xX]\s*(\d+)')
    df['Res_Width'] = pd.to_numeric(resolution_parts[0], errors='coerce')
    df['Res_Height'] = pd.to_numeric(resolution_parts[1], errors='coerce')

# 3. One-hot encode the categorical columns. drop_first=True omits the first
#    level of every feature, so that level is represented by all-zero dummies.
categorical_features = ['Brand', 'Model', 'Operating System', 'Connectivity',
                        'Display Type', 'Heart Rate Monitor', 'GPS', 'NFC']

df_encoded = pd.get_dummies(df, columns=categorical_features, drop_first=True)

# 4. Drop the raw text columns that the numeric 'Price' / 'Res_*' columns
#    replace; errors='ignore' makes this a no-op when a column is already gone.
df_encoded = df_encoded.drop(columns=['Price (USD)', 'Resolution'], errors='ignore')

# Preview the encoded frame.
df_encoded.head()


Unnamed: 0,Display Size (inches),Water Resistance (meters),Battery Life (days),Price,Res_Width,Res_Height,Brand_Apple,Brand_Asus,Brand_Casio,Brand_Diesel,...,Display Type_Sunlight-visible,"Display Type_Sunlight-visible, transflective memory-in-pixel (MIP)",Display Type_Super AMOLED,Display Type_TFT,Display Type_TFT LCD,Display Type_TFT-LCD,Display Type_Transflective,Display Type_transflective,GPS_Yes,NFC_Yes
0,1.9,50,18,399.0,396,484,True,False,False,False,...,False,False,False,False,False,False,False,False,True,True
1,1.4,50,40,249.0,450,450,False,False,False,False,...,False,False,False,False,False,False,False,False,True,True
2,1.3,50,11,399.0,416,416,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
3,1.58,50,6,229.0,336,336,False,False,False,False,...,False,False,False,False,False,False,False,False,True,True
4,1.28,30,24,299.0,416,416,False,False,False,False,...,False,False,False,False,False,False,False,False,True,True


In [18]:
# Target is the cleaned numeric price; every other column is a feature.
y = df_encoded['Price']
X = df_encoded.drop(columns='Price')


In [19]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [21]:
# Columns that must hold numbers before modelling.
numeric_cols = ['Display Size (inches)', 'Water Resistance (meters)',
                'Battery Life (days)', 'Res_Width', 'Res_Height']

# For each column: turn anything non-numeric (e.g. 'Not specified') into NaN,
# then fill the gaps with that column's median.
# NOTE(review): the median is computed over all rows, including those that the
# later split assigns to the test set.
for col in numeric_cols:
    as_number = pd.to_numeric(df_encoded[col], errors='coerce')
    df_encoded[col] = as_number.fillna(as_number.median())


In [22]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Rebuild target and features from the numerically cleaned frame.
y = df_encoded['Price']
X = df_encoded.drop(columns='Price')

# 80/20 split with a fixed seed so the evaluation rows are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest; the fitted estimator is the cell's displayed value.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)


In [24]:
# Predict prices for the held-out test rows with the fitted random forest.
y_pred = rf.predict(X_test)


In [25]:
from sklearn.metrics import mean_squared_error, r2_score

# RMSE is the square root of the mean squared error, so it is expressed in the
# same units as the target.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Coefficient of determination on the same held-out rows.
r2 = r2_score(y_test, y_pred)

print(f"RMSE: {rmse:.2f}")
print(f"R2 Score: {r2:.2f}")


RMSE: 78.82
R2 Score: 0.66


In [28]:
# Specs of the watch to price, expressed with the original (pre-encoding) columns.
new_watch_raw = pd.DataFrame([{
    'Brand': 'Apple',
    'Model': 'Watch Series 7',
    'Operating System': 'watchOS',
    'Connectivity': 'Bluetooth, Wi-Fi, Cellular',
    'Display Type': 'Retina',
    'Display Size (inches)': 1.5,
    'Water Resistance (meters)': 50,
    'Battery Life (days)': 14,
    'Heart Rate Monitor': 'Yes',
    'GPS': 'Yes',
    'NFC': 'Yes',
    'Res_Width': 400,
    'Res_Height': 400,
}])

# One-hot encode the single row, then align it to the training layout:
# reindex adds every training column the row lacks (filled with 0), discards
# dummy columns the model never saw, and puts the columns in training order.
new_watch_encoded = (
    pd.get_dummies(new_watch_raw)
    .reindex(columns=X_train.columns, fill_value=0)
)

# Predict and report the price for the aligned row.
predicted_price = rf.predict(new_watch_encoded)
print(f"Predicted Price: ${predicted_price[0]:.2f}")


Predicted Price: $282.90
