In [162]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [163]:
Data = pd.read_csv('Data/Mobiles_Dataset.csv')

In [164]:
Data.shape

(984, 12)

In [165]:
#reseting index
Data.reset_index(inplace=True)

In [166]:
#Removing Actual Price column
Data = Data.iloc[:,[0,1,3,4,5,6,7,8,9,10,11,12]]

In [167]:
Data.reset_index(inplace=True)

In [168]:
Data.head(1)

Unnamed: 0,level_0,index,Product Name,Discount price,Stars,Rating,Reviews,RAM (GB),Storage (GB),Display Size (inch),Camera,Description,Link
0,0,0,"Apple iPhone 15 (Green, 128 GB)","₹65,999",4.6,"44,793 Ratings","2,402 Reviews",NIL,128,6.1,48MP + 12MP,128 GB ROM15.49 cm (6.1 inch) Super Retina XDR...,https://www.flipkart.com/apple-iphone-15-green...


In [169]:
# renaming columns for easy nomenclature
Data=Data.copy()
Data.rename(columns={"index":"id",
                     "Product Name":"name",
                     "Discount price":"price",
                     "Stars":"stars",
                     "Reviews":"reviews",
                     "Rating":"rating",
                     "RAM (GB)":"RAM",
                     "Storage (GB)":"storage",
                    "Display Size (inch)":"display",
                    "Camera":"camera",
                    "Description":"desc",
                    "Link":"url"},inplace=True)

In [170]:
Data.head(1)

Unnamed: 0,level_0,id,name,price,stars,rating,reviews,RAM,storage,display,camera,desc,url
0,0,0,"Apple iPhone 15 (Green, 128 GB)","₹65,999",4.6,"44,793 Ratings","2,402 Reviews",NIL,128,6.1,48MP + 12MP,128 GB ROM15.49 cm (6.1 inch) Super Retina XDR...,https://www.flipkart.com/apple-iphone-15-green...


In [171]:
# Modifying the product name and removing the whole bracket
Data['name'] = Data['name'].str.replace(r'\s*\(.*?\)', '', regex=True)

In [172]:
# Converting rating column into int
# Removing the word "Ratings" and commas, then convert to integer
Data['rating'] = Data['rating'].str.replace(' Ratings', '').str.replace(',', '').astype(int)

In [173]:
#Same with reviews
Data['reviews'] = Data['reviews'].str.replace(' Reviews', '').str.replace(',', '').astype(int)

In [174]:
#Same with price
Data['price'] = Data['price'].str.replace('₹', '').str.replace(',', '').str.strip()
Data['price'] = Data['price'].replace('', pd.NA)
Data.dropna(subset=['price'], inplace=True)
Data['price'] = Data['price'].astype(int)

In [175]:
Data.dtypes

level_0      int64
id           int64
name        object
price        int32
stars      float64
rating       int32
reviews      int32
RAM         object
storage     object
display    float64
camera      object
desc        object
url         object
dtype: object

In [176]:
Data.shape

(980, 13)

In [178]:
Data=Data.drop_duplicates(['name'])

In [179]:
Data.head(1)

Unnamed: 0,level_0,id,name,price,stars,rating,reviews,RAM,storage,display,camera,desc,url
0,0,0,Apple iPhone 15,65999,4.6,44793,2402,NIL,128,6.1,48MP + 12MP,128 GB ROM15.49 cm (6.1 inch) Super Retina XDR...,https://www.flipkart.com/apple-iphone-15-green...


In [180]:
# Removing Products which has ratings and reviews less than 100
mask1=Data['rating']>=50
mask2=Data['reviews']>=10
Data = Data[mask1 & mask2]

In [181]:
Data.shape

(245, 13)

In [182]:
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [183]:
Data['features'] = (
    Data['name'] + ' ' +
    Data['camera'] + ' ' +
    Data['price'].astype(str) + ' ' +
    Data['stars'].astype(str) + ' ' +
    Data['RAM'].astype(str) + ' ' +
    Data['storage'].astype(str)
)

In [184]:
Data['features'].fillna('unknown', inplace=True)
Data['features'] = Data['features'].astype(str)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  Data['features'].fillna('unknown', inplace=True)


In [185]:
# Use TfidfVectorizer to create a matrix of features
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(Data['features'])

In [186]:
# Compute cosine similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [187]:
# Save the model and dataset
with open('mobile_recommender_model.pkl', 'wb') as model_file:
    pickle.dump(cosine_sim, model_file)

Data.to_csv('mobiles_dataset.csv', index=False)