In [20]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Read the phone catalogue from disk and preview its first five rows.
df = pd.read_csv(filepath_or_buffer = "phones_with_image_path.csv")
df.head(n = 5)

Unnamed: 0,name,link,price,rating,spec_score,processor_brand,processor_core,clock_speed,has_ir,has_nfc,...,display_refresh_rate,ppi,num_rear_cameras,num_front_cameras,rear_primary_mp,front_primary_mp,expandable_memory_supported,expandable_memory_limit,screen_type,image_path
0,Doogee S119,https://www.smartprix.com/mobiles/doogee-s119-...,29999.0,4.4,89,helio,octa,2.0,0,1,...,120.0,391.64,3,1,100.0,16.0,1,512.0,LCD,https://cdn1.smartprix.com/rx-iDLrTC6Js-w420-h...
1,Realme Narzo 80x 5G (8GB RAM + 128GB),https://www.smartprix.com/mobiles/realme-narzo...,14499.0,4.05,80,dimensity,octa,2.5,0,0,...,120.0,391.64,2,1,50.0,8.0,1,1024.0,LCD,https://cdn1.smartprix.com/rx-iMN81O6Eq-w420-h...
2,OnePlus 5,https://www.smartprix.com/mobiles/oneplus-5-p1...,22999.0,4.4,74,snapdragon,octa,2.45,0,1,...,120.0,400.53,2,1,20.0,16.0,0,0.0,AMOLED,https://cdn1.smartprix.com/rx-iMRK2tPAg-w420-h...
3,Xiaomi Redmi Note 10 (6GB RAM + 128GB),https://www.smartprix.com/mobiles/xiaomi-redmi...,14999.0,4.5,78,snapdragon,octa,2.2,1,0,...,120.0,409.3,4,1,48.0,13.0,1,512.0,SUPER AMOLED,https://cdn1.smartprix.com/rx-iOuwWGdpp-w420-h...
4,Realme Narzo 11,https://www.smartprix.com/mobiles/realme-narzo...,12999.0,4.35,75,helio,octa,2.2,0,0,...,120.0,265.84,4,1,50.0,16.0,1,256.0,LCD,https://cdn1.smartprix.com/rx-iRcwwMVqi-w420-h...


In [8]:
# Columns treated as categorical (one-hot encoded further down).
categorical_columns = ['processor_brand', 'brand', 'processor_core', 'screen_type']
# The name and the two URL columns are not used as similarity features.
features = df.drop(['name', 'link', 'image_path'], axis = 'columns')
# Every remaining non-categorical column is treated as numerical, in column order.
# NOTE(review): nothing here checks that these columns are actually numeric.
numerical_cols = list(filter(lambda column: column not in categorical_columns, features.columns))

In [15]:
# One-hot encode the categorical columns and standardise the numerical ones.
# handle_unknown = 'ignore' keeps the encoder from raising on categories
# that were not present when it was fitted.
preprocessor = ColumnTransformer(
    transformers = [
        ('cat', OneHotEncoder(handle_unknown = 'ignore'), categorical_columns),
        ('num', StandardScaler(), numerical_cols),
    ],
)

In [18]:
# Fit the preprocessing steps on the feature table and encode it in one call.
X = preprocessor.fit_transform(X = features)

In [38]:
# Sanity check: rank every phone by cosine similarity to the first row of X.
# Only that one row of similarities is computed (no full N x N matrix is needed
# for this check), and only the 10 best (position, score) pairs are displayed
# so the cell output stays readable.
sorted(enumerate(cosine_similarity(X[:1], X)[0]), key = lambda pair: pair[1], reverse = True)[:10]

0

In [39]:
# Pairwise cosine similarity between all rows of X; entry [i, j] compares
# row i with row j (Y = None means X is compared against itself).
similarity_matrix = cosine_similarity(X, Y = None)

In [70]:
def recommend(phone_name, top_n = 5):
    """Return the phones most similar to `phone_name`.

    Relies on the notebook-level `df` and `similarity_matrix`; row i of
    `similarity_matrix` is expected to describe the i-th row of `df`.

    Parameters
    ----------
    phone_name : str
        Name to look up in ``df['name']``. The comparison is case-insensitive.
        If several rows carry the same name, the first one is used.
    top_n : int, default 5
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Up to `top_n` rows of `df`, ordered from most to least similar, with
        the columns name, link, price, ram_gb, rom_gb, rear_primary_mp,
        front_primary_mp and image_path. The ranking covers every row of
        `df`, so the queried phone itself is part of it and usually comes
        first. Ties keep the row order of `df`.

    Raises
    ------
    IndexError
        If no row of `df` has the requested name.
    ValueError
        If `top_n` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Work with row *positions*, not index labels: similarity_matrix is a plain
    # array, so a label is only a valid row number when df has a default RangeIndex.
    name_matches = df['name'].str.lower().eq(phone_name.lower()).fillna(False).to_numpy(dtype = bool)
    matching_positions = np.flatnonzero(name_matches)
    if matching_positions.size == 0:
        raise IndexError(f"No phone named {phone_name!r} found in df['name']")
    position = matching_positions[0]

    # Stable descending sort, so equal scores keep their original row order.
    scores = np.asarray(similarity_matrix[position])
    top_positions = np.argsort(-scores, kind = 'stable')[:top_n]

    output_columns = ['name', 'link', 'price', 'ram_gb', 'rom_gb', 'rear_primary_mp', 'front_primary_mp', 'image_path']
    similar_phones = df.iloc[top_positions][output_columns]
    return similar_phones

In [71]:
# List every distinct phone name present in the catalogue.
df.loc[:, 'name'].unique()

array(['Doogee S119', 'Realme Narzo 80x 5G (8GB RAM + 128GB)',
       'OnePlus 5', ..., 'Motorola Moto G Go', 'OPPO Reno 2',
       'Apple iPhone 14 Plus'], dtype=object)

In [72]:
# Example query: fetch the recommendations for one phone from the catalogue.
similar_phones = recommend(phone_name = "Realme Narzo 50")

In [73]:
# Display the rows currently stored in `similar_phones`.
similar_phones

Unnamed: 0,name,link,price,ram_gb,rom_gb,rear_primary_mp,front_primary_mp,image_path
23,Realme Narzo 50,https://www.smartprix.com/mobiles/realme-narzo...,15999.0,4.0,64.0,50.0,16.0,https://cdn1.smartprix.com/rx-iafZEXyVl-w420-h...
1323,Realme 8i,https://www.smartprix.com/mobiles/realme-8i-pp...,9690.0,4.0,64.0,50.0,16.0,https://cdn1.smartprix.com/rx-iALTIcaCV-w420-h...
172,Vivo Y19,https://www.smartprix.com/mobiles/vivo-y19-ppd...,20000.0,4.0,128.0,16.0,16.0,https://cdn1.smartprix.com/rx-i2ELF2qhU-w420-h...
1454,Realme 6i,https://www.smartprix.com/mobiles/realme-6i-pp...,9999.0,4.0,64.0,48.0,16.0,https://cdn1.smartprix.com/rx-igAvq7Z0m-w420-h...
1263,Realme 6i (6GB RAM + 64GB),https://www.smartprix.com/mobiles/realme-6i-6g...,10990.0,6.0,64.0,48.0,16.0,https://cdn1.smartprix.com/rx-iptc97ljb-w420-h...


In [76]:
# Print the recommended phone names, one per line, in ranking order.
for phone in similar_phones['name']:
    print(phone)

Realme Narzo 50
Realme 8i
Vivo Y19
Realme 6i
Realme 6i (6GB RAM + 64GB)


In [67]:
# # Exporting the phone DataFrame and the similarity matrix as pickle files (disabled)
# import pickle
# with open("phones_with_image_path.pkl", "wb") as f:
#     pickle.dump(df, f)

# with open("similarity_matrix.pkl", "wb") as f:
#     pickle.dump(similarity_matrix, f)

In [66]:
# Inspect the pairwise cosine-similarity matrix (NumPy abbreviates the repr of large arrays).
similarity_matrix

array([[ 1.        ,  0.15852025, -0.19163173, ..., -0.2517849 ,
         0.17937141, -0.16082402],
       [ 0.15852025,  1.        , -0.25736652, ...,  0.1146003 ,
        -0.04386213,  0.06183405],
       [-0.19163173, -0.25736652,  1.        , ...,  0.1114273 ,
         0.15790891,  0.24829826],
       ...,
       [-0.2517849 ,  0.1146003 ,  0.1114273 , ...,  1.        ,
         0.01172659, -0.23330545],
       [ 0.17937141, -0.04386213,  0.15790891, ...,  0.01172659,
         1.        , -0.09248385],
       [-0.16082402,  0.06183405,  0.24829826, ..., -0.23330545,
        -0.09248385,  1.        ]])