## Mobile Recommendation System Using Vectorization Similarity

In [1]:
import os
import re
import shutil
import warnings

import kaggle
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Make sure the local data directory exists, then fetch the dataset only when
# the CSV is not already present in it.
# NOTE(review): presumably requires Kaggle API credentials to be configured — confirm.
os.makedirs("./datasets", exist_ok=True)
if "mobile_recommendation_system_dataset.csv" not in os.listdir("./datasets/"):
    # Download the dataset archive; the unpack call below looks for the zip
    # relative to the current working directory.
    kaggle.api.dataset_download_files(
        "gyanprakashkushwaha/mobile-recommendation-system-dataset"
    )
    # Extract the archive contents into ./datasets/.
    shutil.unpack_archive(
        "mobile-recommendation-system-dataset.zip", extract_dir="./datasets/"
    )

In [3]:
# Load the dataset CSV into the notebook-wide DataFrame `df`.
df = pd.read_csv(
    "datasets/mobile_recommendation_system_dataset.csv",
    index_col=False,  # do not use the first CSV column as the index
)
# Preview the first rows.
df.head()

Unnamed: 0,name,ratings,price,imgURL,corpus
0,"REDMI Note 12 Pro 5G (Onyx Black, 128 GB)",4.2,23999,https://rukminim2.flixcart.com/image/312/312/x...,Storage128 GBRAM6 SystemAndroid 12Processor T...
1,"OPPO F11 Pro (Aurora Green, 128 GB)",4.5,"₹20,999",https://rukminim2.flixcart.com/image/312/312/k...,Storage128 GBRAM6 GBExpandable Storage256GB S...
2,"REDMI Note 11 (Starburst White, 64 GB)",4.2,13149,https://rukminim2.flixcart.com/image/312/312/x...,Storage64 GBRAM4 SystemAndroid 11Processor Sp...
3,"OnePlus Nord CE 5G (Blue Void, 256 GB)",4.1,21999,https://rukminim2.flixcart.com/image/312/312/x...,Storage256 GBRAM12 SystemAndroid Q 11Processo...
4,"APPLE iPhone 13 mini (Blue, 128 GB)",4.6,3537,https://rukminim2.flixcart.com/image/312/312/k...,Storage128 SystemiOS 15Processor TypeA15 Bion...


In [4]:
def random_values(col_name: str, total=6):
    """Print ``total`` randomly chosen entries of column ``col_name`` of ``df``.

    Each printed line has the form ``<row position> : <value>``. Row positions
    are drawn independently with ``np.random.randint``, so the same row may
    appear more than once and the output differs from run to run unless
    NumPy's global random state is seeded.

    Reads the notebook-level DataFrame ``df``; returns nothing.
    """
    for _ in range(total):
        position = np.random.randint(len(df))
        value = df[col_name].iloc[position]
        print(position, ":", value)


# Print a few random corpus entries to see what the text looks like.
random_values("corpus")

396 : Storage128 GBRAM6 GBExpandable Storage1TB  SystemAndroid 11Processor TypeMediaTek Dimensity 700 (MT6833V)Processor Speed2.2 48MP 5MP 2MP 48MP 5MP 2MP 8MP 8MP 5G Capacity5000 Display Size16.76 cm (6.6 inch)Resolution2408 x 1080 PixelsResolution TypeFull HD+GPUARM Mali G57 MC2Display TypeFull HD+ TFT DisplayDisplay Colors16MOther Display Features90Hz Refresh Rate
1936 : Storage2.27 MBRAM4  SystemAndroid 11Processor TypeMediatek Helio P35 (MT6765)Processor Speed2.3 13MP 2MP 2MP 13MP 2MP 2MP 5MP 5MP 4G Capacity1000 Display Size5.16 cm (2.03 inch)Resolution128 x 160 PixelsResolution TypeQVGA
2455 : Storage16 GBRAM2 GBExpandable Storage128GB  SystemAndroid Marshmallow 6.0.1Processor TypeQualcomm Snapdragon 430 64-bit Octa Core 1.4GHzProcessor Speed1.4 13MP 5MP 3G Capacity4100 Display Size12.7 cm (5 inch)Resolution1280 x 720 PixelsResolution TypeHDGPUAdreno 505Other Display FeaturesIPS Display
2483 : Storage256 GBRAM8  SystemAndroid MIUI 12, Android 11.0Processor Speed2.84 108MP 5G Capa

## Lowercasing the corpus values

In [5]:
# Replace the corpus column with its lower-cased version.
df["corpus"] = df["corpus"].str.lower()

In [6]:
# Re-inspect random corpus entries after lower-casing.
random_values("corpus")

2077 : storage128 gbram6 gbexpandable storage256gb  systemandroid oreo 8.1.0processor typemtk p60 octa core 2.0 ghzprocessor speed2 16mp 25mp 3g capacity3400 display size15.82 cm (6.23 inch)resolution2280 x 1080 pixelsresolution typefull hd+other display featuresscreen contrast: 1500:1, corning gorilla glass 5
2175 : storage32 gbram3 gbexpandable storage256gb  systemandroid pie 9.0processor typemediatek helio p70 octa core 2.1 ghz aiprocessor speed2.1 13mp 2mp 13mp 3g capacity4230 display size15.8 cm (6.22 inch)resolution1520 x 720 pixelsresolution typehd+gpuarm mali-g72display colors16.7mother display featuresnarrow frame: 2.05mm, screen ratio: 88.30%, screen contrast: typical - 1200:1, minimum - 800:1, color saturation: typical - 69.5%, minimum - 65%, maximum brightness: typical - 450 nits, minimum - 400 nits, touch panel glass type: corning gorilla glass 3, in-cell touch panel technology, lcd ips screen
284 : storage32 gbram2 gbexpandable storage512gb  systemandroid pie 9.0processor

In [7]:
# Count missing values per column.
df.isnull().sum()

name        0
ratings     0
price       0
imgURL      0
corpus     12
dtype: int64

In [8]:
# Remove rows that contain missing values, then renumber the index 0..n-1 so
# that index labels coincide with row positions. Other cells in this notebook
# mix label-based access (df["corpus"][i], df[...].index[0]) with positional
# access (.iloc, rows of the similarity matrix); without the reset the two
# disagree for every row located after the first dropped one.
df = df.dropna().reset_index(drop=True)

In [9]:
# Length of the longest corpus entry, measured in characters (len() of each
# string). NOTE: the printed label says "words", but the value is a character
# count.
#
# Iterating over the values directly avoids label-based lookups such as
# df["corpus"][i]: after rows have been dropped, some labels in
# range(len(df)) no longer exist and labels >= len(df) would never be visited,
# which previously was hidden by a bare `except: pass`.
max_len = max((len(text) for text in df["corpus"].dropna()), default=0)

print(f"max words len in corpus is: {max_len}")

max words len in corpus is: 1196


In [10]:
# Show ten randomly sampled rows of the current frame.
df.sample(10)

Unnamed: 0,name,ratings,price,imgURL,corpus
997,"vivo Y51 (Titanium Sapphire, 128 GB)",4.3,21990,https://rukminim2.flixcart.com/image/312/312/k...,storage128 gbram8 gbexpandable storage1tb sys...
1051,"REDMI 10 Prime (Astral White, 128 GB)",4.2,14999,https://rukminim2.flixcart.com/image/312/312/k...,storage128 gbram6 gbexpandable storage512gb s...
501,"APPLE iPhone 12 mini (Black, 128 GB)",4.4,9537,https://rukminim2.flixcart.com/image/312/312/k...,storage128 systemios 14processor typea14 bion...
1186,"OPPO F11 Pro (Thunder Black, 64 GB)",4.3,"₹18,500",https://rukminim2.flixcart.com/image/312/312/k...,storage64 gbram6 gbexpandable storage256gb sy...
2512,"SAMSUNG Galaxy S22 Ultra 5G (Phantom Black, 51...",4.2,21072,https://rukminim2.flixcart.com/image/312/312/x...,storage512 gbram12 systemandroid 11processor ...
2220,"MOTOROLA Edge 20 Fusion 5G (Cyber Teal, 128 GB)",4.1,20999,https://rukminim2.flixcart.com/image/312/312/k...,storage128 gbram8 systemandroid 11processor t...
294,"OnePlus Nord 2T 5G (Gray Shadow, 128 GB)",4.4,28799,https://rukminim2.flixcart.com/image/312/312/x...,storage128 gbram8 systemandroid 13processor s...
1978,"OPPO A12 (Flowing Silver, 64 GB)",4.4,"₹10,990",https://rukminim2.flixcart.com/image/312/312/k...,storage64 gbram4 gbexpandable storage256gb sy...
1400,"SAMSUNG Galaxy A13 (White, 64 GB)",4.3,14499,https://rukminim2.flixcart.com/image/312/312/l...,storagenacall log systemandroid 12processor t...
1678,"SAMSUNG Galaxy S10 (Prism Blue, 128 GB)",4.2,4464,https://rukminim2.flixcart.com/image/312/312/j...,storage128 gbram8 gbexpandable storage512gb s...


In [11]:
# Print a few random image URLs.
random_values("imgURL")

676 : https://rukminim2.flixcart.com/image/312/312/l0igvww0/mobile/y/j/1/-original-imagca5ge9yrbrzq.jpeg?q=70
217 : https://rukminim2.flixcart.com/image/312/312/k87nxjk0/mobile/m/f/k/redmi-note-8-pro-mzb8314in-original-imafqaf3mak86q8e.jpeg?q=70
612 : https://rukminim2.flixcart.com/image/312/312/kq6yefk0/mobile/i/w/m/11-lite-m2101k9ai-mi-original-imag496gkgqjrvvg.jpeg?q=70
1569 : https://rukminim2.flixcart.com/image/312/312/l2p23rk0/mobile/7/v/w/-original-imagdznjzgvkkhtx.jpeg?q=70
1629 : https://rukminim2.flixcart.com/image/312/312/k51cpe80pkrrdj/mobile/y/z/7/realme-5-rmx1911-original-imafje89tgyzdj7c.jpeg?q=70
790 : https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/e/d/r/-original-imagmym54wrhzgpn.jpeg?q=70


## Vectorization (converting corpus text into token-count vectors)

In [12]:
# Token-count vectorizer whose vocabulary is capped at 1000 terms
# (max_features).
cont_vect = CountVectorizer(max_features=1000)

# Learn the vocabulary from the corpus column and build the document-term
# count matrix in one step.
arrays = cont_vect.fit_transform(df["corpus"])

# Convert the result of fit_transform into a dense NumPy array
# (one row per corpus entry, one column per vocabulary term).
vectors = arrays.toarray()

# Display the learned vocabulary terms.
cont_vect.get_feature_names_out()

array(['00', '000', '02', '03', '04', '05', '07', '07bother', '08', '09',
       '0mp', '0primary', '0processor', '10', '100', '1000', '100000',
       '1000000', '100mp', '103', '104', '106', '1080', '1080processor',
       '1080resolution', '108mp', '10mp', '10processor', '11', '1100',
       '1125', '1170', '1179', '11primary', '11processor', '12', '120',
       '1200', '1200processor', '120hz', '1242', '1280processor', '1284',
       '1290', '12mp', '12primary', '12processor', '13', '1300',
       '1300processor', '1330', '1334', '135', '13mp', '13processor',
       '14', '1400', '144', '1440', '144hz', '1480', '14processor', '15',
       '1500', '1544', '15processor', '16', '160', '1600', '16000',
       '1600resolution', '1612', '16mp', '16processor', '18', '180',
       '1800', '180hz', '19', '1920', '1920hz', '1processor', '20', '200',
       '2000000', '200mp', '20mp', '21', '22', '23', '2340', '24', '240',
       '2400', '2400resolution', '2408', '240hz', '2460', '24mp', '25m

## Calculating cosine similarity for recommendation

In [13]:
# Pairwise cosine similarity between all rows of `vectors`; entry [i, j] is the
# similarity between row i and row j.
similarity = cosine_similarity(vectors)
# Show the matrix dimensions.
similarity.shape

(2534, 2534)

In [14]:
# Similarity scores of row 3 against every row.
similarity[3]

array([0.3345748 , 0.17485208, 0.53136893, ..., 0.57564968, 0.27586342,
       0.37283655])

In [15]:
# (row position, score) pairs for row 0, ordered from most to least similar.
# The slice starts at 1 to skip the first-ranked pair, which is expected to be
# row 0 itself, and keeps the next ten.
sorted(enumerate(similarity[0]), key=lambda pair: pair[1], reverse=True)[1:11]

[(68, 0.9999999999999999),
 (303, 0.9999999999999999),
 (616, 0.9850746268656716),
 (1461, 0.9850746268656716),
 (457, 0.9701492537313433),
 (1027, 0.9701492537313433),
 (1735, 0.9701492537313433),
 (569, 0.9468131937613656),
 (1123, 0.9468131937613656),
 (1540, 0.9468131937613656)]

In [16]:
def recommend(mobile, *, top_n=10, data=None, sim=None):
    """Print the names of the phones most similar to ``mobile``.

    Parameters
    ----------
    mobile : str
        Exact value of the ``name`` column to look up. If several rows carry
        this name, the first one is used.
    top_n : int, default 10
        Maximum number of recommendations to print.
    data : pandas.DataFrame, optional
        Frame with a ``name`` column. Defaults to the notebook-level ``df``.
    sim : 2-D array-like, optional
        Square similarity matrix whose row/column order matches the row order
        of ``data``. Defaults to the notebook-level ``similarity``.

    Raises
    ------
    IndexError
        If no row is named ``mobile``.
    """
    frame = df if data is None else data
    matrix = similarity if sim is None else sim

    # Use the row *position*, not the index label: the similarity matrix is
    # positional, and labels no longer match positions once rows have been
    # dropped from the frame without resetting the index.
    matches = np.flatnonzero((frame["name"] == mobile).to_numpy())
    if matches.size == 0:
        raise IndexError(f"no mobile named {mobile!r} in the dataset")
    mobile_pos = int(matches[0])

    scores = np.asarray(matrix[mobile_pos])
    # Stable sort on the negated scores: descending similarity, ties kept in
    # original row order.
    ranked = np.argsort(-scores, kind="stable")
    # Exclude the query row explicitly instead of assuming it sorts first;
    # exact duplicates can tie with it at a similarity of ~1.0.
    ranked = ranked[ranked != mobile_pos][: max(int(top_n), 0)]

    for pos in ranked:
        print(frame["name"].iloc[pos])

In [17]:
# Example query: print recommendations for one phone, looked up by its exact name.
recommend("APPLE iPhone 13 mini (Blue, 128 GB)")

APPLE iPhone 13 mini ((PRODUCT)RED, 128 GB)
APPLE iPhone 13 mini (Starlight, 128 GB)
APPLE iPhone 13 mini (Green, 128 GB)
APPLE iPhone 13 mini (Midnight, 128 GB)
APPLE iPhone 13 mini (Green, 256 GB)
APPLE iPhone 13 mini (Midnight, 512 GB)
APPLE iPhone 13 mini (Pink, 512 GB)
APPLE iPhone 13 mini (Starlight, 256 GB)
APPLE iPhone 13 mini (Midnight, 256 GB)
APPLE iPhone 13 mini (Pink, 256 GB)
