<a href="https://colab.research.google.com/github/Rivi9/Laptop-Recommendation-System/blob/Main-Model/laptop_recommender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Necessary Libraries

In [1]:
import pandas as pd
import numpy as np
import nltk
from nltk.stem.porter import PorterStemmer
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize
from sklearn.metrics.pairwise import cosine_similarity
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from forex_python.converter import CurrencyRates

# word_tokenize() needs the Punkt tokenizer data. Newer NLTK releases load it
# from the 'punkt_tab' package, older ones from 'punkt'; request both so the
# notebook runs on either. A package the installed NLTK does not know is only
# reported (nltk.download() returns False), it does not raise.
nltk.download('punkt_tab')
nltk.download('punkt')

ModuleNotFoundError: No module named 'nltk'

# Data Cleaning and Formatting

**Load the Data and Examine It**

In [None]:
# Read the laptop listings from 'laptops.csv' (path is relative to the working directory).
df = pd.read_csv('laptops.csv')

In [None]:
df.head(3)

In [None]:
df.shape

In [None]:
df

**Data Types and Missing Values**

In [None]:
df.info()

In [None]:
df.isnull().sum()

**Drop Columns and Null Values**

In [None]:
# The rating/review columns are not used by any later cell, so remove them.
df = df.drop(columns=['rating', 'no_of_ratings', 'no_of_reviews'])

In [None]:
df.head()

In [None]:
# Drop every row that has a missing value, then renumber the rows 0..n-1.
# The similarity matrix built further down is addressed by row *position*;
# without the reset the index labels keep the gaps left by the dropped rows,
# so a label taken from `.index[...]` would point at the wrong matrix row
# (or past its end).
df = df.dropna().reset_index(drop=True)

In [None]:
df.isnull().sum()

In [None]:
df.nunique()

In [None]:
df.shape

In [None]:
df.duplicated().sum()

In [None]:
df.iloc[0].laptop_brand

In [None]:
df.head(1)

In [None]:
# Turn the free-text columns into lists of whitespace-separated tokens.
# Keys are destination columns, values are source columns: the three spec
# columns are tokenised in place, while 'usecases' stays as it is and its
# tokens are written to a new 'use' column.
token_columns = {'processor': 'processor', 'ram': 'ram', 'os': 'os', 'use': 'usecases'}
for target, source in token_columns.items():
    df[target] = df[source].apply(lambda text: text.split())

In [None]:
# Each operand holds a list of tokens per row, so `+` concatenates the lists:
# 'tags' becomes one combined token list (processor, ram, os, use) per laptop.
df['tags'] = df['processor']+df['ram']+df['os']+df['use']

In [None]:
df.head(1)

In [None]:
df.iloc[0].tags

In [None]:
# Working frame for the recommender: drop the token-list columns that were
# merged into 'tags' ('processor', 'ram', 'os', 'use') together with 'storage',
# 'os_brand' and 'processor_brand', which are not used from here on.
new = df.drop(columns=['processor','ram','os','storage','os_brand','processor_brand','use'])

In [None]:
new.head(2)

In [None]:
# Collapse every token list back into a single space-separated string.
new['tags'] = new['tags'].map(" ".join)

In [None]:
new.iloc[0].tags

In [None]:
new.head(2)

In [None]:
# Shared Porter stemmer instance; stem() below reads it as a module-level name.
ps = PorterStemmer()

In [None]:
def stem(text):
    """Return `text` with each whitespace-separated word replaced by its stem.

    Words are stemmed with the module-level PorterStemmer instance `ps` and
    joined back together with single spaces.
    """
    return " ".join(ps.stem(word) for word in text.split())

In [None]:
# Replace every tag string with its stemmed version (see stem()).
new['tags'] = new['tags'].apply(stem)

In [None]:
new.iloc[0].tags

In [None]:
new.head(2)

In [None]:
currency_rates = CurrencyRates()

# Look the INR -> USD rate up once and scale the whole column with it.
# CurrencyRates.convert() fetches the rate from the remote service on every
# call, so applying it row by row costs one network round-trip per laptop
# for the very same rate.
# NOTE: this overwrites 'price' in place - running the cell twice converts twice.
inr_to_usd = currency_rates.get_rate('INR', 'USD')
new['price'] = new['price'] * inr_to_usd

In [None]:
# Collapse the detailed use-case labels into a handful of coarse categories.
# The pairs are applied in this order as literal (non-regex) substring
# replacements.
usecase_categories = {
    ' Gaming': 'Gaming',
    ' Business/Professional': 'Business/Professional',
    ' Multimedia/Entertainment': 'IT',
    ' Student/Education': 'Student/Education',
    ' Creative/Design': 'Creative/Design',
    ' Travel/On-the-go': 'Basic',
    ' Home/Everyday use': 'Basic',
    ' Programming/Development': 'IT',
    ' Budget Friendly': 'Basic',
    'Ultra-portable': 'IT',
}
for label, category in usecase_categories.items():
    new['usecases'] = new['usecases'].str.replace(label, category, regex=False)

# 'Ultra-portable' is matched without the leading space the other labels carry,
# so a raw value such as ' Ultra-portable' would end up as ' IT' and form a
# category of its own next to 'IT'. Strip surrounding whitespace so equal
# categories compare equal.
new['usecases'] = new['usecases'].str.strip()




In [None]:
# Horizontal bar chart with the number of laptops per use-case category.
usecase_counts = new.groupby('usecases').size()
ax = usecase_counts.plot(kind='barh', color=sns.palettes.mpl_palette('Dark2'))
# Hide the top and right frame lines of the axes.
ax.spines[['top', 'right']].set_visible(False)

# Vectorization

**Word2Vec**

In [None]:
# Tokenize the stemmed tag strings with NLTK's word_tokenize; the resulting
# token list per laptop is stored in a new 'tags_tokens' column.
new['tags_tokens'] = new['tags'].apply(word_tokenize)

In [None]:
# Train the Word2Vec model on the token lists (one "sentence" per laptop).
# Training with several worker threads is not repeatable: thread scheduling
# changes the order of the updates, so the vectors - and with them the
# recommendations - differ from run to run. A single worker with an explicit
# seed keeps the result stable. (Reproducibility across interpreter launches
# additionally requires PYTHONHASHSEED to be set.)
model = Word2Vec(sentences=new['tags_tokens'].tolist(), vector_size=100, window=5,
                 min_count=1, workers=1, seed=42)

In [None]:
# Represent each laptop by the mean of the Word2Vec vectors of its tag tokens.
word2vec_vectors = []
for tags_tokens in new['tags_tokens']:
    vectors = [model.wv[word] for word in tags_tokens if word in model.wv]
    if vectors:
        word2vec_vectors.append(np.mean(vectors, axis=0))
    else:
        # None of the tokens is in the vocabulary: fall back to a zero vector.
        # Its length follows the trained model rather than a hard-coded 100,
        # so changing vector_size at training time cannot break this cell.
        word2vec_vectors.append(np.zeros(model.vector_size))

In [None]:
# Convert the list of per-laptop vectors into one NumPy array (one row per laptop).
word2vec_vectors = np.array(word2vec_vectors)

**Cosine Similarity**

In [None]:
# Pairwise cosine similarity between all laptop vectors: entry [i, j] compares
# the laptop at row position i with the laptop at row position j.
word2vec_similarity = cosine_similarity(word2vec_vectors)

In [None]:
#Print the similarity of the first laptop with others
print(word2vec_similarity[0])


# Recommendation

In [None]:
# def recommends(laptop):
#     index = new[new['usecases']==laptop].index[0]
#     distances = sorted(list(enumerate(word2vec_similarity[index])), reverse=True, key = lambda x:x[1])
#     for i in distances[1:10]:
#         print(new.iloc[i[0]].price)

In [None]:
def recommends(use=None, top_n=49, data=None, similarity=None):
    """Print name and price of the laptops most similar to a use-case's first laptop.

    The first row of `data` whose 'usecases' value equals `use` is taken as the
    query. The other rows are ranked by their similarity to it, the best
    `top_n` are kept, and each distinct name among them is printed once,
    followed by its price.

    Parameters
    ----------
    use : str, optional
        Use-case category to look up. When omitted the user is prompted for it.
    top_n : int, optional
        Number of most similar rows to consider before de-duplicating names.
    data : pandas.DataFrame, optional
        Frame with 'usecases', 'name' and 'price' columns. Defaults to the
        notebook-level `new`.
    similarity : 2-D array, optional
        Square similarity matrix whose rows/columns follow the row order of
        `data`. Defaults to the notebook-level `word2vec_similarity`.

    Raises
    ------
    IndexError
        If no row has the requested use-case.
    """
    if data is None:
        data = new
    if similarity is None:
        similarity = word2vec_similarity
    if use is None:
        use = input('Enter the Workfield :-')

    # The similarity matrix is addressed by row position, whereas the frame's
    # index labels may have gaps (rows are dropped during cleaning). Look the
    # query up by position so both always refer to the same laptop.
    matches = np.flatnonzero((data['usecases'] == use).to_numpy())
    if matches.size == 0:
        raise IndexError(f"no laptop found for use case {use!r}")
    query_pos = int(matches[0])

    ranked = sorted(enumerate(similarity[query_pos]), key=lambda pair: pair[1], reverse=True)
    # Exclude the query explicitly: with tied scores it is not guaranteed to be
    # the first entry of the ranking.
    candidates = [pos for pos, _ in ranked if pos != query_pos][:top_n]

    printed_names = set()
    for pos in candidates:
        row = data.iloc[pos]
        name = row['name']
        if name not in printed_names:
            printed_names.add(name)
            print(name)
            print(row['price'])


In [None]:
# sorted(list(enumerate(word2vec_similarity[0])), reverse=True, key = lambda x:x[1])

In [None]:
recommends()

# Export the Dataframe and the Model

In [None]:
# Persist the prepared frame and the similarity matrix.
# The context managers flush and close each file even if pickling fails;
# the bare open() calls used before left the handles open.
with open('dataframe.pkl', 'wb') as frame_file:
    pickle.dump(new, frame_file)
with open('word2vec_similarity.pkl', 'wb') as similarity_file:
    pickle.dump(word2vec_similarity, similarity_file)