<a href="https://colab.research.google.com/github/LakshyaV/product-rec/blob/main/recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Dependencies**

In [None]:
import re

import numpy as np
import pandas as pd
import tensorflow as tf
from gensim.models import Word2Vec
from mlxtend.frequent_patterns import fpgrowth, association_rules
from mlxtend.preprocessing import TransactionEncoder
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Dense, Embedding, Flatten, Input
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences

**Word2Vec Model**

In [None]:
# Load the item catalogue CSV from the working directory. Later cells read its
# 'Item Name', 'Item Description', 'Category' and 'Price' columns.
costco_df = pd.read_csv("costco_items_unique.csv")

In [None]:
# English stopwords that prepocess_dataset removes from item text, grouped by role.
stopwords = {
    # pronouns
    'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves',
    'you', 'your', 'yours', 'yourself', 'yourselves',
    'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself',
    'it', 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves',
    # interrogatives and demonstratives
    'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those',
    # auxiliary verbs
    'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
    'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing',
    # articles and conjunctions
    'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while',
    # prepositions
    'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into',
    'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from',
    'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under',
    # adverbs
    'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how',
    # quantifiers and degree words
    'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such',
    'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very',
    # contraction fragments and modals
    's', 't', 'can', 'will', 'just', 'don', 'should', 'now',
}

In [None]:
def prepocess_dataset(text):
    """Turn free text into a list of lowercase tokens without stopwords.

    The text is lowercased, every character that is neither a word character
    nor whitespace is deleted, the result is split on whitespace, and tokens
    found in the module-level ``stopwords`` set are discarded.
    """
    lowered = text.lower()
    without_punctuation = re.sub(r'[^\w\s]', '', lowered)
    kept = []
    for candidate in without_punctuation.split():
        if candidate in stopwords:
            continue
        kept.append(candidate)
    return kept

def _as_text(column):
    """Return ``costco_df[column]`` as strings, with missing values rendered as ''."""
    values = costco_df[column]
    # `astype(str)` alone turns NaN into the literal string "nan", which would
    # then survive tokenisation and enter the Word2Vec vocabulary as a word.
    return values.astype(str).where(values.notna(), '')


# Concatenate the descriptive columns into one text field per item, then tokenise it.
costco_df['New'] = (
    _as_text('Item Name') + ' ' + _as_text('Item Description') + ' '
    + _as_text('Category') + ' ' + _as_text('Price')
)
costco_df['Processed Description'] = costco_df['New'].apply(prepocess_dataset)

In [None]:
# Each item's token list is one training "sentence" for the embedding model.
sentences = list(costco_df['Processed Description'])
model = Word2Vec(
    sentences=sentences,
    vector_size=100,
    window=5,
    min_count=1,
    workers=4,
)

In [None]:
def similar_items(item_name, top_n=2):
    """Return the names of the catalogue items most similar to ``item_name``.

    Every item is embedded as the mean of the Word2Vec vectors of the tokens in
    its 'Processed Description'. Items are ranked by cosine similarity to the
    queried item, and the queried row itself is excluded from the result.

    The previous implementation used the DataFrame row index as an index into
    the Word2Vec vocabulary (and vocabulary indices as row positions), so it
    compared the vectors of unrelated words rather than of the item.

    NOTE(review): reads the Word2Vec model from the global ``model``, which the
    LSTM and dense-network cells later rebind; call this before those cells run.

    Args:
        item_name: Exact value of the 'Item Name' column to look up.
        top_n: Maximum number of similar item names to return.

    Returns:
        A list of item names, most similar first.

    Raises:
        IndexError: If no row of ``costco_df`` has this item name.
    """
    row_positions = np.flatnonzero((costco_df['Item Name'] == item_name).to_numpy())
    if row_positions.size == 0:
        raise IndexError(f"No item named {item_name!r} in costco_df")
    query_position = row_positions[0]

    word_vectors = model.wv

    def _embed(tokens):
        # Average the vectors of the tokens the model knows; zero vector if none.
        known = [word_vectors[token] for token in tokens if token in word_vectors]
        if not known:
            return np.zeros(word_vectors.vector_size, dtype=np.float32)
        return np.mean(known, axis=0)

    item_matrix = np.vstack([_embed(tokens) for tokens in costco_df['Processed Description']])
    norms = np.linalg.norm(item_matrix, axis=1, keepdims=True)
    norms[norms == 0] = 1.0  # avoid dividing by zero for items with no known tokens
    unit_matrix = item_matrix / norms

    scores = unit_matrix @ unit_matrix[query_position]
    scores[query_position] = -np.inf  # never return the queried item itself
    ranked_positions = np.argsort(scores)[::-1][:top_n]
    return costco_df['Item Name'].iloc[ranked_positions].tolist()

In [None]:
# Look up the neighbours of one catalogue item and show the second entry returned.
similar_items_list = similar_items(item_name="Gerber Organic Baby Food #3856")
print(similar_items_list[1])

**FP-Growth Model**

In [None]:
# Read the transaction lines and collect the item names of each transaction
# into one list per 'Transaction ID'.
df_itemsets = pd.read_csv("costco_transactions_with_descriptions.csv")
transactions = [
    list(basket)
    for _, basket in df_itemsets.groupby('Transaction ID')['Item Name']
]

In [None]:
# One-hot encode the baskets: one column per distinct item name.
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
df_encoded = pd.DataFrame(data=te_ary, columns=te.columns_)

# Mine itemsets with support >= 0.01, then derive association rules filtered on
# the "lift" metric with a threshold of 1.
frequent_itemsets = fpgrowth(df_encoded, use_colnames=True, min_support=0.01)
rules = association_rules(frequent_itemsets, min_threshold=1, metric="lift")

In [None]:
def get_recommendations(cart_items, frequent_itemsets, rules):
    """Recommend items associated with the current cart.

    A frequent itemset "matches" when it contains every cart item. For each
    rule whose antecedents equal a matching itemset, the items in its
    consequents are recommended.

    The previous version returned a set of frozensets (the raw 'consequents'
    cells), so subtracting the cart's item names from it never removed
    anything. This version flattens the consequents into item names. It also
    drops three lines that built an unused one-row DataFrame.

    Args:
        cart_items: Iterable of item names currently in the cart.
        frequent_itemsets: DataFrame with an 'itemsets' column of item collections.
        rules: DataFrame with 'antecedents' and 'consequents' columns of item
            collections.

    Returns:
        A set of recommended item names, excluding anything already in the cart.
    """
    cart = set(cart_items)

    # Frequent itemsets that cover the whole cart.
    matching_itemsets = {
        frozenset(itemset)
        for itemset in frequent_itemsets['itemsets']
        if cart.issubset(itemset)
    }

    recommended = set()
    for antecedent, consequent in zip(rules['antecedents'], rules['consequents']):
        if frozenset(antecedent) in matching_itemsets:
            recommended.update(consequent)

    return recommended - cart

In [None]:
# Example cart: print what the mined rules associate with these two items.
cart_items = ['Organic Blueberries', 'Organic Strawberries']
print(
    get_recommendations(
        cart_items=cart_items,
        frequent_itemsets=frequent_itemsets,
        rules=rules,
    )
)

**LSTM Model**

In [None]:
# Load the purchase history; later cells read its 'Date' and 'Product Name' columns.
purchase_df = pd.read_csv("past.csv")

In [None]:
# Convert 'Date' with pd.to_datetime so that sorting by it orders purchases chronologically.
purchase_df['Date'] = pd.to_datetime(purchase_df['Date'])

In [None]:
# Order the purchases by date; the row order becomes the purchase sequence below.
purchase_df = purchase_df.sort_values('Date')

In [None]:
# Give each distinct product an integer id (0, 1, 2, ...) in order of first
# appearance, then replace the product names in the frame with those ids.
item_dict = dict(
    (item, position)
    for position, item in enumerate(purchase_df['Product Name'].unique())
)
purchase_df['Product Name'] = purchase_df['Product Name'].map(item_dict)

In [None]:
# Flatten the (already date-sorted) product ids into one plain Python list.
purchase_sequence = list(purchase_df['Product Name'])

In [None]:
# Force the purchase history to exactly `max_sequence_length` ids.
# `pad_sequences` is imported in the notebook's top import cell.
# NOTE(review): the pad value is pad_sequences' default of 0, which is also the
# id `item_dict` assigns to the first product, so padding and that product are
# indistinguishable to the model — confirm whether ids should start at 1.
max_sequence_length = 50
padded_sequence = pad_sequences([purchase_sequence], maxlen=max_sequence_length, padding='post')[0]

In [None]:
# Build (window of previous ids -> next id) training pairs.
sequence_length = max_sequence_length - 1

# `padded_sequence` always has exactly `max_sequence_length` entries, so sliding
# a `sequence_length` window over it can only ever produce ONE training pair,
# which also makes the 80/20 branch of the split cell unreachable. Slide over
# the complete purchase history whenever it is longer than one window; shorter
# histories fall back to the padded sequence as before.
if len(purchase_sequence) > max_sequence_length:
    purchase_history = np.asarray(purchase_sequence, dtype='int32')
else:
    purchase_history = np.asarray(padded_sequence)

X = []
y = []
for i in range(len(purchase_history) - sequence_length):
    X.append(purchase_history[i:i+sequence_length])
    y.append(purchase_history[i+sequence_length])

X = np.array(X)
y = np.array(y)

# One-hot encode the targets: row k of the identity matrix is the vector for id k.
num_items = len(item_dict)
y = np.eye(num_items)[y]

In [None]:
# Ordered 80/20 hold-out (no shuffling). With fewer than two samples there is
# nothing to hold out, so the same data serves as both training and validation.
if len(X) >= 2:
    split_index = int(0.8 * len(X))
    X_train, y_train = X[:split_index], y[:split_index]
    X_val, y_val = X[split_index:], y[split_index:]
else:
    X_train = X_val = X
    y_train = y_val = y


In [None]:
# Next-item classifier: embed each product id, summarise the window with an
# LSTM, and output a softmax over all `num_items` products.
# `Sequential`, `Embedding`, `LSTM` and `Dense` come from the notebook's top
# import cell.
# NOTE(review): this rebinds `model`, the global that `similar_items` reads the
# Word2Vec model from; `similar_items` stops working once this cell has run.
model = Sequential([
    # `input_length` is intentionally not passed: Keras 3 deprecates it, and on
    # older Keras it fixes the input length to `sequence_length`, which the
    # shorter window used for prediction further down would not satisfy.
    Embedding(input_dim=num_items, output_dim=50),
    LSTM(100, return_sequences=False),
    Dense(num_items, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))

In [None]:
def get_lstm_recommendations(model, input_sequence, item_dict, top_n=5):
    """Return the ``top_n`` items the model scores highest, best first.

    The previous slice ``[-top_n:]`` returned the items in ascending score
    order (least likely first) and returned every item when ``top_n`` was 0.

    Args:
        model: Object with a ``predict`` method that maps an array of shape
            (1, len(input_sequence)) to per-item scores of shape (1, num_items).
        input_sequence: Sequence of integer item ids used as the model input.
        item_dict: Mapping from item name to integer id.
        top_n: Maximum number of item names to return.

    Returns:
        A list of item names ordered from highest to lowest predicted score.
    """
    input_sequence = np.array(input_sequence).reshape(1, -1)  # add the batch axis
    predictions = model.predict(input_sequence)
    ranked_ids = np.argsort(predictions[0])[::-1][:top_n]
    reverse_item_dict = {v: k for k, v in item_dict.items()}
    return [reverse_item_dict[i] for i in ranked_ids]

# Predict from a window of the same length the model was trained on
# (`sequence_length` ids). The previous 5-id slice did not match the training
# windows and is rejected outright when the model's input length is fixed.
test_sequence = padded_sequence[-sequence_length:]
recommendations = get_lstm_recommendations(model, test_sequence, item_dict)
print(f'Recommendations: {recommendations}')

**Neural Collaborative Filtering**

In [None]:
# Read past.csv into a separate frame; the next cell overwrites its
# 'Product Name' and 'Store' columns with integer codes.
df = pd.read_csv('past.csv')

In [None]:
# Encode product and store labels as integer codes. The fitted encoders are
# kept so codes can be decoded back to names later.
# `LabelEncoder` is imported in the notebook's top import cell.
product_encoder = LabelEncoder()
store_encoder = LabelEncoder()

df['Product Name'] = product_encoder.fit_transform(df['Product Name'])
df['Store'] = store_encoder.fit_transform(df['Store'])

In [None]:
# One row per encoded product, one column per encoded store. Each cell holds the
# aggregated 'Price' for that pair (pivot_table's default aggregation), or 0 when
# the pair never occurs.
# NOTE(review): the recommendation cells further down rank the *columns* of a
# prediction row (stores, in this orientation) and decode them with
# `product_encoder` — confirm whether index and columns should be swapped.
interaction_matrix = pd.pivot_table(df, index='Product Name', columns='Store', values='Price', fill_value=0)

# The network trained on this matrix ends in a sigmoid and uses binary
# cross-entropy, both of which need targets in [0, 1]. Raw prices are not, so
# rescale by the largest entry (assumes prices are non-negative — TODO confirm).
user_item_matrix = interaction_matrix.values.astype('float32')
if user_item_matrix.size and user_item_matrix.max() > 0:
    user_item_matrix = user_item_matrix / user_item_matrix.max()

In [None]:
# Hold out 20% of the matrix rows, with a fixed seed so the split is repeatable.
X_train, X_test = train_test_split(
    user_item_matrix,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Dense network whose input and output widths both equal the number of matrix
# columns: two ReLU hidden layers followed by a sigmoid output layer.
model = Sequential()
model.add(Input(shape=(user_item_matrix.shape[1],)))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(user_item_matrix.shape[1], activation='sigmoid'))  # Output layer

model.compile(loss='binary_crossentropy', optimizer='adam')

In [None]:
# Train the network to reproduce its own input rows (input and target are both
# X_train), holding out 20% of them for validation.
historyTwo = model.fit(
    X_train,
    X_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

In [None]:
# Score every row of the full interaction matrix, not only the training split.
predictionsTwo = model.predict(user_item_matrix)

In [None]:
def recommend_items(predictions, top_n=10, user_index=0):
    """Return the column indices with the highest scores in one prediction row.

    Args:
        predictions: 2-D array of scores, one row per entity being scored.
        top_n: Maximum number of indices to return.
        user_index: Row of ``predictions`` to rank. Defaults to 0, which is the
            row the function always used before this parameter existed.

    Returns:
        A NumPy array of column indices ordered from highest to lowest score.
    """
    user_predictions = predictions[user_index]
    top_indices = np.argsort(user_predictions)[::-1][:top_n]
    return top_indices

In [None]:
def index_to_product_name(indices, encoder):
    """Decode integer codes back to their original labels.

    Delegates to ``encoder.inverse_transform`` and returns its result unchanged.
    """
    decoded_labels = encoder.inverse_transform(indices)
    return decoded_labels

In [None]:
# Take the best-scoring index from the first prediction row, decode it with the
# product encoder, and print one name per line.
top_n = 1
top_indices = recommend_items(predictions=predictionsTwo, top_n=top_n)

recommended_product_names = index_to_product_name(
    indices=top_indices,
    encoder=product_encoder,
)

for product in recommended_product_names:
    print(product)