In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError

from google.colab import drive
# Mount Google Drive (Colab only) and load the merged reviews + metadata table.
drive.mount('/content/drive')
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/reviews_and_metadata.csv')

# drop_duplicates() and reset_index() return NEW frames; without assigning the
# result they are no-ops. De-duplicate on the (reviewer, product) pair rather
# than on 'asin' alone: subset=['asin'] would keep a single review per product
# and leave the rating model with one target value per item.
data = data.drop_duplicates(subset=['reviewerID', 'asin']).reset_index(drop=True)



# Give every distinct reviewer / product a contiguous integer index, numbered
# in order of first appearance in the frame.
user_id_map = {reviewer: position for position, reviewer in enumerate(data['reviewerID'].unique())}
product_id_map = {asin: position for position, asin in enumerate(data['asin'].unique())}
num_users = len(user_id_map)
num_products = len(product_id_map)

# Integer-encoded ID columns, one entry per review row.
user_ids = data['reviewerID'].map(user_id_map).to_numpy().astype(np.int32)
product_ids = data['asin'].map(product_id_map).to_numpy().astype(np.int32)

# Ratings as float32, plus a copy scaled by the largest observed rating.
product_ratings = data['overall'].to_numpy().astype(np.float32)
product_ratings_norm = product_ratings / product_ratings.max()


# Hold out 20% of the review rows for evaluation. random_state pins the shuffle
# so the split (and therefore the reported RMSE) is reproducible across kernel
# restarts instead of changing on every run.
(train_user_ids, test_user_ids,
 train_product_ids, test_product_ids,
 train_ratings, test_ratings) = train_test_split(
    user_ids, product_ids, product_ratings_norm,
    test_size=0.2, random_state=42,
)


# Product-only rating model: product index -> 32-dim embedding -> flatten ->
# 16-unit ReLU layer -> single sigmoid output (targets are the ratings scaled
# by their maximum). Note that no user input is fed to this network.
model = Sequential([
    Embedding(input_dim=num_products, output_dim=32, input_length=1),
    Flatten(),
    Dense(units=16, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])



# Configure training: Adam optimiser, mean-squared-error loss, RMSE as metric.
model.compile(
    optimizer='adam',
    loss=MeanSquaredError(),
    metrics=[RootMeanSquaredError()],
)

# Fit for 10 epochs on product indices only; the held-out split doubles as the
# validation set reported after each epoch.
model.fit(
    x=train_product_ids,
    y=train_ratings,
    epochs=10,
    validation_data=(test_product_ids, test_ratings),
)

# Score the held-out split. The RMSE is on the scaled ratings, not raw stars.
test_loss, test_rmse = model.evaluate(
    x=test_product_ids,
    y=test_ratings,
)
print('Test Root Mean Squared Error:', test_rmse)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Root Mean Squared Error: 0.23863232135772705


In [None]:
#model.save('neural_network_model2.h5')

In [None]:
def recommend_products(user_id, model, data, product_id_map, num_recommendations=10):
    """Return the top-rated products according to ``model``, one row per product.

    Parameters
    ----------
    user_id : str
        Reviewer ID. Kept for interface compatibility; the model passed here is
        fed product indices only, so the ranking does not depend on the user.
    model : object
        Anything exposing ``predict(x)`` that accepts an ``(n, 1)`` integer
        array of product indices and returns one score per row.
    data : pandas.DataFrame
        Review rows with at least ``asin``, ``title``, ``description`` and
        ``overall`` columns (several rows per product are expected).
    product_id_map : dict
        Maps each ``asin`` to the integer index the model was trained with.
    num_recommendations : int, default 10
        Maximum number of products to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``Title``, ``Description``, ``ASIN``, ``Avg. Rating``; one row
        per recommended product, ordered from highest to lowest predicted score.
    """
    output_columns = ['Title', 'Description', 'ASIN', 'Avg. Rating']
    if not product_id_map:
        # Nothing to score; avoid calling predict() on an empty batch.
        return pd.DataFrame(columns=output_columns)

    product_keys = list(product_id_map.keys())
    product_indices = np.array(list(product_id_map.values()), dtype=np.int32)

    # Score every product in ONE batched call instead of one predict() per
    # product, which is orders of magnitude slower for a large catalogue.
    predicted = np.asarray(model.predict(product_indices.reshape(-1, 1))).reshape(-1)

    # Highest predicted score first, truncated to the requested count.
    ranked_positions = np.argsort(predicted)[::-1][:num_recommendations]
    ranked_keys = [product_keys[position] for position in ranked_positions]

    # `data` holds one row per review, so collapse it to one row per product
    # before looking up titles; otherwise every review of a recommended
    # product shows up as a separate (duplicate) recommendation.
    catalog = data.drop_duplicates(subset='asin').set_index('asin')[['title', 'description']]
    ranked_keys = [key for key in ranked_keys if key in catalog.index]

    mean_ratings = data.groupby('asin')['overall'].mean().reset_index()
    top_products = (
        catalog.loc[ranked_keys]  # .loc with a list keeps the ranking order
        .reset_index()
        .merge(mean_ratings, on='asin', how='left')
    )[['title', 'description', 'asin', 'overall']]
    top_products.columns = output_columns

    return top_products



In [None]:
# Reviewer to generate recommendations for.
user_id = 'A3Z74TDRGD0HU'
recommend_products(
    user_id,
    model=model,
    data=data,
    product_id_map=product_id_map,
    num_recommendations=10,
)



Unnamed: 0,Title,Description,ASIN,Avg. Rating
0,"jane iredale So-Bronze, Bronzing Powder, 0.35 oz","['A powder used for bronzing, contouring and h...",B0001EL39C,5.000000
1,"jane iredale So-Bronze, Bronzing Powder, 0.35 oz","['A powder used for bronzing, contouring and h...",B0001EL39C,5.000000
2,"jane iredale So-Bronze, Bronzing Powder, 0.35 oz","['A powder used for bronzing, contouring and h...",B0001EL39C,5.000000
3,"jane iredale So-Bronze, Bronzing Powder, 0.35 oz","['A powder used for bronzing, contouring and h...",B0001EL39C,5.000000
4,"jane iredale So-Bronze, Bronzing Powder, 0.35 oz","['A powder used for bronzing, contouring and h...",B0001EL39C,5.000000
...,...,...,...,...
206,"Pureology Fullfyl Conditioner, 8.5 Fl Oz","['', 'BENEFITS<br /> A Conditioner designed to...",B01EZ6NZU6,4.466667
207,"Pureology Fullfyl Conditioner, 8.5 Fl Oz","['', 'BENEFITS<br /> A Conditioner designed to...",B01EZ6NZU6,4.466667
208,"Pureology Fullfyl Conditioner, 8.5 Fl Oz","['', 'BENEFITS<br /> A Conditioner designed to...",B01EZ6NZU6,4.466667
209,"Pureology Fullfyl Conditioner, 8.5 Fl Oz","['', 'BENEFITS<br /> A Conditioner designed to...",B01EZ6NZU6,4.466667


In [None]:
# Peek at the first few raw reviewer IDs (the strings user_id_map is keyed on).
data['reviewerID'].head()

0    A2EM03F99X3RJZ
1     A3Z74TDRGD0HU
2    A2UXFNW9RTL4VM
3     AXX5G4LFF12R6
4     A7GUKMOJT2NR6
Name: reviewerID, dtype: object

In [None]:
# Inspect the integer-encoded reviewer indices (one entry per review row).
user_ids

array([   0,    1,    2, ..., 5698, 5814, 5760], dtype=int32)

# Another attempt: combine user, product, and description embeddings

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense, Input, Concatenate
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model

# Read the merged reviews + metadata table from Drive.
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/reviews_and_metadata.csv')

# Contiguous integer index per distinct reviewer / product, numbered in order
# of first appearance.
user_id_map = {reviewer: position for position, reviewer in enumerate(data['reviewerID'].unique())}
product_id_map = {asin: position for position, asin in enumerate(data['asin'].unique())}

# Integer-encoded ID columns, one entry per review row.
user_ids = data['reviewerID'].map(user_id_map).to_numpy().astype(np.int32)
product_ids = data['asin'].map(product_id_map).to_numpy().astype(np.int32)

# Ratings as float32, then scaled by the largest observed rating.
product_ratings = data['overall'].to_numpy().astype(np.float32)
product_ratings_norm = product_ratings / product_ratings.max()

# Get the product descriptions and tokenize them.
# Empty CSV cells are loaded by pandas as float NaN rather than as strings, so
# coerce the column to text first; rows that are already strings are unchanged.
descriptions = data['description'].fillna('').astype(str)

# Keep the 10,000 most frequent words; every description becomes a sequence of
# word indices padded/truncated to exactly 100 tokens.
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(descriptions)
product_desc = tokenizer.texts_to_sequences(descriptions)
product_desc = pad_sequences(product_desc, maxlen=100)

# Split the data into training and testing sets (80/20). All four arrays are
# split with the same shuffle so rows stay aligned; random_state pins that
# shuffle so the reported metrics are reproducible across kernel restarts.
(train_user_ids, test_user_ids,
 train_product_ids, test_product_ids,
 train_ratings, test_ratings,
 train_desc, test_desc) = train_test_split(
    user_ids, product_ids, product_ratings_norm, product_desc,
    test_size=0.2, random_state=42,
)

# Define the model architecture: three embedding branches (product index,
# description tokens, user index) that are flattened, concatenated and passed
# through a small dense head.

# Product branch: one integer product index per sample -> 32-dim embedding.
product_input = Input(shape=(1,))
product_embedding = Embedding(input_dim=len(product_id_map), output_dim=32, input_length=1)(product_input)
product_flatten = Flatten()(product_embedding)

# Description branch: 100 token indices per sample (matches the pad_sequences
# maxlen) drawn from a 10,000-word vocabulary (matches Tokenizer num_words).
# Flattening yields 100 * 32 = 3200 features for this branch.
desc_input = Input(shape=(100,))
desc_embedding = Embedding(input_dim=10000, output_dim=32, input_length=100)(desc_input)
desc_flatten = Flatten()(desc_embedding)

# User branch: one integer user index per sample -> 32-dim embedding.
user_input = Input(shape=(1,))
user_embedding = Embedding(input_dim=len(user_id_map), output_dim=32, input_length=1)(user_input)
user_flatten = Flatten()(user_embedding)

# Dense head: sigmoid output, to be compared against the max-scaled ratings.
concatenated = Concatenate()([user_flatten, product_flatten, desc_flatten])
hidden_layer = Dense(units=16, activation='relu')(concatenated)
output_layer = Dense(units=1, activation='sigmoid')(hidden_layer)

# Create the model. Input order is [user, product, description]; every call to
# fit/evaluate/predict must pass its arrays in this same order.
model_final2 = Model(inputs=[user_input, product_input, desc_input], outputs=output_layer)

# Configure training: Adam optimiser, mean-squared-error loss, RMSE as metric.
model_final2.compile(
    optimizer='adam',
    loss=MeanSquaredError(),
    metrics=[RootMeanSquaredError()],
)

# Fit for 2 epochs; inputs are passed in [user, product, description] order and
# the held-out split doubles as the validation set.
model_final2.fit(
    x=[train_user_ids, train_product_ids, train_desc],
    y=train_ratings,
    epochs=2,
    validation_data=([test_user_ids, test_product_ids, test_desc], test_ratings),
)

# Score the held-out split. The RMSE is on the scaled ratings, not raw stars.
test_loss, test_rmse = model_final2.evaluate(
    x=[test_user_ids, test_product_ids, test_desc],
    y=test_ratings,
)
print('Test Root Mean Squared Error:', test_rmse)


Epoch 1/2
Epoch 2/2
Test Root Mean Squared Error: 0.2501189410686493


In [None]:
# Saving the model

#model_final2.save('neural_network_model.h5')

In [None]:
from tensorflow.keras.models import load_model

def recommend_products(user_id, min_rating=4.0):
    """Return the products whose predicted rating for ``user_id`` exceeds ``min_rating``.

    NOTE: this definition shadows the earlier five-argument
    ``recommend_products`` defined higher up in this notebook.

    Reads the notebook-level globals ``data`` (review frame) and
    ``model_final2`` (the three-input user/product/description model).

    Parameters
    ----------
    user_id : str
        Reviewer ID as it appears in ``data['reviewerID']``.
    min_rating : float, default 4.0
        Threshold on the predicted rating, expressed on the original rating
        scale of ``data['overall']`` (not the scaled 0-1 model output).

    Returns
    -------
    pandas.DataFrame
        Columns ``asin``, ``description``, ``title``; one row per product whose
        predicted rating is above ``min_rating``.

    Raises
    ------
    KeyError
        If ``user_id`` does not occur in ``data['reviewerID']``.
    """
    # Rebuild the ID maps exactly as the training cell does (same frame, same
    # order of first appearance) so indices line up with the embeddings.
    user_id_map = {reviewer: position for position, reviewer in enumerate(data['reviewerID'].unique())}
    product_id_map = {asin: position for position, asin in enumerate(data['asin'].unique())}

    # Get the user ID index (KeyError for an unknown reviewer).
    user_index = user_id_map[user_id]

    # Refit the tokenizer on the full description column, mirroring the
    # training cell, so word indices match the ones the model was trained on.
    descriptions = data['description'].fillna('').astype(str)
    tokenizer = Tokenizer(num_words=10000)
    tokenizer.fit_on_texts(descriptions)

    # One row per PRODUCT. The previous version tiled the per-review
    # description matrix once per product (n_reviews * n_products rows), which
    # exhausted RAM and did not line up with the product index array.
    catalog = data.drop_duplicates(subset='asin')
    catalog_descriptions = catalog['description'].fillna('').astype(str)

    product_ids = catalog['asin'].map(product_id_map).values.astype(np.int32)
    product_descs = pad_sequences(tokenizer.texts_to_sequences(catalog_descriptions), maxlen=100)
    user_ids = np.full(len(product_ids), user_index, dtype=np.int32)

    # Use the three-input model; ``model`` is the earlier product-only network
    # and does not accept [user, product, description] inputs.
    predictions = model_final2.predict([user_ids, product_ids, product_descs]).flatten()

    # The network was trained on ratings divided by their maximum, so its
    # sigmoid output lies in [0, 1]; scale back before applying the threshold
    # (comparing the raw output with 4 can never be true).
    predicted_ratings = predictions * data['overall'].max()

    # The mask has one entry per catalogue row, so index `catalog`, not `data`.
    recommended_products = catalog.loc[predicted_ratings > min_rating, ['asin', 'description', 'title']]

    # Return the recommended products
    return recommended_products


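# TODO: save the fitted tokenizer together with the model so inference can reuse the training vocabulary instead of refitting it.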




    
        
    



In [None]:
# Request recommendations for one reviewer ID taken from the dataset.
recommend_products('A2EM03F99X3RJZ')

# The kernel runs out of RAM and crashes when this call is executed.

In [None]:
# Preview the first rows of the loaded reviews + metadata frame.
data.head()