In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import pandas as pd
import numpy as np
from zipfile import ZipFile
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from pathlib import Path
import matplotlib.pyplot as plt

In [3]:
df = pd.read_csv ('/content/drive/MyDrive/Recommendation/dfn3share.csv')
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
df.head()

Unnamed: 0,user,service,count,subcat,gender
0,3646,92,127,15,1
1,3646,94,18,15,1
2,3646,93,19,15,1
3,3646,95,2,17,1
4,3646,114,1,15,1


In [4]:
df['Ratings'] = pd.cut(df['count'], bins=5, labels=[1, 2, 3,4,5])
df.head()

Unnamed: 0,user,service,count,subcat,gender,Ratings
0,3646,92,127,15,1,5
1,3646,94,18,15,1,1
2,3646,93,19,15,1,1
3,3646,95,2,17,1,1
4,3646,114,1,15,1,1


In [5]:
df=df[['user','service','Ratings']]

In [6]:
user_ids = df["user"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
userencoded2user = {i: x for i, x in enumerate(user_ids)}
movie_ids = df["service"].unique().tolist()
movie2movie_encoded = {x: i for i, x in enumerate(movie_ids)}
movie_encoded2movie = {i: x for i, x in enumerate(movie_ids)}
df["user"] = df["user"].map(user2user_encoded)
df["service"] = df["service"].map(movie2movie_encoded)
num_users = len(user2user_encoded)
num_movies = len(movie_encoded2movie)
df["Ratings"] = df["Ratings"].values.astype(np.float32)
# min and max ratings will be used to normalize the ratings later
min_rating = min(df["Ratings"])
max_rating = max(df["Ratings"])

print(
    "Number of users: {}, Number of services: {}, Min Ratings: {}, Max Ratings: {}".format(
        num_users, num_movies, min_rating, max_rating
    )
)


Number of users: 426026, Number of services: 301, Min Ratings: 1.0, Max Ratings: 5.0


In [7]:
df = df.sample(frac=1, random_state=42)
x = df[["user", "service"]].values
# Normalize the targets between 0 and 1. Makes it easy to train.
y = df["Ratings"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values
# Assuming training on 90% of the data and validating on 10%.
train_indices = int(0.9 * df.shape[0])
x_train, x_val, y_train, y_val = (
    x[:train_indices],
    x[train_indices:],
    y[:train_indices],
    y[train_indices:],
)


In [8]:
EMBEDDING_SIZE = 20


class RecommenderNet(keras.Model):
    def __init__(self, num_users, num_movies, embedding_size, **kwargs):
        super().__init__(**kwargs)
        self.num_users = num_users
        self.num_movies = num_movies
        self.embedding_size = embedding_size
        self.user_embedding = layers.Embedding(
            num_users,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-6),
        )
        self.user_bias = layers.Embedding(num_users, 1)
        self.movie_embedding = layers.Embedding(
            num_movies,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-6),
        )
        self.movie_bias = layers.Embedding(num_movies, 1)

    def call(self, inputs):
        user_vector = self.user_embedding(inputs[:, 0])
        user_bias = self.user_bias(inputs[:, 0])
        movie_vector = self.movie_embedding(inputs[:, 1])
        movie_bias = self.movie_bias(inputs[:, 1])
        dot_user_movie = tf.tensordot(user_vector, movie_vector, 2)
        # Add all the components (including bias)
        x = dot_user_movie + user_bias + movie_bias
        # The sigmoid activation forces the rating to between 0 and 1
        return tf.nn.sigmoid(x)


model = RecommenderNet(num_users, num_movies, EMBEDDING_SIZE)
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
)


In [9]:
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=3,
    verbose=1,
    validation_data=(x_val, y_val),
)


Epoch 1/3
Epoch 2/3
Epoch 3/3


In [None]:
from tensorflow.keras.models import Sequential, save_model, load_model

In [None]:
# Save the model
filepath = 'REC2'
save_model(model, filepath)




In [None]:
!zip -r REC2.zip REC2

  adding: REC2/ (stored 0%)
  adding: REC2/keras_metadata.pb (deflated 81%)
  adding: REC2/assets/ (stored 0%)
  adding: REC2/saved_model.pb (deflated 88%)
  adding: REC2/fingerprint.pb (stored 0%)
  adding: REC2/variables/ (stored 0%)
  adding: REC2/variables/variables.index (deflated 55%)
  adding: REC2/variables/variables.data-00000-of-00001 (deflated 8%)


In [10]:
service_df = pd.read_csv ('/content/drive/MyDrive/Recommendation/dfn3share.csv')
service_df = service_df.loc[:, ~service_df.columns.str.contains('^Unnamed')]

# Let us get a user and see the top recommendations.
user_id =193238
service_use_by_user = df[df.user == user_id]
service_not_used = service_df[
    ~service_df["service"].isin(service_use_by_user.service.values)
]["service"]
service_not_used = list(
    set(service_not_used).intersection(set(movie2movie_encoded.keys()))
)
service_not_used = [[movie2movie_encoded.get(x)] for x in service_not_used]
user_encoder = user2user_encoded.get(user_id)
user_movie_array = np.hstack(
    ([[user_encoder]] * len(service_not_used), service_not_used)
)
ratings = model.predict(user_movie_array).flatten()

top_ratings_indices = ratings.argsort()[-10:][::-1]
recommended_movie_ids = [
    movie_encoded2movie.get(service_not_used[x][0]) for x in top_ratings_indices
]

print("Showing recommendations for user: {}".format(user_id))

# top_movies_user = (
#     service_use_by_user.sort_values(by="count", ascending=False)
#     .head(5)
#     .service.values
# )
# movie_df_rows = service_df[service_df["service"].isin(top_movies_user)]
# for row in movie_df_rows.itertuples():
#     print(row.service, ":", row.subcat)

print("----" * 8)
print("Top 10 service recommendations")
print("----" * 8)
recommended_movies = service_df[service_df["service"].isin(recommended_movie_ids)]
for row in recommended_movies.itertuples():
    print('The code of services:',row.service,)


Showing recommendations for user: 706889
--------------------------------
Top 10 service recommendations
--------------------------------
The code of services: 534
The code of services: 188
The code of services: 534
The code of services: 162
The code of services: 193
The code of services: 113
The code of services: 113
The code of services: 214
The code of services: 190
The code of services: 101
The code of services: 193
The code of services: 101
The code of services: 190
The code of services: 214
The code of services: 534
The code of services: 113
The code of services: 78
The code of services: 101
The code of services: 534
The code of services: 534
The code of services: 534
The code of services: 198


In [None]:
ratings

In [11]:
df2=pd.DataFrame(user_movie_array)
df2=df2.rename(columns={0: 'user'})
df2=df2.rename(columns={1: 'service'})



In [12]:
df3=pd.DataFrame(ratings)
df3=df3.rename(columns={0: 'ratings'})
df3.head()

Unnamed: 0,ratings
0,3.3e-05
1,0.037986
2,0.051156
3,0.001447
4,6.2e-05


In [13]:
df4=pd.concat([df2,df3],axis=1)
df4.head().sort_values(by='ratings',ascending=False)


Unnamed: 0,user,service,ratings
2,64,128,0.051156
1,64,43,0.037986
3,64,110,0.001447
4,64,86,6.2e-05
0,64,70,3.3e-05
