In [None]:
!pip install azure-storage-blob pandas numpy keras tensorflow joblib gradio


Collecting azure-storage-blob
  Downloading azure_storage_blob-12.23.1-py3-none-any.whl.metadata (26 kB)
Collecting gradio
  Downloading gradio-5.0.2-py3-none-any.whl.metadata (15 kB)
Collecting azure-core>=1.30.0 (from azure-storage-blob)
  Downloading azure_core-1.31.0-py3-none-any.whl.metadata (39 kB)
Collecting isodate>=0.6.1 (from azure-storage-blob)
  Downloading isodate-0.7.2-py3-none-any.whl.metadata (11 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.2-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.0 (from gradio)
  Downloading gradio_client-1.4.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.25.1 (from gradio)
  Downloadi

In [None]:
import os
from datetime import datetime
from io import StringIO

import gradio as gr
import joblib
import numpy as np
import pandas as pd
from azure.storage.blob import BlobServiceClient
from keras.callbacks import EarlyStopping
from keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Dropout, BatchNormalization
from keras.models import Model, load_model
from keras.optimizers import AdamW
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


In [None]:
# The connection string embeds the storage account key, so it is read from the
# environment instead of being stored in the notebook.
# SECURITY: an account key was previously hard-coded in this cell. Treat that
# key as compromised and rotate it in the Azure portal.
connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")
if not connection_string:
    raise RuntimeError(
        "Set the AZURE_STORAGE_CONNECTION_STRING environment variable to the "
        "storage account connection string before running this notebook."
    )

# Client objects shared by the data-loading cells below.
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
container_name = "netflix-data"
container_client = blob_service_client.get_container_client(container_name)


In [None]:
def load_csv_from_blob(blob_name):
    """Download one CSV blob and parse it into a DataFrame.

    Args:
        blob_name: Name of the blob inside the container referenced by the
            module-level ``container_client``.

    Returns:
        pandas.DataFrame produced by ``pd.read_csv`` with default options.

    Raises:
        UnicodeDecodeError: If the blob content is not valid UTF-8.
    """
    # Fetch the whole blob into memory as bytes.
    raw_bytes = container_client.get_blob_client(blob_name).download_blob().readall()
    # Decode to text and hand pandas a file-like view of it.
    return pd.read_csv(StringIO(raw_bytes.decode('utf-8')))

# Fetch the movie metadata and the ratings table, in that order, from blob storage.
movies_data, ratings_data = map(load_csv_from_blob, ("movies.csv", "ratings.csv"))


In [None]:
# Encode raw userId / movieId values as contiguous integer codes (0..n-1) so
# they can index the model's embedding tables directly. The category indexes
# are kept so a code can be mapped back to its raw ID
# (e.g. movie_categories.cat.categories[code]).
user_categories = ratings_data['userId'].astype('category')
movie_categories = ratings_data['movieId'].astype('category')

# Re-key the movie metadata with the SAME encoding. ratings_data['movieId'] is
# overwritten with codes below, so leaving movies_data on raw IDs would make a
# join on 'movieId' attach the wrong (or no) title to each prediction.
# Movies that never appear in ratings_data receive code -1 and are dropped.
# Re-running this cell is safe: once the IDs are codes, the mapping is the identity.
movies_data = movies_data.assign(
    movieId=pd.Categorical(
        movies_data['movieId'], categories=movie_categories.cat.categories
    ).codes
)
movies_data = movies_data[movies_data['movieId'] >= 0].reset_index(drop=True)

# NOTE: after this point 'userId' and 'movieId' hold encoded IDs, not raw ones.
ratings_data['userId'] = user_categories.cat.codes
ratings_data['movieId'] = movie_categories.cat.codes

# Codes are contiguous, so the distinct count equals the embedding table size.
num_users = ratings_data['userId'].nunique()
num_movies = ratings_data['movieId'].nunique()

# Split the data into training and testing sets (fixed seed for reproducibility).
train_data, test_data = train_test_split(ratings_data, test_size=0.2, random_state=42)


In [None]:
def create_ncf_model(num_users, num_movies, embedding_dim=50):
    """Build and compile the neural collaborative-filtering rating model.

    The network embeds a user ID and a movie ID, concatenates the two
    embedding vectors and passes them through a small dense stack that ends
    in a single linear unit (the predicted rating).

    Args:
        num_users: Size of the user embedding table.
        num_movies: Size of the movie embedding table.
        embedding_dim: Width of each embedding vector.

    Returns:
        A Keras ``Model`` taking ``[user_input, movie_input]`` and compiled
        with AdamW (learning rate 0.001) and mean-squared-error loss.
    """
    def embed_id(id_tensor, table_size, layer_name):
        # Look up the embedding and drop the length-1 sequence axis.
        return Flatten()(Embedding(table_size, embedding_dim, name=layer_name)(id_tensor))

    # One scalar ID per example for each branch.
    user_input = Input(shape=(1,), name='user_input')
    movie_input = Input(shape=(1,), name='movie_input')

    user_vector = embed_id(user_input, num_users, 'user_embedding')
    movie_vector = embed_id(movie_input, num_movies, 'movie_embedding')

    # Dense stack over the joined user/movie representation.
    features = Concatenate()([user_vector, movie_vector])
    features = Dense(128, activation='relu')(features)
    features = BatchNormalization()(features)
    features = Dropout(0.2)(features)  # Regularization
    features = Dense(64, activation='relu')(features)
    predicted_rating = Dense(1)(features)

    ncf = Model(inputs=[user_input, movie_input], outputs=predicted_rating)
    ncf.compile(optimizer=AdamW(learning_rate=0.001), loss='mean_squared_error')
    return ncf


In [None]:
def train_model(train_data, num_users, num_movies, epochs=10, batch_size=256,
                validation_split=0.1, patience=3, model_path="ncf_model.h5"):
    """Train the NCF model on rating triples and save it to disk.

    Args:
        train_data: Table with 'userId', 'movieId' and 'rating' columns; the
            ID columns are fed to the embedding layers as-is.
        num_users: Size of the user embedding table.
        num_movies: Size of the movie embedding table.
        epochs: Maximum number of training epochs.
        batch_size: Mini-batch size passed to ``fit``.
        validation_split: Fraction of ``train_data`` held out by Keras for
            validation. Early stopping is only enabled when this is non-zero.
        patience: Epochs without ``val_loss`` improvement before training
            stops; ``None`` disables early stopping.
        model_path: File the trained model is saved to.

    Returns:
        The trained Keras model (also written to ``model_path``).
    """
    model = create_ncf_model(num_users, num_movies)
    X_train = [train_data['userId'].values, train_data['movieId'].values]
    y_train = train_data['rating'].values

    # In the recorded run, val_loss bottoms out around epoch 4 and then climbs
    # while the training loss keeps falling. Stop once validation stalls and
    # keep the best-epoch weights, not the last-epoch ones.
    callbacks = []
    if validation_split and patience is not None:
        callbacks.append(
            EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)
        )

    model.fit(
        X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=1,
        validation_split=validation_split, callbacks=callbacks
    )

    model.save(model_path)
    return model

# Fit the recommender on the training split; this also writes the model file to disk.
model = train_model(train_data=train_data, num_users=num_users, num_movies=num_movies)


Epoch 1/10
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - loss: 4.5915 - val_loss: 6.8658
Epoch 2/10
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 0.8245 - val_loss: 2.1990
Epoch 3/10
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - loss: 0.6974 - val_loss: 0.8853
Epoch 4/10
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - loss: 0.6040 - val_loss: 0.8705
Epoch 5/10
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 0.5469 - val_loss: 0.8838
Epoch 6/10
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - loss: 0.5042 - val_loss: 0.9008
Epoch 7/10
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - loss: 0.4582 - val_loss: 0.9191
Epoch 8/10
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 0.4297 - val_loss: 0.9282
Epoch 9/10
[1m282/282[0m [32m━━━━━



In [None]:
# Last model read from disk plus the file mtime it was read at.
_loaded_model_cache = {"mtime": None, "model": None}


def load_trained_model():
    """Return the saved NCF model, reading it from disk only when needed.

    The model is cached after the first load so that repeated calls (one per
    recommendation request) do not re-read the file each time. The cache is
    invalidated when the file's modification time changes, e.g. after
    retraining.

    Returns:
        The Keras model loaded from "ncf_model.h5".
    """
    try:
        mtime = os.path.getmtime("ncf_model.h5")
    except OSError:
        # File is missing or unreadable: let load_model raise its own error.
        return load_model("ncf_model.h5")

    if _loaded_model_cache["model"] is None or _loaded_model_cache["mtime"] != mtime:
        _loaded_model_cache["model"] = load_model("ncf_model.h5")
        _loaded_model_cache["mtime"] = mtime
    return _loaded_model_cache["model"]

def recommend_movies(user_id, n_recommendations=5):
    """Rank the movies a user has not rated by predicted rating.

    Args:
        user_id: User identifier in the same ID space as
            ``ratings_data['userId']``.
        n_recommendations: Maximum number of rows to return.

    Returns:
        A DataFrame with 'movieId', 'predicted_rating' and the columns of
        ``movies_data`` (left-joined on 'movieId'), sorted by
        'predicted_rating' descending and truncated to ``n_recommendations``
        rows. Returns ``None`` when the user has no rows in ``ratings_data``
        or has already rated every movie.
    """
    is_users_row = ratings_data['userId'] == user_id
    if not is_users_row.any():
        # IDs that never appear in ratings_data were not seen in training;
        # scoring them would index outside the user embedding or give
        # meaningless predictions.
        return None

    # Candidates = every rated movie minus the ones this user already rated.
    # setdiff1d returns a sorted array, so the candidate order is deterministic.
    rated_movie_ids = ratings_data.loc[is_users_row, 'movieId'].unique()
    all_movie_ids = ratings_data['movieId'].unique()
    movie_ids = np.setdiff1d(all_movie_ids, rated_movie_ids).astype(np.int64)
    if movie_ids.size == 0:
        return None

    # Load the model only once there is something to score.
    model = load_trained_model()

    # Build the user column with an explicit wide dtype: the movie ID column may
    # use a narrow integer type that cannot hold larger user IDs.
    user_array = np.full(movie_ids.shape, user_id, dtype=np.int64)

    # Predict ratings for unwatched movies (verbose=0: no progress bar per request).
    predicted_ratings = model.predict([user_array, movie_ids], verbose=0).flatten()

    recommendations = pd.DataFrame({
        'movieId': movie_ids,
        'predicted_rating': predicted_ratings
    }).merge(movies_data, on='movieId', how='left')

    return recommendations.sort_values('predicted_rating', ascending=False).head(n_recommendations)



In [None]:
def gradio_recommend(user_id):
    """Gradio callback: turn a typed user ID into a text list of recommendations.

    Args:
        user_id: Raw value from the text box; must be convertible with ``int``.

    Returns:
        One line per recommended movie ("<title> (Predicted Rating: x.xx)"),
        a "no recommendations" message when ``recommend_movies`` returns
        ``None``, or a validation message when the input is not an integer.
    """
    # Only the parsing step is guarded. Errors raised while computing
    # recommendations must not be misreported as an invalid user ID.
    try:
        user_id = int(user_id)
    except (TypeError, ValueError):
        return "Please enter a valid numeric user ID."

    recommendations = recommend_movies(user_id)
    if recommendations is None:
        return f"No recommendations available for user {user_id}."

    lines = []
    for title, rating in zip(recommendations['title'], recommendations['predicted_rating']):
        # A left join can leave the title missing; avoid printing "nan".
        shown_title = "Unknown title" if pd.isna(title) else title
        lines.append(f"{shown_title} (Predicted Rating: {rating:.2f})")
    return "\n".join(lines)

# One text field for the user ID, one text field for the formatted result.
user_id_box = gr.Textbox(label="Enter User ID")
recommendations_box = gr.Textbox(label="Recommendations")

# Wire the callback to the two components.
iface = gr.Interface(
    title="Movie Recommendation System",
    description="Get movie recommendations based on predicted ratings.",
    fn=gradio_recommend,
    inputs=user_id_box,
    outputs=recommendations_box,
)


In [None]:
# Start the Gradio server for the interface defined above. Per the recorded cell
# output, running this in Colab enables share=True by default and publishes a
# temporary public *.gradio.live URL; pass share=False to launch() to opt out.
iface.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://225edd68deca57bd0b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


