In [1]:
# Importing Libraries
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [3]:
#loading rating dataset
ratings = pd.read_csv(r"D:\DEPI\ratings.csv")
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,17,4.0,944249077
1,1,25,1.0,944250228
2,1,29,2.0,943230976
3,1,30,5.0,944249077
4,1,32,5.0,943228858


In [4]:
# loading movie dataset
movies = pd.read_csv(r"D:\DEPI\movies.csv")
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


**Advanced** **Techniques**

**GANS**

In [63]:
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn

In [64]:
#normalize ratings for gans
scaler = MinMaxScaler()
ratings['rating'] = scaler.fit_transform(ratings[['rating']])

# data to a tensorFlow dataset
df = tf.data.Dataset.from_tensor_slices((ratings['userId'], ratings['movieId'], ratings['rating']))

# shuffle and batch the dataset
batch_size = 64
df = df.shuffle(buffer_size=1024).batch(batch_size)

# prefetch for performance
df = df.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

# create an iterator to convert batches into numpy arrays
iterator = iter(df)

# fetch a batch from the iterator
user_batch, movie_batch, rating_batch = next(iterator)

# tensorFlow tensors to NumPy arrays
user_batch = user_batch.numpy()
movie_batch = movie_batch.numpy()
rating_batch = rating_batch.numpy()


In [65]:
# define generator
def generator(latent_dim):
    model = nn.Sequential(
        nn.Linear(latent_dim, 128),
        nn.ReLU(),
        nn.Linear(128, 256),
        nn.ReLU(),
        nn.Linear(256, 512),
        nn.ReLU(),
        nn.Linear(512, 3),
        nn.Sigmoid()
    )
    return model

In [66]:
# define discriminator
def discriminator():
    model = nn.Sequential(
        nn.Linear(3, 512),
        nn.ReLU(),
        nn.Linear(512, 256),
        nn.ReLU(),
        nn.Linear(256, 128),
        nn.ReLU(),
        nn.Linear(128, 1),
        nn.Sigmoid()
    )
    return model

In [67]:
# training loop for a gans

def train_gan(generator, discriminator, real_data, latent_dim, epochs, batch_size):
    # define optimizers
    optimizer_g = torch.optim.Adam(generator.parameters(), lr=0.01)
    optimizer_d = torch.optim.Adam(discriminator.parameters(), lr=0.01)

    # define loss function
    criterion = nn.BCELoss()

    for epoch in range(epochs):
        # create real and fake labels
        real_labels = torch.ones((batch_size, 1))
        fake_labels = torch.zeros((batch_size, 1))


        # train discriminator
        optimizer_d.zero_grad()

        # real data to tensor
        real_data = torch.FloatTensor(real_data)
        d_loss_real = criterion(discriminator(real_data), real_labels)


        # fake data
        z = torch.randn(batch_size, latent_dim)
        # noise to create fake data by generator
        fake_data = generator(z)
        d_loss_fake = criterion(discriminator(fake_data.detach()), fake_labels)

        # total discriminator loss
        d_loss = (d_loss_real + d_loss_fake) / 2
        d_loss.backward()
        optimizer_d.step()



        # train generator
        optimizer_g.zero_grad()

        g_loss = criterion(discriminator(fake_data), real_labels)
        # gradients for the generator
        g_loss.backward()
        # updates weight
        optimizer_g.step()

        if epoch % 1000 == 0:
            print(f"Epoch {epoch}: D Loss: {d_loss.item()}, G Loss: {g_loss.item()}")


In [68]:
 #initialize & train gan to generate fake data based on real data


# define latent dimension for noise input to generator
latent_dim = 1000

# initialize generator and discriminator
gen = generator (latent_dim)
dis = discriminator()

user_movie_ratings = np.column_stack((user_batch, movie_batch, rating_batch))

# train the gan with real data
train_gan(gen, dis, user_movie_ratings, latent_dim=latent_dim, epochs=5000, batch_size=64)

Epoch 0: D Loss: 0.36299222707748413, G Loss: 0.5332573652267456
Epoch 1000: D Loss: 6.390917353773512e-19, G Loss: 41.20109176635742
Epoch 2000: D Loss: 6.390917353773512e-19, G Loss: 41.20109176635742
Epoch 3000: D Loss: 6.390917353773512e-19, G Loss: 41.20109176635742
Epoch 4000: D Loss: 6.390917353773512e-19, G Loss: 41.20109176635742


In [69]:
# stopping criteria

#define stopping criteria
def train_gan(generator, latent_dim, epochs=10000, patience=10):
    best_loss = float('inf')
    epochs_without_improvement = 0
    stopping_criteria_met = False

    #train loop
    for epoch in range(epochs):
        # replace simulated loss with actual loss
        loss = torch.randn(1).item()

        # check if loss improved
        if loss < best_loss:
            best_loss = loss
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1

        # early stopping condition
        if epochs_without_improvement >= patience:
            print(f"Stopping early at epoch {epoch} due to no improvement in loss.")
            stopping_criteria_met = True
            break

        if epoch % 1000 == 0:
            print(f'Epoch {epoch}: Loss = {loss}')

    if not stopping_criteria_met:
        print(f"Training completed for {epochs} epochs.")

latent_dim = 10
generator = generator(latent_dim)

train_gan(generator, latent_dim, epochs=10000, patience=10)


Epoch 0: Loss = 0.21997517347335815
Stopping early at epoch 27 due to no improvement in loss.


**MLOPs**

MLFLOW

In [None]:
import mlflow
import mlflow.sklearn 
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import  precision_score


mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")
mlflow.set_experiment("recommendation_system_experiment")

model = pd.merge(ratings, movies, on='movieId')

train_data , test_data = train_test_split(model , test_size=0.3 , random_state=42)

with mlflow.start_run() :
     
    n_neighbors = 5  # Number of neighbors
    algorithm = 'auto'  # Algorithm used to search for neighbors
    mlflow.log_param("n_neighbors", n_neighbors)
    mlflow.log_param("algorithm", algorithm)

    knn = NearestNeighbors(n_neighbors=n_neighbors, algorithm=algorithm, metric='cosine')
    knn.fit(model)

    distances, indices = knn.kneighbors(model)

    k = 5

    user_id = int(input("Enter user ID between 1 & 5 : ")) 
    user_ratings = ratings[ratings['userId'] == user_id]

    if user_ratings.empty:

        print(f"No ratings found for user ID {user_id}. Please check the ID and try again.")

    else:
        predicted_ratings = model[user_id - 1] 

        actual_ratings = user_ratings['rating'].tolist()

        threshold = 3
        
        # Convert actual and predicted ratings to binary values (1 for relevant, 0 for not relevant)
        actual_binary = [1 if rating >= threshold else 0 for rating in actual_ratings]
        predicted_binary = [1 if rating >= threshold else 0 for rating in predicted_ratings if rating > 0]  # Ensure only valid predicted ratings

        # Calculate Precision
        precision = precision_score(actual_binary, predicted_binary, zero_division=0)

        mlflow.log_metric("Precision", precision)
        
        print("Precision: " , precision )

mlflow.end_run()
        

In [70]:
model = pd.merge(ratings, movies, on='movieId')

Prompt Engineering

In [71]:
# recommend movies function based on rating
def recommend_movies_rating(rating_input):


    # filter data to get movies that user ask for
    recommended_movies = model[model['rating'] == rating_input]

    if recommended_movies.empty:
        return ["Sorry, no movies found with that rating."]

    # extract the movie titles from the filtered DataFrame
    return recommended_movies['title'].unique()[:5]

# chatbot function
def movies_rating_chatbot():
    print("Welcome to the Movie Recommendation Bot!")

    while True:
        # ask the user for a rating to search for movies
        user_input = input("Enter a rating to find movies or 'quit' to exit): ").strip()

        if user_input.lower() == 'quit':
            print("Goodbye! Enjoy your movies!")
            break

        try:
            # convert input to float for rating comparison
            rating_input = float(user_input)

            if rating_input < 1 or rating_input > 5:
                print("Please enter a rating between 1 and 5.")
                continue

        except ValueError:
            print("Please enter a valid number.")
            continue

        # get recommendations based on rating
        recommendations = recommend_movies_rating(rating_input)

        # run recommended movies
        if len(recommendations) > 0:
            print(f"Movies with a rating of {rating_input}: {', '.join(recommendations)}")
        else:
            print(f"Sorry, no movies found with a rating of {rating_input}.")

# run chatbot
movies_rating_chatbot()



Welcome to the Movie Recommendation Bot!
Enter a rating to find movies or 'quit' to exit): 1
Movies with a rating of 1.0: Shanghai Triad (Yao a yao yao dao waipo qiao) (1995), Twelve Monkeys (a.k.a. 12 Monkeys) (1995), White Balloon, The (Badkonake sefid) (1995), Taxi Driver (1976), Doom Generation, The (1995)
Enter a rating to find movies or 'quit' to exit): quit
Goodbye! Enjoy your movies!
