Dataset available at: https://grouplens.org/datasets/hetrec-2011/

# R-BY-E

In [None]:
import pandas as pd

# Thresholds handed to the recommender functions defined below
theta = 0.3  # Set your desired similarity threshold here
epsilon = 0.1  # Set your desired epsilon here

# Load movies and movie genres data (tab-separated, Latin-1 encoded files)
movies_df = pd.read_csv(
    "/content/movies.dat",
    sep="\t",
    encoding="latin1",
)
movie_genres_df = pd.read_csv(
    "/content/movie_genres.dat",
    sep="\t",
    encoding="latin1",
)

def similarity(genres1, genres2):
    """Return the fraction of distinct ``genres1`` entries also found in ``genres2``.

    Args:
        genres1: Collection of genre labels; its distinct entries form the
            denominator.
        genres2: Iterable of genre labels to compare against.

    Returns:
        A float in ``[0.0, 1.0]``. ``0.0`` is returned when ``genres1`` is
        empty instead of raising ``ZeroDivisionError``.
    """
    # De-duplicate first so numerator and denominator count the same thing;
    # otherwise repeated labels in genres1 would deflate the ratio.
    distinct = set(genres1)
    if not distinct:
        return 0.0
    return len(distinct.intersection(genres2)) / len(distinct)

def reward(movie_id, profile_movies, candidate_movie_id, theta, epsilon):
    """Measure how much of the candidate's genre set the profile leaves uncovered.

    A candidate genre counts as covered when at least one movie in
    ``profile_movies`` carries the same genre in ``movie_genres_df``. The
    result is the uncovered fraction of the candidate's genres, added to
    itself; ``0`` is returned when the candidate has no genres.

    ``movie_id``, ``theta`` and ``epsilon`` are accepted but not read.

    NOTE(review): both summands are computed from the candidate's own genre
    set, so the value is the same for every ``movie_id`` — confirm whether the
    second term was meant to use a different feature set.
    """
    def genres_of(some_movie_id):
        rows = movie_genres_df[movie_genres_df['movieID'] == some_movie_id]
        return set(rows['genre'])

    candidate_features = genres_of(candidate_movie_id)

    # Accumulate every candidate genre shared with at least one profile movie.
    covered = set()
    for profile_movie in profile_movies:
        covered |= candidate_features & genres_of(profile_movie)

    total = len(candidate_features)
    if total == 0:
        return 0

    uncovered = len(candidate_features - covered)
    return (uncovered / total) + (uncovered / total)

def get_movie_title(movie_id):
    """Return the ``title`` of the first ``movies_df`` row whose ``id`` equals ``movie_id``.

    Raises:
        IndexError: if no row matches (``.iloc[0]`` on an empty selection).
    """
    matching_titles = movies_df.loc[movies_df['id'] == movie_id, 'title']
    return matching_titles.iloc[0]

def get_movie_features(movie_id):
    """Return the list of ``genre`` values recorded for ``movie_id`` in ``movie_genres_df``.

    The list is empty when the movie has no rows in the genre table.
    """
    belongs_to_movie = movie_genres_df['movieID'] == movie_id
    return movie_genres_df.loc[belongs_to_movie, 'genre'].tolist()

def generate_explanation_chain(candidate_movie_id, profile_movies, theta, epsilon):
    """Collect the profile movies whose reward for the candidate exceeds ``epsilon``.

    Profile order is preserved. ``theta`` is only forwarded to ``reward``.
    """
    return [
        profile_movie
        for profile_movie in profile_movies
        if reward(profile_movie, profile_movies, candidate_movie_id, theta, epsilon) > epsilon
    ]

def scoring(chain, candidate_movie_id, profile_movies, theta, epsilon, selected_chains):
    """Score an explanation chain for a candidate movie.

    For every movie in ``chain`` two quantities are summed: its ``reward``
    for the candidate, and the number of candidate genres that the chain
    movie does not have. Each sum is divided by ``len(chain) + 1`` and the two
    quotients are added.

    Args:
        chain: Movie ids that make up the explanation chain.
        candidate_movie_id: Id of the movie being scored.
        profile_movies: Movie ids in the user's profile (forwarded to ``reward``).
        theta: Forwarded to ``reward``.
        epsilon: Forwarded to ``reward``.
        selected_chains: Accepted for interface compatibility; not read.

    Returns:
        The chain's score as a float.

    NOTE(review): the term named ``diversity_penalty`` is *added* to the
    score rather than subtracted — confirm that this is intended.
    """
    # The candidate's genres do not change across chain members, so look them
    # up once instead of re-filtering the genre table on every iteration.
    candidate_features = set(get_movie_features(candidate_movie_id))

    sum_rwds = sum(
        reward(movie_id, profile_movies, candidate_movie_id, theta, epsilon)
        for movie_id in chain
    )
    diversity_penalty = sum(
        len(candidate_features.difference(get_movie_features(movie_id)))
        for movie_id in chain
    )

    denominator = len(chain) + 1
    return (sum_rwds / denominator) + (diversity_penalty / denominator)

def select_chains(chains, n):
    """Return the ``n`` highest-scoring chains without modifying ``chains``.

    Args:
        chains: Iterable of ``(candidate_id, chain, score)`` tuples; the score
            at index 2 is the sort key.
        n: Maximum number of entries to return. ``None`` returns everything;
            a negative value returns an empty list.

    Returns:
        A new list ordered by descending score. Entries with equal scores keep
        their original relative order.
    """
    # A negative slice bound would mean "all but the last |n|", which is
    # never what a top-n selection wants.
    if n is not None and n < 0:
        n = 0
    # sorted() builds a new list, so the caller's list keeps its order.
    ranked = sorted(chains, key=lambda entry: entry[2], reverse=True)
    return ranked[:n]

def recommend_movies(user_profile, n_recommendations, theta, epsilon):
    """Rank every movie outside the user's profile and return the best-scoring ones.

    Each id in ``movies_df['id']`` that is not in ``user_profile`` gets an
    explanation chain; candidates with a non-empty chain are scored, and the
    resulting ``(candidate_id, chain, score)`` tuples are handed to
    ``select_chains`` to pick ``n_recommendations`` of them.
    """
    scored_candidates = []
    previously_selected = []  # passed to scoring(); nothing is ever added to it here

    for candidate in movies_df['id'].tolist():
        # Skip movies already in the user's profile
        if candidate in user_profile:
            continue

        chain = generate_explanation_chain(candidate, user_profile, theta, epsilon)
        if not chain:
            continue

        chain_score = scoring(chain, candidate, user_profile, theta, epsilon, previously_selected)
        scored_candidates.append((candidate, chain, chain_score))

    return select_chains(scored_candidates, n_recommendations)

# Example usage: recommend for a small hand-picked profile and print each
# recommendation together with the profile movies that explain it.
user_profile = [1, 34, 98, 123, 500]  # Example user profile with movie IDs
n_recommendations = 5

recommendations = recommend_movies(user_profile, n_recommendations, theta, epsilon)

print("Recommendations:")
for movie_id, explanation_chain, score in recommendations:
    movie_title = get_movie_title(movie_id)

    # Resolve every predecessor up front (title + genres) before printing.
    explanation_chain_info = []
    for predecessor_id in explanation_chain:
        explanation_chain_info.append(
            (get_movie_title(predecessor_id), get_movie_features(predecessor_id))
        )

    print(f"Movie ID: {movie_id}, Title: {movie_title}, Explanation Chain:")
    for idx, (predecessor_title, predecessor_features) in enumerate(explanation_chain_info):
        print(f"  Predecessor {idx + 1}: Title: {predecessor_title}, Features: {predecessor_features}")
    print(f"Score: {score}")


Recommendations:
Movie ID: 51709, Title: Gwoemul, Explanation Chain:
  Predecessor 1: Title: Toy story, Features: ['Adventure', 'Animation', 'Children', 'Comedy', 'Fantasy']
  Predecessor 2: Title: Babe, Features: ['Children', 'Comedy', 'Drama', 'Fantasy']
  Predecessor 3: Title: Going Shopping, Features: ['Action', 'Thriller']
  Predecessor 4: Title: Chung Hing sam lam, Features: ['Drama', 'Mystery', 'Romance']
  Predecessor 5: Title: Mrs. Doubtfire, Features: ['Comedy', 'Drama']
Score: 5.25
Movie ID: 6741, Title: God Told Me To, Explanation Chain:
  Predecessor 1: Title: Toy story, Features: ['Adventure', 'Animation', 'Children', 'Comedy', 'Fantasy']
  Predecessor 2: Title: Babe, Features: ['Children', 'Comedy', 'Drama', 'Fantasy']
  Predecessor 3: Title: Going Shopping, Features: ['Action', 'Thriller']
  Predecessor 4: Title: Chung Hing sam lam, Features: ['Drama', 'Mystery', 'Romance']
  Predecessor 5: Title: Mrs. Doubtfire, Features: ['Comedy', 'Drama']
Score: 4.833333333333334
Mo

# Content-Based

In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import pairwise_distances
import pickle

# Load movie data
movies = pd.read_csv('/content/movies.dat', sep='\t', encoding='latin1', engine='python')
genres = pd.read_csv('/content/movie_genres.dat', sep='\t', encoding='latin1', engine='python')

# The genre table has one row per (movie, genre) pair. Collapse it to a single
# space-separated genre string per movie BEFORE merging; merging the raw table
# would duplicate every movie once per genre, leaving each row with a single
# genre and inflating the pairwise matrix below.
genres_per_movie = genres.groupby('movieID', as_index=False)['genre'].agg(' '.join)

# Merge movie data with genres (one row per movie)
movies = movies.merge(genres_per_movie, left_on='id', right_on='movieID')

# The tags used for vectorisation are the movie's combined genres
movies['tags'] = movies['genre']

# Initialize CountVectorizer
cv = CountVectorizer(max_features=5000, stop_words='english')

# Fit and transform the data
vector = cv.fit_transform(movies['tags']).toarray()

# Compute pairwise Jaccard DISTANCES (0 = identical genre sets, 1 = disjoint).
# Jaccard is a boolean metric, so pass presence/absence explicitly.
# NOTE: this rebinds the name `similarity`, which an earlier cell defines as a
# function; the name is kept because later code and the pickle below use it.
similarity = pairwise_distances(vector.astype(bool), metric='jaccard')

# Function to recommend similar movies based on Jaccard similarity
def recommend_similar_movies(user_profile, threshold=0.2, num_recommendations=5):
    """Recommend titles whose Jaccard distance to a profile movie is below ``threshold``.

    Args:
        user_profile: Iterable of movie ids (values of ``movies['id']``).
        threshold: Upper bound (exclusive) on the distance read from the
            module-level ``similarity`` matrix.
        num_recommendations: Maximum number of titles to return.

    Returns:
        Up to ``num_recommendations`` titles, closest first; ties are broken
        alphabetically so the result is deterministic.

    Raises:
        IndexError: if a profile id has no row in ``movies``.
    """
    profile_ids = set(user_profile)
    best_distance = {}  # title -> smallest distance to any profile movie

    for movie_id in user_profile:
        index = movies[movies['id'] == movie_id].index[0]
        distances = similarity[index]
        similar_movies_indices = np.where(distances < threshold)[0]
        neighbours = movies.iloc[similar_movies_indices]

        # Exclude profile movies by ID. Subtracting the id set from a set of
        # title strings (as was done before) never removes anything.
        not_in_profile = ~neighbours['id'].isin(list(profile_ids)).to_numpy()
        titles = neighbours['title'].to_numpy()[not_in_profile]
        neighbour_distances = distances[similar_movies_indices][not_in_profile]

        for title, distance in zip(titles, neighbour_distances):
            if distance < best_distance.get(title, float('inf')):
                best_distance[title] = distance

    # Rank explicitly instead of relying on set iteration order.
    ranked_titles = sorted(best_distance, key=lambda title: (best_distance[title], title))
    return ranked_titles[:num_recommendations]

# Example user profile
user_profile = [1, 34, 98, 123, 500]

# Number of top recommendations to display
num_recommendations = 5

# Recommend similar movies for the user profile
recommended_movies = recommend_similar_movies(user_profile, num_recommendations=num_recommendations)

print(f"Top {num_recommendations} recommended movies:")
for movie in recommended_movies:
    print(movie)

# Save processed data and similarity matrix.
# Context managers close (and flush) the files even if pickling fails;
# a bare open(...) passed to pickle.dump is never explicitly closed.
with open('movie_list.pkl', 'wb') as movie_list_file:
    pickle.dump(movies, movie_list_file)
with open('similarity.pkl', 'wb') as similarity_file:
    pickle.dump(similarity, similarity_file)




Top 5 recommended movies:
Return of the Secaucus Seven
Fried Green Tomatoes
Gongdong gyeongbi guyeok JSA
Captain Horatio Hornblower R.N.
How She Move


# Rough Work

In [None]:
# Print title and genres for each movie in the example profile
l = [1, 34, 98, 123, 500]
for i in l:
    title, feats = get_movie_title(i), get_movie_features(i)
    print(f'movie id {i} ,has movie title :- {title} and, has features :- {feats}')

movie title :- Toy story, has features :- ['Adventure', 'Animation', 'Children', 'Comedy', 'Fantasy']
movie title :- Babe, has features :- ['Children', 'Comedy', 'Drama', 'Fantasy']
movie title :- Going Shopping, has features :- ['Action', 'Thriller']
movie title :- Chung Hing sam lam, has features :- ['Drama', 'Mystery', 'Romance']
movie title :- Mrs. Doubtfire, has features :- ['Comedy', 'Drama']


In [None]:
# Print title and genres for the listed movie ids and gather the union of
# their genres in candidate_set
l = [51709, 6741, 6966, 52281, 31804]
candidate_set = set()
for i in l:
    title, feats = get_movie_title(i), get_movie_features(i)
    print(f'movie id {i} ,has movie title :- {title} and, has features :- {feats}')
    candidate_set.update(feats)

print(candidate_set)

movie id 51709 ,has movie title :- Gwoemul and, has features :- ['Action', 'Adventure', 'Comedy', 'Drama', 'Fantasy', 'Horror', 'Sci-Fi', 'Thriller']
movie id 6741 ,has movie title :- God Told Me To and, has features :- ['Crime', 'Horror', 'Mystery', 'Sci-Fi', 'Thriller']
movie id 6966 ,has movie title :- Darkman and, has features :- ['Action', 'Crime', 'Horror', 'Sci-Fi', 'Thriller']
movie id 52281 ,has movie title :- Grindhouse and, has features :- ['Action', 'Crime', 'Horror', 'Sci-Fi', 'Thriller']
movie id 31804 ,has movie title :- Nochnoy dozor and, has features :- ['Action', 'Fantasy', 'Horror', 'Mystery', 'Sci-Fi', 'Thriller']
{'Action', 'Mystery', 'Sci-Fi', 'Drama', 'Comedy', 'Horror', 'Crime', 'Fantasy', 'Adventure', 'Thriller'}


In [None]:
# Print title and genres for the profile movie ids and gather the union of
# their genres in profile_set
l = [1, 34, 98, 123, 500]
profile_set = set()
for i in l:
    title, feats = get_movie_title(i), get_movie_features(i)
    print(f'movie id {i} ,has movie title :- {title} and, has features :- {feats}')
    profile_set.update(feats)

print(profile_set)

movie id 1 ,has movie title :- Toy story and, has features :- ['Adventure', 'Animation', 'Children', 'Comedy', 'Fantasy']
movie id 34 ,has movie title :- Babe and, has features :- ['Children', 'Comedy', 'Drama', 'Fantasy']
movie id 98 ,has movie title :- Going Shopping and, has features :- ['Action', 'Thriller']
movie id 123 ,has movie title :- Chung Hing sam lam and, has features :- ['Drama', 'Mystery', 'Romance']
movie id 500 ,has movie title :- Mrs. Doubtfire and, has features :- ['Comedy', 'Drama']
{'Action', 'Mystery', 'Drama', 'Children', 'Comedy', 'Romance', 'Fantasy', 'Adventure', 'Animation', 'Thriller'}


In [None]:
# Genres found in profile_set but not in candidate_set
print("Features left out by cnaduidates :- ", profile_set - candidate_set)
# Genres found in candidate_set but not in profile_set
print("Features Added by candidates :- ", candidate_set - profile_set)

Features left out by cnaduidates :-  {'Romance', 'Animation', 'Children'}
Features Added by candidates :-  {'Sci-Fi', 'Horror', 'Crime'}
