In [None]:
import pandas as pd

# Read the tab-separated, Latin-1 encoded movie table and movie-to-genre table.
movies_df = pd.read_table("/content/movies.dat", encoding="latin1")
movie_genres_df = pd.read_table("/content/movie_genres.dat", encoding="latin1")

# Tunable thresholds handed to the recommender.
theta = 0.5  # similarity a profile movie must exceed to be considered for a chain
epsilon = 0.3  # reward a profile movie must exceed to be added to a chain

def similarity(genres1, genres2):
    """Return the Jaccard similarity between two genre collections.

    Args:
        genres1: Iterable of hashable genre labels for the first movie.
        genres2: Iterable of hashable genre labels for the second movie.

    Returns:
        The size of the intersection divided by the size of the union, as a
        float in [0, 1]. When both collections are empty the union is empty
        as well, so 0.0 is returned instead of raising ZeroDivisionError.
    """
    first, second = set(genres1), set(genres2)
    union = first | second
    if not union:
        # Two movies without any genre information share nothing measurable.
        return 0.0
    return len(first & second) / len(union)

def reward(movie_id, candidate_movie_id, candidate_features_covered , theta, epsilon):
    """Reward for explaining a candidate movie with one profile movie.

    Only features shared by both movies that are not yet in
    ``candidate_features_covered`` count towards the reward.

    Args:
        movie_id: Id of the profile movie being considered.
        candidate_movie_id: Id of the candidate movie being explained.
        candidate_features_covered: Set of candidate features already covered.
            It is updated in place with the newly covered features and is also
            returned.
        theta: Unused here; kept so the signature stays unchanged.
        epsilon: Unused here; kept so the signature stays unchanged.

    Returns:
        Tuple ``(reward_value, candidate_features_covered)`` where
        ``reward_value`` is ``new / |candidate features| + new / |profile
        features|`` and ``new`` is the number of newly covered features.
        When nothing new is covered the reward is 0.0, including the case
        where either movie has no features at all (which previously raised
        ZeroDivisionError).
    """
    candidate_features = set(get_movie_features(candidate_movie_id))
    profile_movie_features = set(get_movie_features(movie_id))

    shared_features = profile_movie_features.intersection(candidate_features)
    newly_covered = shared_features.difference(candidate_features_covered)
    candidate_features_covered.update(newly_covered)

    if not newly_covered:
        # Nothing new is covered; this also covers movies with no features,
        # where the divisions below would have a zero denominator.
        return 0.0, candidate_features_covered

    # newly_covered is a non-empty subset of both feature sets, so neither
    # denominator can be zero here.
    new_count = len(newly_covered)
    reward_value = (new_count / len(candidate_features)) + (new_count / len(profile_movie_features))
    return reward_value, candidate_features_covered

def get_movie_title(movie_id):
    """Return the title of the first ``movies_df`` row whose ``id`` equals ``movie_id``.

    Raises IndexError (from ``.iloc[0]``) when no row matches.
    """
    is_requested_movie = movies_df['id'] == movie_id
    matching_titles = movies_df.loc[is_requested_movie, 'title']
    return matching_titles.iloc[0]

def get_movie_features(movie_id):
    """Return the genres listed for ``movie_id`` in ``movie_genres_df``.

    The result is a list in table row order; it is empty when the movie has
    no rows in the genre table.
    """
    is_requested_movie = movie_genres_df['movieID'] == movie_id
    return movie_genres_df.loc[is_requested_movie, 'genre'].tolist()

def generate_explanation_chain(candidate_movie_id, profile_movies, theta, epsilon):
    """Build the chain of profile movies that explains a candidate movie.

    Profile movies are visited in the given order. A movie joins the chain
    when its similarity to the candidate exceeds ``theta`` and its reward
    (newly covered candidate features) exceeds ``epsilon``.

    Args:
        candidate_movie_id: Id of the movie being considered for recommendation.
        profile_movies: Iterable of movie ids from the user's profile.
        theta: Similarity threshold a profile movie must exceed.
        epsilon: Reward threshold a profile movie must exceed.

    Returns:
        Tuple ``(explanation_chain, rwd_sum)``: the list of accepted profile
        movie ids and the sum of their rewards (0 when the chain is empty).
    """
    explanation_chain = []
    rwd_sum = 0
    candidate_features_covered = set()
    # The candidate's features do not change inside the loop; look them up once.
    candidate_features = get_movie_features(candidate_movie_id)
    for movie_id in profile_movies:
        if similarity(candidate_features, get_movie_features(movie_id)) > theta:
            # Evaluate against a copy of the covered set: reward() updates the
            # set it receives, and a movie that ends up rejected must not mark
            # features as covered for the movies that follow it.
            rwd, covered_if_added = reward(
                movie_id, candidate_movie_id, set(candidate_features_covered), theta, epsilon
            )
            if rwd > epsilon:
                explanation_chain.append(movie_id)
                rwd_sum += rwd
                candidate_features_covered = covered_if_added
    return explanation_chain, rwd_sum

def scoring(chain, rwd_sum,candidate_movie_id, selected_chains):
    """Score an explanation chain from its reward total and a diversity term.

    Args:
        chain: List of profile movie ids forming the explanation chain.
        rwd_sum: Sum of the rewards of the chain members.
        candidate_movie_id: Not used in the computation.
        selected_chains: Previously collected chains to compare against.

    Returns:
        ``rwd_sum / (len(chain) + 1) + diversity / (len(chain) + 1)`` where
        ``diversity`` is the number of distinct chain members that are missing
        from at least one chain in ``selected_chains``.
    """
    # A member counts as soon as any previously collected chain lacks it.
    distinct_members = {
        member
        for member in chain
        if any(member not in previous for previous in selected_chains)
    }
    normaliser = len(chain) + 1
    return (rwd_sum / normaliser) + (len(distinct_members) / normaliser)

def select_chains(chains, n):
    """Return the ``n`` highest-scoring entries of ``chains``.

    Args:
        chains: Sequence of entries whose element at index 2 is the score,
            e.g. ``(movie_id, explanation_chain, score)`` tuples.
        n: Maximum number of entries to return (an integer); values of zero
            or below yield an empty list.

    Returns:
        A new list with at most ``n`` entries ordered by descending score.
        Entries with equal scores keep their relative input order. The input
        sequence itself is left untouched (it used to be sorted in place).
    """
    # sorted() builds a new list, so the caller's list is not reordered.
    ranked = sorted(chains, key=lambda entry: entry[2], reverse=True)
    # Clamp so a negative n selects nothing rather than slicing from the end.
    return ranked[:max(n, 0)]

def recommend_movies(user_profile, n_recommendations, theta, epsilon):
    """Recommend movies outside the user's profile, each with an explanation chain.

    Every movie id in ``movies_df`` that is not in the profile is a candidate.
    Candidates with a non-empty explanation chain are scored, and the
    top-scoring ones are returned.

    Args:
        user_profile: Movie ids the user already has; the order is the order
            in which profile movies are tried for each chain.
        n_recommendations: Maximum number of recommendations to return.
        theta: Similarity threshold forwarded to ``generate_explanation_chain``.
        epsilon: Reward threshold forwarded to ``generate_explanation_chain``.

    Returns:
        List of ``(candidate_movie_id, explanation_chain, score)`` tuples as
        returned by ``select_chains``.
    """
    # Set lookup keeps the profile check cheap across the whole catalogue.
    profile_ids = set(user_profile)
    recommended_movies = []
    # NOTE(review): every non-empty chain is appended here, not only those of
    # the finally selected movies, so a candidate's score depends on which
    # candidates were processed before it.
    selected_chains = []
    for candidate_movie_id in movies_df['id'].tolist():
        if candidate_movie_id in profile_ids:
            # Never recommend something the user already has.
            continue
        explanation_chain, rwd_sum = generate_explanation_chain(candidate_movie_id, user_profile, theta, epsilon)
        if not explanation_chain:
            # No profile movie explains this candidate; skip it.
            continue
        score = scoring(explanation_chain, rwd_sum, candidate_movie_id, selected_chains)
        selected_chains.append(explanation_chain)
        recommended_movies.append((candidate_movie_id, explanation_chain, score))

    return select_chains(recommended_movies, n_recommendations)

#n=input("Enter no. of recommendations wanted :- ")
#loop=input("Enter no. of movies in profile")
#user_prof=[]
#for in range(loop):
#mn=input("Enter MOvie Name :- ")
#user_prof.append(movies[movies['title'].lower==mn.lower].id)
#recommendations = recommend_movies(user_profile, n_recommendations, theta, epsilon)

# Example usage: recommend for a hand-picked profile of movie ids.
user_profile = [1, 34, 98, 123, 500, 1213,45]
n_recommendations = 10

recommendations = recommend_movies(user_profile, n_recommendations, theta, epsilon)
print("Recommendations:")
for movie_id, explanation_chain, score in recommendations:
    movie_title = get_movie_title(movie_id)
    # Resolve the title and genres of every chain member before printing;
    # a distinct loop name keeps the candidate's movie_id unshadowed.
    explanation_chain_info = [
        (get_movie_title(predecessor_id), get_movie_features(predecessor_id))
        for predecessor_id in explanation_chain
    ]
    print(f"Movie ID: {movie_id}, Title: {movie_title}, Candidate Movie Genre: {get_movie_features(movie_id)} , Explanation Chain:")
    for idx, (predecessor_title, predecessor_features) in enumerate(explanation_chain_info):
        print(f"  Predecessor {idx + 1}: Title: {predecessor_title}, Features: {predecessor_features}")
    print(f"Score: {score}")

Recommendations:
Movie ID: 16, Title: Casino, Candidate Movie Genre: ['Crime', 'Drama'] , Explanation Chain:
  Predecessor 1: Title: Goodfellas, Features: ['Crime', 'Drama']
Score: 1.5
Movie ID: 30, Title: Yao a yao yao dao waipo qiao, Candidate Movie Genre: ['Crime', 'Drama'] , Explanation Chain:
  Predecessor 1: Title: Goodfellas, Features: ['Crime', 'Drama']
Score: 1.5
Movie ID: 36, Title: Dead Man Walking, Candidate Movie Genre: ['Crime', 'Drama'] , Explanation Chain:
  Predecessor 1: Title: Goodfellas, Features: ['Crime', 'Drama']
Score: 1.5
Movie ID: 72, Title: Kicking & Screaming, Candidate Movie Genre: ['Comedy', 'Drama'] , Explanation Chain:
  Predecessor 1: Title: Mrs. Doubtfire, Features: ['Comedy', 'Drama']
Score: 1.5
Movie ID: 75, Title: Big Bully, Candidate Movie Genre: ['Comedy', 'Drama'] , Explanation Chain:
  Predecessor 1: Title: Mrs. Doubtfire, Features: ['Comedy', 'Drama']
Score: 1.5
Movie ID: 82, Title: Antonia, Candidate Movie Genre: ['Comedy', 'Drama'] , Explanat

In [None]:
# Print the title and genre list of each movie in a smaller example profile.
# NOTE: this rebinds the notebook-level `user_profile`.
user_profile = [1, 34, 98, 123, 500]
for profile_movie_id in user_profile:
    print(get_movie_title(profile_movie_id))
    print(get_movie_features(profile_movie_id))

Toy story
['Adventure', 'Animation', 'Children', 'Comedy', 'Fantasy']
Babe
['Children', 'Comedy', 'Drama', 'Fantasy']
Going Shopping
['Action', 'Thriller']
Chung Hing sam lam
['Drama', 'Mystery', 'Romance']
Mrs. Doubtfire
['Comedy', 'Drama']


In [None]:
import pandas as pd

# Read the tab-separated, Latin-1 encoded movie table and movie-to-genre table.
movies_df = pd.read_table("/content/movies.dat", encoding="latin1")
movie_genres_df = pd.read_table("/content/movie_genres.dat", encoding="latin1")

# Average-similarity threshold for this cell's recommender.
# NOTE: this rebinds the notebook-level `theta`.
theta = 0.3

def similarity(genres1, genres2):
    """Return the Jaccard similarity between two genre collections.

    Args:
        genres1: Iterable of hashable genre labels for the first movie.
        genres2: Iterable of hashable genre labels for the second movie.

    Returns:
        The size of the intersection divided by the size of the union, as a
        float in [0, 1]. When both collections are empty the union is empty
        as well, so 0.0 is returned instead of raising ZeroDivisionError.
    """
    first, second = set(genres1), set(genres2)
    union = first | second
    if not union:
        # Two movies without any genre information share nothing measurable.
        return 0.0
    return len(first & second) / len(union)

def recommend_similar_movies(user_profile, theta):
    """Recommend movies whose average genre similarity to the profile exceeds ``theta``.

    Args:
        user_profile: Sized collection of movie ids the user already has.
        theta: Threshold the mean similarity must exceed.

    Returns:
        List of movie ids from ``movies_df['id']`` (in table order) that are
        not in the profile and whose mean ``similarity`` to the profile
        movies is greater than ``theta``. An empty profile yields an empty
        list (it previously raised ZeroDivisionError).
    """
    if len(user_profile) == 0:
        # There is nothing to compare against, and the mean below would
        # divide by zero.
        return []

    # The profile's genre sets are the same for every candidate; build them once.
    profile_genre_sets = [
        set(movie_genres_df[movie_genres_df['movieID'] == profile_movie]['genre'])
        for profile_movie in user_profile
    ]

    recommended_movies = []
    for movie_id in movies_df['id']:
        if movie_id in user_profile:
            # Exclude movies already in the user's profile.
            continue
        movie_genres = set(movie_genres_df[movie_genres_df['movieID'] == movie_id]['genre'])
        profile_similarity = sum(
            similarity(movie_genres, profile_genres) for profile_genres in profile_genre_sets
        ) / len(profile_genre_sets)
        if profile_similarity > theta:
            recommended_movies.append(movie_id)
    return recommended_movies

# Example usage: print the first n recommendations for a sample profile.
user_profile = [1, 34, 98, 123, 500]  # Example user profile with movie IDs
recommended_movies = recommend_similar_movies(user_profile, theta)
n = 5
count = 0
print("Recommended movies:")
m = []  # ids of the movies that were printed
# Slicing caps the loop at n entries, replacing the manual counter check.
for movie_id in recommended_movies[:n]:
    movie_title = movies_df.loc[movies_df['id'] == movie_id, 'title'].iloc[0]
    m.append(movie_id)
    print(f"Movie ID: {movie_id}, Title: {movie_title}")
    count += 1


Recommended movies:
Movie ID: 4, Title: Waiting to Exhale
Movie ID: 11, Title: The American President
Movie ID: 17, Title: Sense and Sensibility
Movie ID: 18, Title: Four Rooms
Movie ID: 21, Title: Get Shorty


In [None]:
# import pandas as pd
# m_tags=pd.read_csv('/content/movie_tags.dat',sep="\t", encoding="latin1")
# tags=pd.read_csv('/content/tags.dat',sep="\t", encoding="latin1")
# merged_df = pd.merge(m_tags, tags, left_on='tagID', right_on='id')
# merged_df.drop(columns=['tagID', 'id'], inplace=True)

In [None]:
# Look up a few movies by exact title and print each one's title and genres.
l = ['Big Fish','Pearl Harbor','The Illusionist', 'The Notebook']
# One list of matching ids per requested title.
x = [movies_df[movies_df['title'] == wanted_title].id.tolist() for wanted_title in l]
# Keep the first matching id for every title.
y = [matching_ids[0] for matching_ids in x]
for chosen_id in y:
    print(get_movie_title(chosen_id))
    print(get_movie_features(chosen_id))
    print()

Big Fish
['Drama', 'Fantasy', 'Romance']

Pearl Harbor
['Action', 'Drama', 'Romance', 'War']

The Illusionist
['Drama', 'Fantasy', 'Mystery', 'Romance', 'Thriller']

The Notebook
['Drama', 'Romance', 'War']

