<a href="https://colab.research.google.com/github/Gireesheruvaram/LLM-Beginer/blob/LLMadv-kata1/Llmadvkata1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

In [None]:
from google.colab import drive
import ast
# Mount Google Drive at /content/drive so that files stored under MyDrive
# (such as the ratings workbook loaded in a later cell) are readable.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:



# Location of the ratings workbook on the mounted Google Drive.
file_path = "/content/drive/MyDrive/Untitled Folder/movie_ratings.xlsx"

try:
    df = pd.read_excel(file_path)
    print("Dataset loaded successfully!")
except FileNotFoundError:
    print(f"Error: The file at {file_path} was not found.")
    # Re-raise instead of calling exit(): exit() is meant for the interactive
    # shell and, inside a notebook kernel, shuts the session down rather than
    # simply halting this cell. Raising stops "Run All" here and keeps the
    # kernel (and the traceback) available for debugging.
    raise
except Exception as e:
    print(f"Error loading the dataset: {e}")
    raise

# Show the first rows as a quick sanity check of the loaded columns.
print(df.head())


Dataset loaded successfully!
  movieId userId                                              title  \
0      m1     u1                                      The Godfather   
1      m2     u1                           The Shawshank Redemption   
2      m3     u1                                    The Dark Knight   
3      m4     u2  The Lord of the Rings: The Fellowship of the Ring   
4      m5     u2                                         The Matrix   

                     genres  rating  
0        ['Crime', 'Drama']       5  
1        ['Drama', 'Crime']       4  
2       ['Action', 'Crime']       3  
3  ['Adventure', 'Fantasy']       4  
4      ['Action', 'Sci-Fi']       5  


In [None]:

try:
    # The genres column holds stringified Python lists such as "['Crime', 'Drama']".
    # ast.literal_eval only accepts Python literals, so, unlike eval(), a
    # crafted or corrupted spreadsheet cell cannot execute arbitrary code.
    # Values that are already parsed (non-strings) are passed through, which
    # keeps this cell safe to re-run.
    df["genres"] = df["genres"].apply(
        lambda x: ast.literal_eval(x) if isinstance(x, str) else x
    )
except Exception as e:
    print(f"Error parsing genres: {e}")
    # Re-raise rather than exit(): exit() would take the notebook kernel down
    # instead of just stopping execution at this cell.
    raise


# Identify unique users and genres
unique_users = df["userId"].unique()
print(f"\nUnique User IDs in DataFrame: {unique_users}")

# Collect every distinct genre name that appears in any movie's genre list
unique_genres = set(genre for genres in df["genres"] for genre in genres)
print(f"\nUnique Genres in DataFrame: {unique_genres}")


Unique User IDs in DataFrame: ['u1' 'u2' 'u3' 'u4' 'u5' 'u6' 'u7' 'u8' 'u9' 'u10' 'u11' 'u12' 'u13'
 'u14' 'u15']

Unique Genres in DataFrame: {'Fantasy', 'Adventure', 'Crime', 'Sci-Fi', 'History', 'Thriller', 'Romance', 'Action', 'Comedy', 'War', 'Drama'}


In [None]:
# Map every genre to a fixed position in the preference vectors.
# The genres are sorted first because iterating a set of strings directly
# follows hash order, which can differ from one kernel session to the next;
# sorting makes the vector layout (and the printed output) reproducible.
genre_index = {genre: i for i, genre in enumerate(sorted(unique_genres))}
n_genres = len(genre_index)

# Per-user accumulators: summed ratings per genre, and how many ratings
# contributed to each sum.
user_preferences = {user: np.zeros(n_genres) for user in unique_users}
user_genre_counts = {user: np.zeros(n_genres) for user in unique_users}

# Accumulate each rating into every genre of the rated movie
for _, row in df.iterrows():
    try:
        user = row["userId"]
        rating = row["rating"]
        genres = row["genres"]
        # Defensive: create accumulators for a user that was not pre-registered
        if user not in user_preferences:
            user_preferences[user] = np.zeros(n_genres)
            user_genre_counts[user] = np.zeros(n_genres)
        for genre in genres:
            if genre in genre_index:
                index = genre_index[genre]
                user_preferences[user][index] += rating
                user_genre_counts[user][index] += 1
    except Exception as e:
        # Best effort: report the offending row and keep processing the rest
        print(f"Error processing row {row}: {e}")
        continue

# Turn the sums into average ratings per genre. Genres a user never rated
# keep a 0 (the `out` default) because `where` skips positions with count 0.
for user in user_preferences:
    with np.errstate(divide='ignore', invalid='ignore'):
        user_preferences[user] = np.divide(
            user_preferences[user],
            user_genre_counts[user],
            out=np.zeros_like(user_preferences[user]),
            where=user_genre_counts[user] != 0
        )

# Print user preference vectors (average ratings), preceded by the column
# order so each number can be matched to its genre.
print("\nUser Preference Vectors (Average Ratings):")
print(f"Genre order: {list(genre_index)}")
for user, vector in user_preferences.items():
    print(f"User {user}: {vector}")


User Preference Vectors (Average Ratings):
User u1: [0.  0.  4.  0.  0.  0.  0.  3.  0.  0.  4.5]
User u2: [4. 4. 0. 5. 0. 0. 0. 5. 0. 0. 0.]
User u3: [0. 5. 4. 5. 0. 0. 0. 5. 4. 0. 0.]
User u4: [5.  0.  5.  0.  0.  0.  4.  0.  0.  0.  4.5]
User u5: [0. 0. 4. 0. 0. 4. 0. 0. 0. 0. 5.]
User u6: [0.  0.  0.  0.  5.  0.  0.  0.  0.  4.  4.5]
User u7: [0.  0.  4.5 0.  0.  0.  0.  0.  0.  0.  4.5]
User u8: [4. 4. 5. 0. 0. 0. 0. 5. 0. 0. 0.]
User u9: [0. 4. 0. 5. 0. 0. 0. 5. 0. 4. 0.]
User u10: [0.  0.  4.5 0.  0.  0.  0.  0.  5.  0.  4. ]
User u11: [0.  0.  5.  0.  0.  5.  4.  0.  0.  0.  4.5]
User u12: [5.  0.  5.  0.  0.  4.  0.  0.  0.  0.  4.5]
User u13: [0.  0.  0.  0.  5.  0.  0.  0.  0.  4.  4.5]
User u14: [0.  0.  4.5 0.  0.  0.  0.  0.  0.  0.  4.5]
User u15: [0. 0. 5. 0. 0. 0. 0. 5. 0. 0. 0.]


In [None]:

# Recommendation Function
def recommend_movies(user_id, user_preferences, external_movie_list, genre_index, top_n=3):
    """Rank candidate movies for a user by genre affinity.

    Each movie is scored with the dot product of the user's preference vector
    and a 0/1 indicator vector of the movie's genres, i.e. the sum of the
    user's per-genre values over the movie's known genres.

    Args:
        user_id: Key of the user in ``user_preferences``.
        user_preferences: Mapping of user id -> 1-D numeric vector whose
            positions follow ``genre_index``.
        external_movie_list: Iterable of dicts with a ``"Title"`` entry and a
            ``"Genre Vector"`` entry holding an iterable of genre names.
        genre_index: Mapping of genre name -> position in the vectors.
        top_n: Maximum number of titles to return.

    Returns:
        Up to ``top_n`` titles ordered by descending score (ties keep their
        input order). An empty list if the user is unknown. Malformed movie
        entries are reported with ``print`` and skipped.
    """
    try:
        if user_id not in user_preferences:
            raise ValueError(f"User {user_id} not found in user preferences.")

        user_vector = user_preferences[user_id]
        # Size the indicator vector from the genre_index argument rather than
        # a notebook-level global, so the function depends only on its inputs.
        n_genres = len(genre_index)
        recommendations = []

        for movie in external_movie_list:
            try:
                # Validate the movie structure
                if "Title" not in movie or "Genre Vector" not in movie:
                    raise KeyError("Missing 'Title' or 'Genre Vector' in movie data.")

                # Mark the position of every genre of this movie that we know;
                # genres missing from genre_index are ignored.
                genre_vector = np.zeros(n_genres)
                for genre in movie["Genre Vector"]:
                    if genre in genre_index:
                        genre_vector[genre_index[genre]] = 1

                similarity_score = np.dot(user_vector, genre_vector)
                recommendations.append((movie["Title"], similarity_score))
            except KeyError as e:
                print(f"Error in movie data: {e}")
                continue
            except Exception as e:
                # Build the label defensively: the entry may not be a dict at
                # all, and indexing it here would raise out of the handler.
                label = movie.get("Title", movie) if isinstance(movie, dict) else movie
                print(f"Unexpected error with movie {label}: {e}")
                continue

        # Highest score first; list.sort is stable, so ties keep input order
        recommendations.sort(key=lambda x: x[1], reverse=True)
        return [title for title, _ in recommendations[:top_n]]

    except ValueError as e:
        print(f"Error: {e}")
        return []

In [None]:
# Candidate catalogue to score against a user's preferences, written as
# (title, genre names) pairs. Note that the "Genre Vector" entry holds genre
# *names*; they are converted to a numeric vector when a movie is scored.
_candidate_movies = (
    ("The Godfather", ["Crime", "Drama"]),
    ("The Dark Knight", ["Action", "Crime"]),
    ("Inception", ["Action", "Sci-Fi"]),
    ("Forrest Gump", ["Drama", "Romance"]),
    ("The Lord of the Rings: The Fellowship of the Ring", ["Adventure", "Fantasy"]),
)

external_movie_list = [
    {"Title": movie_title, "Genre Vector": movie_genres}
    for movie_title, movie_genres in _candidate_movies
]

In [None]:
# Print all available user IDs and select one
print(f"\nAvailable User IDs: {unique_users}")

# Ask for a user ID and show that user's recommendations. Every failure is
# reported as a one-line message rather than a traceback.
try:
    user_id_to_recommend = input("Enter the User ID for which you want recommendations: ").strip()

    if user_id_to_recommend in user_preferences:
        recommended_movies = recommend_movies(
            user_id_to_recommend, user_preferences, external_movie_list, genre_index
        )
        if not recommended_movies:
            print(f"No recommendations found for User {user_id_to_recommend}.")
        else:
            print(f"\nRecommended Movies for User {user_id_to_recommend}:")
            # One title per line
            print(*recommended_movies, sep="\n")
    else:
        raise ValueError(f"User ID '{user_id_to_recommend}' not found in the dataset.")

except ValueError as err:
    print(f"Error: {err}")
except Exception as err:
    print(f"Unexpected error: {err}")



Available User IDs: ['u1' 'u2' 'u3' 'u4' 'u5' 'u6' 'u7' 'u8' 'u9' 'u10' 'u11' 'u12' 'u13'
 'u14' 'u15']
Enter the User ID for which you want recommendations: u1

Recommended Movies for User u1:
The Godfather
The Dark Knight
Forrest Gump
