<a href="https://colab.research.google.com/github/Kamal-Chandra/Sparse-Recommendations/blob/main/oldSystem.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Location of the merged ratings export (Colab upload path).
RATINGS_CSV = "/content/ratings.csv"

# Load data from CSV file into a DataFrame, considering only the necessary columns
ratings_df = pd.read_csv(RATINGS_CSV, usecols=["userId", "movieId", "rating", "genres", "title"])

# Get unique user and movie IDs (in order of first appearance in the file)
user_ids = ratings_df['userId'].unique()
movie_ids = ratings_df['movieId'].unique()

# Build the user x movie rating matrix with a single pivot instead of writing
# one cell at a time inside iterrows(). If a (user, movie) pair occurs more
# than once the last rating wins, which is what repeated assignment did.
# Missing entries become 0, so in this matrix 0 means "not rated".
matrix = (
    ratings_df
    .drop_duplicates(subset=["userId", "movieId"], keep="last")
    .pivot(index="userId", columns="movieId", values="rating")
    .reindex(index=user_ids, columns=movie_ids)
    .rename_axis(index=None, columns=None)
    .astype(float)
    .fillna(0)
)

# Column labels of the movie vs genres matrix
genres = ["Action", "Adventure", "Animation", "Children's", "Comedy", "Crime", "Documentary",
          "Drama", "Fantasy", "Film-Noir", "Horror", "Musical", "Mystery", "Romance",
          "Sci-Fi", "Thriller", "War", "Western"]

# Alternate spellings accepted for a genre label.
# NOTE(review): the saved notebook output shows the "Children's" column as 0
# for every user, which suggests the CSV spells this genre "Children" --
# confirm against the data file. Accepting both is harmless either way.
genre_spellings = {"Children's": ("Children",)}

# One list of genre tokens per movie (last occurrence wins, as before).
# A missing genres value is treated as "no genres" instead of raising.
genres_by_movie = (
    ratings_df
    .drop_duplicates(subset="movieId", keep="last")
    .set_index("movieId")["genres"]
    .fillna("")
    .str.split("|")
)


def _genre_flags(label):
    """Return a 0/1 Series (indexed by movieId) marking movies tagged with `label`."""
    accepted = {label, *genre_spellings.get(label, ())}
    return genres_by_movie.map(lambda tokens: int(not accepted.isdisjoint(tokens)))


# Movie vs genres indicator matrix: rows follow movie_ids, columns follow genres
movie_genre_matrix = pd.DataFrame(
    {genre: _genre_flags(genre) for genre in genres},
    index=movie_ids,
    columns=genres,
)

# Display the resulting matrix
print(matrix)

# Create the transpose of the matrix
transpose_matrix = matrix.transpose()

# Display the transpose matrix
print(transpose_matrix)

# Display the movie vs genres matrix
print(movie_genre_matrix)

     1       3       6       47      50      70      101     110     151     \
1       4.0     4.0     4.0     5.0     5.0     3.0     5.0     4.0     5.0   
2       0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
3       0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
4       0.0     0.0     0.0     2.0     0.0     0.0     0.0     0.0     0.0   
5       4.0     0.0     0.0     0.0     4.0     0.0     0.0     4.0     0.0   
..      ...     ...     ...     ...     ...     ...     ...     ...     ...   
606     2.5     0.0     0.0     3.0     4.5     4.0     0.0     3.5     0.0   
607     4.0     0.0     0.0     0.0     0.0     0.0     0.0     5.0     0.0   
608     2.5     2.0     0.0     4.5     4.5     3.0     0.0     4.0     0.0   
609     3.0     0.0     0.0     0.0     0.0     0.0     0.0     3.0     0.0   
610     5.0     0.0     5.0     5.0     4.0     4.0     0.0     4.5     0.0   

     157     ...  147662  148166  149011  152372  1

In [None]:
# Build per-user genre preferences: for every user, sum the ratings of the
# movies they rated into each genre those movies carry, then min-max scale
# each user's totals. This is computed for all ratings at once rather than
# with nested iterrows() loops.

# Genre indicator row for every rating, aligned positionally with ratings_df
# (raises KeyError if a rated movie is missing from movie_genre_matrix)
rating_genre_flags = movie_genre_matrix.loc[ratings_df['movieId'].to_numpy()].astype(float)

# Weight each indicator row by the rating it belongs to
weighted_flags = rating_genre_flags.to_numpy() * ratings_df['rating'].to_numpy()[:, None]

# Sum the weighted indicators per user; sort=False keeps users in order of
# first appearance, matching user_ids
genre_totals = (
    pd.DataFrame(weighted_flags, columns=list(movie_genre_matrix.columns))
    .groupby(ratings_df['userId'].to_numpy(), sort=False)
    .sum()
)

# Normalize preferences using Min-Max normalization (row-wise, per user)
lowest_total = genre_totals.min(axis=1)
total_spread = genre_totals.max(axis=1) - lowest_total

# A zero spread means every genre total is equal; dividing by NaN there and
# filling with 0.0 keeps such users inside the [0, 1] scale instead of
# leaving their raw, un-normalized totals in the table.
user_preferences_df = (
    genre_totals
    .sub(lowest_total, axis=0)
    .div(total_spread.where(total_spread != 0), axis=0)
    .fillna(0.0)
)

# Dictionary view of the same data: {user_id: {genre: preference}}
user_preferences = user_preferences_df.to_dict(orient="index")

# Display user preferences
print(user_preferences_df)


       Action  Adventure  Animation  Children's    Comedy     Crime  \
1    1.000000   0.958869   0.349614         0.0  0.912596  0.503856   
2    0.659091   0.189394   0.000000         0.0  0.424242  0.575758   
3    0.793651   0.476190   0.031746         0.0  0.142857  0.015873   
4    0.198565   0.253589   0.057416         0.0  0.873206  0.246411   
5    0.294737   0.273684   0.273684         0.0  0.547368  0.484211   
..        ...        ...        ...         ...       ...       ...   
606  0.181543   0.194781   0.059002         0.0  0.567700  0.183812   
607  0.814590   0.474164   0.060790         0.0  0.556231  0.313070   
608  0.949563   0.600103   0.176531         0.0  1.000000  0.542975   
609  0.531250   0.500000   0.046875         0.0  0.359375  0.328125   
610  1.000000   0.531561   0.138329         0.0  0.823798  0.557346   

     Documentary     Drama   Fantasy  Film-Noir    Horror   Musical   Mystery  \
1       0.000000  0.791774  0.519280   0.012853  0.151671  0.26478

In [None]:
def collaborative_filtering_recommendation(user_id, matrix, k=1000):
    """Recommend movies the user has not rated, drawn from similar users.

    Args:
        user_id: Row label of the target user in ``matrix``.
        matrix: User x movie rating DataFrame. A missing rating may be stored
            either as NaN or as 0; both are treated as "not rated".
        k: Maximum number of most-similar users to draw recommendations from.

    Returns:
        A list of unique movie IDs (column labels) that the target user has
        not rated and at least one of the selected similar users has rated.
        IDs are ordered by the rank of the most similar user who rated them.

    Raises:
        KeyError: If ``user_id`` is not a row label of ``matrix``.
    """
    # Get the ratings of the specified user
    user_ratings = matrix.loc[user_id]

    # Find similar users using the Pearson correlation between rating rows
    similarities = matrix.apply(lambda row: row.corr(user_ratings, method='pearson'), axis=1)
    # nlargest skips NaN similarities (e.g. rows with no variance)
    similar_users = similarities.drop(user_id).nlargest(k).index

    # Checking only isna() finds nothing on a zero-filled matrix and yields no
    # recommendations at all, so 0 is treated as "not rated" as well.
    unrated_by_target = user_ratings.isna() | (user_ratings == 0)

    # A dict is used as an ordered set: it removes duplicates while keeping
    # the most-similar-first order in which movies were discovered.
    recommendations = {}
    for user in similar_users:
        neighbour_ratings = matrix.loc[user][unrated_by_target]
        rated_by_neighbour = neighbour_ratings[
            neighbour_ratings.notna() & (neighbour_ratings != 0)
        ]
        recommendations.update(dict.fromkeys(rated_by_neighbour.index))

    return list(recommendations)

In [None]:
# Number of similar users to consider; the same value is used for every user,
# so it is set once before the loop
k = 1000

# Generate and print recommendations for user IDs 1 through 20
for user_id in range(1, 21):
    # Recommended movie IDs for this user, based on the k most similar users
    recommendations = collaborative_filtering_recommendation(user_id, matrix, k)

    print("Recommended movies for user", user_id, ":", recommendations)

Recommended movies for user 1 : []
Recommended movies for user 2 : []
Recommended movies for user 3 : []
Recommended movies for user 4 : []
Recommended movies for user 5 : []
Recommended movies for user 6 : []
Recommended movies for user 7 : []
Recommended movies for user 8 : []
Recommended movies for user 9 : []
Recommended movies for user 10 : []
Recommended movies for user 11 : []
Recommended movies for user 12 : []
Recommended movies for user 13 : []
Recommended movies for user 14 : []
Recommended movies for user 15 : []
Recommended movies for user 16 : []
Recommended movies for user 17 : []
Recommended movies for user 18 : []
Recommended movies for user 19 : []
Recommended movies for user 20 : []
