In [None]:
import pandas as pd
from io import StringIO

# Inline CSV of (User, Movie, Rating) rows. Several Rating cells are blank or
# hold non-numeric text (e.g. "Five", "5y", "N/A"); they are converted below.
data = """
User,Movie,Rating
Alice,Star Wars,5
Frank,The Godfather,4
Bob,Titanic,5
Carol,The Matrix,3
Dave,Inception,2
Emily,Pulp Fiction,4
Quincy,Star Wars,5
Bob,Star Wars,2
Frank,Forrest Gump,3
Alice,The Matrix,4
Paul,Titanic,4
Carol,Inception,5
Dave,Titanic,4
Emily,The Godfather,5
Alice,Inception,3
Paul,The Godfather,5
Carol,Pulp Fiction,4
Ivy,Star Wars,5
Emily,Forrest Gump,3
Quincy,The Matrix,4
Ivy,The Matrix,4
Paul,Inception,1
Bob,The Godfather,4
Frank,Star Wars,5
Ivy,Inception,3
Dave,Forrest Gump,5
Quincy,Inception,3
Karen,The Matrix,3
Leo,Titanic,4
Mia,The Godfather,5
Nina,Star Wars,5
Oscar,The Matrix,4
Sarah,Star Wars,5
Tom,Pulp Fiction,Five
Karen,Inception,5
Leo,Inception,2
Mia,Pulp Fiction,4
Nina,The Godfather,4
Oscar,Pulp Fiction,2
Sarah,Inception,4
Tom,Inception,
Victor,The Godfather,
Wendy,Pulp Fiction,4
Zane,Pulp Fiction,3.5
Uma,Titanic,5
Victor,Matrix,x
Wendy,The Matrix,3.5
Xander,Forrest Gump,5
Yara,Inception,5y
Zane,Forrest Gump,3.5
Uma,Star Wars,2?
Yara,The Matrix,N/A
Karen,Pulp Fiction,4
Leo,Forrest Gump,5
Mia,Forrest Gump,3
Nina,Forrest Gump,3
Oscar,Forrest Gump,5
Zane,Star Wars,4
"""

# Parse the CSV text and convert Rating to numbers in one chain; any value
# that cannot be parsed becomes NaN (errors='coerce'). Rows are kept here.
df = pd.read_csv(StringIO(data)).assign(
    Rating=lambda frame: pd.to_numeric(frame['Rating'], errors='coerce')
)

# Show the table with the numeric Rating column
print(df)


In [None]:
import pandas as pd
from io import BytesIO

# Read data.csv from the working directory as raw bytes.
# Note: this rebinds `data`, which held the inline CSV text in the first cell.
with open('data.csv', mode='rb') as f:
    data = f.read()

# Parse the in-memory bytes as CSV; this also replaces the earlier `df`.
df = pd.read_csv(filepath_or_buffer=BytesIO(data))

print(df)


In [None]:
# Report the Rating dtype as loaded, before any conversion
print(df['Rating'].dtype)

# Coerce Rating to numbers (unparseable values become NaN), then drop every
# row that is left without a rating.
df = (
    df.assign(Rating=lambda frame: pd.to_numeric(frame['Rating'], errors='coerce'))
      .dropna(subset=['Rating'])
)

print(df)



In [40]:
# Summary figures for the cleaned ratings table
# Distinct users and movies present in the table
total_users = df['User'].nunique()
total_movies = df['Movie'].nunique()

# Spread and centre of the Rating column
min_rating = df['Rating'].min()
max_rating = df['Rating'].max()
average_rating = df['Rating'].mean()

# One line per figure, in the same order as before
print(
    f"Average Rating: {average_rating:.2f}",
    f"Maximum Rating: {max_rating}",
    f"Minimum Rating: {min_rating}",
    f"Total Users: {total_users}",
    f"Total Movies: {total_movies}",
    sep="\n",
)


Average Rating: 3.89
Maximum Rating: 5.0
Minimum Rating: 1.0
Total Users: 19
Total Movies: 7


In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.impute import SimpleImputer

# Input: the ratings DataFrame `df` with User, Movie and Rating columns,
# as prepared by the cells above.

# Build the user x movie matrix; a cell is NaN where that user has no rating
# for that movie.
user_movie_matrix = df.pivot_table(values='Rating', index='User', columns='Movie')

# Fill the gaps. SimpleImputer works column by column, so each missing cell
# receives the mean rating of that movie (not the mean of that user).
imputer = SimpleImputer(strategy='mean')
user_movie_matrix_filled = pd.DataFrame(
    data=imputer.fit_transform(user_movie_matrix),
    index=user_movie_matrix.index,
    columns=user_movie_matrix.columns,
)

# Pairwise cosine similarity between the users' filled rating rows
user_similarity = cosine_similarity(user_movie_matrix_filled)

# Label the similarity matrix with user names on both axes
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=user_movie_matrix_filled.index,
    columns=user_movie_matrix_filled.index,
)

def get_movie_recommendations(user, n=5, *, ratings=None, ratings_filled=None, similarity=None):
    """
    Recommend movies for a user with user-based collaborative filtering.

    The predicted rating of each movie is the similarity-weighted average of
    every user's (imputed) rating for that movie. Only movies the user has
    not actually rated are returned.

    Parameters
    ----------
    user : label
        Row label of the target user in the rating matrices.
    n : int, default 5
        Maximum number of movies to return.
    ratings : DataFrame, optional
        User x movie matrix with NaN where no rating exists. Defaults to the
        notebook-level ``user_movie_matrix``.
    ratings_filled : DataFrame, optional
        Same layout as ``ratings`` with the gaps imputed. Defaults to the
        notebook-level ``user_movie_matrix_filled``.
    similarity : DataFrame, optional
        User x user similarity matrix. Defaults to the notebook-level
        ``user_similarity_df``.

    Returns
    -------
    pandas.Index
        Movie labels ordered from highest to lowest predicted rating. Empty
        when the user has rated every movie or the similarity weights sum
        to zero.

    Raises
    ------
    KeyError
        If ``user`` is not a row of ``ratings``.
    """
    # Fall back to the matrices built at notebook level when none are given.
    if ratings is None:
        ratings = user_movie_matrix
    if ratings_filled is None:
        ratings_filled = user_movie_matrix_filled
    if similarity is None:
        similarity = user_similarity_df

    if user not in ratings.index:
        raise KeyError(f"Unknown user: {user!r}")

    # Similarity of the target user to every user, aligned to the row order
    # of the filled matrix; users missing from `similarity` get weight 0.
    weights = (
        similarity[user]
        .reindex(ratings_filled.index, fill_value=0.0)
        .to_numpy(dtype=float)
    )
    weight_total = weights.sum()
    if weight_total == 0:
        # No usable neighbours: nothing can be predicted.
        return ratings_filled.columns[:0]

    # (n_users,) @ (n_users, n_movies) -> one predicted rating per movie.
    # The weights must multiply ALL users' ratings, not the target user's row.
    predicted = pd.Series(
        weights @ ratings_filled.to_numpy(dtype=float) / weight_total,
        index=ratings_filled.columns,
    )

    # Unrated movies have to be read from the un-imputed matrix; the filled
    # matrix never contains NaN.
    not_rated = ratings.loc[user].reindex(predicted.index).isna().to_numpy()

    return predicted[not_rated].sort_values(ascending=False).head(n).index

def get_popular_movies(n=5):
    """
    Return the top-rated movies to use as fallback recommendations.

    Movies are ranked by their mean Rating in the notebook-level ``df``,
    highest first, and the labels of the first ``n`` are returned as a
    pandas Index.
    """
    mean_by_movie = df.groupby('Movie')['Rating'].mean()
    ranked = mean_by_movie.sort_values(ascending=False)
    return ranked.head(n).index

# Example usage: personalised recommendations for one user, plus the
# top-rated movies overall as a fallback list.
user = 'Alice'
user_recommendations = get_movie_recommendations(user)
fallback_recommendations = get_popular_movies()

print(f"Recommendations for {user}:")
print(user_recommendations)

print("\nFallback Recommendations:")
print(fallback_recommendations)
