In [3]:
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load movie data
movies = pd.read_csv('ml-latest-small/movies.csv')  # Assuming movies.csv contains movieId, title columns

# Basic title cleaning
movies['clean_title'] = movies['title'].str.replace(r'[^a-zA-Z0-9 ]', '', regex=True).str.lower()

# Vectorize movie titles for similarity calculation
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(movies['clean_title'])

# Function to recommend movies based on title similarity
def recommend_movies(title, num_recommendations=5):
    title_clean = re.sub(r'[^a-zA-Z0-9 ]', '', title).lower()
    title_vec = vectorizer.transform([title_clean])
    similarity_scores = cosine_similarity(title_vec, tfidf_matrix).flatten()
    
    # Get indices of top similar movies
    recommended_indices = similarity_scores.argsort()[-num_recommendations-1:-1][::-1]
    return movies.iloc[recommended_indices][['title']]

# Example: Recommend movies similar to a given title
print("Recommendations:")
print(recommend_movies("Toy Story", 5))

# Load ratings data
ratings = pd.read_csv('ml-latest-small/ratings.csv')  # Assuming ratings.csv contains userId, movieId, rating columns

# Get users who liked a given movie
def get_similar_users(movie_id, min_rating=4):
    return ratings[(ratings['movieId'] == movie_id) & (ratings['rating'] >= min_rating)]['userId'].unique()

# Get recommended movies based on similar users' high-rated movies
def recommend_from_similar_users(movie_id, num_recommendations=5):
    similar_users = get_similar_users(movie_id)
    similar_user_ratings = ratings[ratings['userId'].isin(similar_users) & (ratings['rating'] >= 4)]
    top_movies = similar_user_ratings['movieId'].value_counts().head(num_recommendations).index
    return movies[movies['movieId'].isin(top_movies)][['title']]

# Example: Recommend based on users who liked "Toy Story" (assuming movie ID is 1)
print("User-based Recommendations:")
print(recommend_from_similar_users(1, 5))


Recommendations:
                    title
0        Toy Story (1995)
7355   Toy Story 3 (2010)
3595      Toy, The (1982)
4089  Toy Soldiers (1991)
1570    L.A. Story (1991)
User-based Recommendations:
                                         title
0                             Toy Story (1995)
224  Star Wars: Episode IV - A New Hope (1977)
257                        Pulp Fiction (1994)
277           Shawshank Redemption, The (1994)
314                        Forrest Gump (1994)
