In [1]:
pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357270 sha256=53219ede3e1a5e4dbef521f565138ef53990f3797466b8b9a6be95cd97368292
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Succe

In [3]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# Load the MovieLens dataset
data = Dataset.load_builtin('ml-100k')  # or you can load your own dataset

# Define a Reader object to parse the ratings
reader = Reader(rating_scale=(1, 5))

# Load the data into a DataFrame
df = pd.DataFrame(data.raw_ratings, columns=['user_id', 'item_id', 'rating', 'timestamp'])

# Create train and test sets
trainset, testset = train_test_split(data, test_size=0.2)

# Use SVD for collaborative filtering
model = SVD()
model.fit(trainset)

# Make predictions
predictions = model.test(testset)

# Evaluate accuracy
accuracy.rmse(predictions)

# Function to get movie recommendations for a user
def get_movie_recommendations(user_id, n_recommendations=5):
    user_items = df[df['user_id'] == user_id]['item_id'].unique()
    all_items = df['item_id'].unique()

    # Get unrated items
    unrated_items = [item for item in all_items if item not in user_items]

    # Predict ratings for unrated items
    predicted_ratings = [(item, model.predict(user_id, item).est) for item in unrated_items]

    # Sort and return top N recommendations
    recommended_items = sorted(predicted_ratings, key=lambda x: x[1], reverse=True)[:n_recommendations]
    return recommended_items

# Example usage
user_id = 196  # Change this to the user ID you want to get recommendations for
recommendations = get_movie_recommendations(user_id)
print("Recommended movies:", recommendations)


RMSE: 0.9431
Recommended movies: [('318', 4.588824032198449), ('483', 4.577163030244101), ('114', 4.535748693563354), ('169', 4.53049124009552), ('408', 4.529805123692133)]


In [4]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Sample movie data
movies = pd.DataFrame({
    'title': ['Movie A', 'Movie B', 'Movie C', 'Movie D'],
    'description': [
        'A thrilling adventure in the wild.',
        'A heartwarming love story.',
        'A documentary about nature.',
        'An action-packed thriller.'
    ]
})

# Create TF-IDF vectors for the descriptions
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['description'])

# Compute the cosine similarity matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Function to get movie recommendations based on a title
def get_content_recommendations(title, n_recommendations=2):
    idx = movies.index[movies['title'] == title].tolist()[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the n most similar movies
    sim_scores = sim_scores[1:n_recommendations + 1]
    movie_indices = [i[0] for i in sim_scores]

    return movies['title'].iloc[movie_indices]

# Example usage
recommended_movies = get_content_recommendations('Movie A')
print("Recommended movies:", recommended_movies.tolist())


Recommended movies: ['Movie B', 'Movie C']
