<a href="https://colab.research.google.com/github/Manikantareddy4567/treh/blob/main/Recommendation_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install pandas numpy scikit-learn scipy surprise

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl.metadata (327 bytes)
Collecting scikit-surprise (from surprise)
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl size=2505185 sha256=353c2659ae08ca1d1bf0790472609d5378fe12daca5920e307e2c2f4c1544cec
  Stored in directory: /root/.cache/pip/wheels/2a/8f/6e/7e2899163e2d85d8266daab4aa1cdabec7a6c56f83c015b5af
Successfully built scikit-surprise
Install

In [19]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dot, Flatten, Dense
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
from tabulate import tabulate
import duckdb
import matplotlib.pyplot as plt

def load_data():
    print("Loading data...")
    genome_scores_df = pd.read_csv('/content/genome_scores.csv')
    tag_relevance = genome_scores_df.pivot(index='movieId', columns='tagId', values='relevance').fillna(0)
    scaler = MinMaxScaler()
    tag_relevance_scaled = pd.DataFrame(scaler.fit_transform(tag_relevance), index=tag_relevance.index, columns=tag_relevance.columns)
    print("Data successfully loaded and scaled.")
    return tag_relevance_scaled

def compute_similarity(tag_relevance):
    print("Computing similarity matrix...")
    similarity = cosine_similarity(tag_relevance, tag_relevance)
    print("Similarity matrix computed successfully.")
    return similarity

def recommend_movie(movie_id, tag_relevance, similarity_matrix, movie_titles=None, n=10):
    if movie_id not in tag_relevance.index:
        return "Movie ID not found in dataset."

    print(f"Finding recommendations for Movie ID: {movie_id}...")
    idx = tag_relevance.index.get_loc(movie_id)
    sim_scores = list(enumerate(similarity_matrix[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:n+1]
    movie_indices = [int(tag_relevance.index[i[0]]) for i in sim_scores]

    results = [[i+1, movie, movie_titles.get(movie, "Unknown Movie")] if movie_titles else [i+1, movie] for i, movie in enumerate(movie_indices)]
    headers = ["Rank", "Recommended Movie ID", "Movie Title"] if movie_titles else ["Rank", "Recommended Movie ID"]
    print(tabulate(results, headers=headers, tablefmt="fancy_grid"))
    return movie_indices

def load_movie_titles():
    try:
        movies_df = pd.read_csv('/mnt/data/movies.csv')
        return dict(zip(movies_df['movieId'], movies_df['title']))
    except Exception as e:
        print(f"Error loading movie titles: {e}")
        return {}

def build_model(num_users, num_movies, embedding_size=50):
    user_input = Input(shape=(1,))
    movie_input = Input(shape=(1,))
    user_embedding = Embedding(input_dim=num_users, output_dim=embedding_size)(user_input)
    movie_embedding = Embedding(input_dim=num_movies, output_dim=embedding_size)(movie_input)
    dot_product = Dot(axes=2)([user_embedding, movie_embedding])
    output = Flatten()(dot_product)
    model = Model(inputs=[user_input, movie_input], outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

def visualize_recommendations(recommendations, movie_titles):
    if not movie_titles:
        print("No movie titles available for visualization.")
        return
    labels = [movie_titles.get(m, str(m)) for m in recommendations]
    scores = np.linspace(10, 1, len(recommendations))
    plt.figure(figsize=(10, 5))
    plt.barh(labels[::-1], scores[::-1], color='skyblue')
    plt.xlabel("Recommendation Score")
    plt.ylabel("Movie Title")
    plt.title("Top Recommended Movies")
    plt.show()

tag_relevance = load_data()
similarity_matrix = compute_similarity(tag_relevance)
movie_titles = load_movie_titles()
recommended_movies = recommend_movie(1, tag_relevance, similarity_matrix, movie_titles)
if movie_titles:
    visualize_recommendations(recommended_movies, movie_titles)


Loading data...
Data successfully loaded and scaled.
Computing similarity matrix...
Similarity matrix computed successfully.
Error loading movie titles: [Errno 2] No such file or directory: '/mnt/data/movies.csv'
Finding recommendations for Movie ID: 1...
╒════════╤════════════════════════╕
│   Rank │   Recommended Movie ID │
╞════════╪════════════════════════╡
│      1 │                   4886 │
├────────┼────────────────────────┤
│      2 │                   3114 │
├────────┼────────────────────────┤
│      3 │                   2355 │
├────────┼────────────────────────┤
│      4 │                   6377 │
├────────┼────────────────────────┤
│      5 │                  78499 │
├────────┼────────────────────────┤
│      6 │                  50872 │
├────────┼────────────────────────┤
│      7 │                   5218 │
├────────┼────────────────────────┤
│      8 │                   4306 │
├────────┼────────────────────────┤
│      9 │                  68954 │
├────────┼──────────────