In [None]:
# Install required libraries
!pip install transformers torch pandas scipy requests streamlit
!pip install datasets accelerate


Collecting streamlit
  Downloading streamlit-1.47.1-py3-none-any.whl.metadata (9.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collect



In [None]:
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from scipy.special import softmax
import requests
import random
import warnings
warnings.filterwarnings('ignore')

print("All libraries imported successfully!")


All libraries imported successfully!


In [None]:
# Load BERT model for sentiment analysis
def load_sentiment_model(model_name="cardiffnlp/twitter-roberta-base-sentiment"):
    """Load the tokenizer and sequence-classification model used for sentiment analysis.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model. Defaults to the checkpoint this notebook
            was written for. Note that ``analyze_sentiment`` maps output
            indices 0/1/2 to Negative/Neutral/Positive, so a replacement
            checkpoint must use the same class order.

    Returns:
        tuple: ``(tokenizer, model)``.
    """
    print("Loading sentiment analysis model...")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    print("✅ Sentiment model loaded successfully!")
    return tokenizer, model

# Load the sentiment model
sentiment_tokenizer, sentiment_model = load_sentiment_model()


Loading sentiment analysis model...
✅ Sentiment model loaded successfully!


In [None]:
# Load genre classification model
def load_genre_model():
    """Create the zero-shot classification pipeline used for genre detection.

    Returns:
        The pipeline built from "facebook/bart-large-mnli", or None when
        building it raises; in that case the error is printed instead of
        propagated.

    Note:
        A later cell in this notebook defines ``load_genre_model`` again,
        which replaces this definition once that cell has run.
    """
    print("Loading genre classification model...")
    zero_shot = None
    try:
        zero_shot = pipeline(
            "zero-shot-classification",
            model="facebook/bart-large-mnli"
        )
        print("✅ Genre model loaded successfully!")
    except Exception as load_error:
        # Best-effort load: report the problem and signal failure with None
        print(f"❌ Error loading genre model: {load_error}")
        zero_shot = None
    return zero_shot

# Load the genre model
genre_classifier = load_genre_model()


Loading genre classification model...


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


✅ Genre model loaded successfully!


In [None]:
# Upload CSV files (run this cell and upload your files when prompted).
# This cell only works inside Google Colab, which provides `google.colab`.
import re

from google.colab import files

print("Please upload your CSV files:")
print("1. Upload 'top10K-TMDB-movies.csv'")
print("2. Upload 'book.csv'")

# Upload files; the result maps each saved file name to the uploaded bytes
uploaded = files.upload()

# When a file with the same name already exists, Colab stores the new upload
# as "name (1).ext". The dataset-loading cell reads the un-suffixed names, so
# without this step it would silently keep using the older copy. Write the
# fresh bytes back under the original name as well.
print("\nUploaded files:")
for filename, content in uploaded.items():
    canonical_name = re.sub(r' \(\d+\)(?=\.[^.]*$)', '', filename)
    if canonical_name != filename:
        with open(canonical_name, 'wb') as target:
            target.write(content)
        print(f"✅ {filename} (also saved as {canonical_name})")
    else:
        print(f"✅ {filename}")


Please upload your CSV files:
1. Upload 'top10K-TMDB-movies.csv'
2. Upload 'book.csv'


Saving book.csv to book (1).csv
Saving top10K-TMDB-movies.csv to top10K-TMDB-movies (1).csv

Uploaded files:
✅ book (1).csv
✅ top10K-TMDB-movies (1).csv


In [None]:
# Load and process datasets
def load_datasets(movies_path='top10K-TMDB-movies.csv', books_path='book.csv'):
    """Read the movie and book CSV files and keep only the columns used later.

    Args:
        movies_path: Path of the movie CSV. It must contain the columns
            id, title, genre, overview, popularity and vote_average.
        books_path: Path of the book CSV. It must contain the columns
            title, name, genre, rating and synopsis.

    Returns:
        tuple: ``(movies_df, books_df)``. Movie rows without a title or genre
        and book rows without a title or name are dropped. When a file is
        missing or any other error occurs, the error is printed and
        ``(None, None)`` is returned instead of raising.
    """
    try:
        print("Loading datasets...")

        # Load movie dataset
        movies_df = pd.read_csv(movies_path)
        movies_df = movies_df[['id', 'title', 'genre', 'overview', 'popularity', 'vote_average']].copy()
        movies_df = movies_df.dropna(subset=['title', 'genre'])
        print(f"✅ Movies dataset loaded: {len(movies_df)} records")

        # Load book dataset
        books_df = pd.read_csv(books_path)
        books_df = books_df[['title', 'name', 'genre', 'rating', 'synopsis']].copy()
        books_df = books_df.dropna(subset=['title', 'name'])
        print(f"✅ Books dataset loaded: {len(books_df)} records")

        return movies_df, books_df

    except FileNotFoundError as e:
        print(f"❌ Dataset not found: {e}")
        return None, None
    except Exception as e:
        # Deliberately broad (e.g. a missing column raises KeyError): the
        # notebook reports the problem and continues with "no data".
        print(f"❌ Error loading datasets: {e}")
        return None, None

# Read both CSV files; each frame is None when loading failed
movies_df, books_df = load_datasets()

# Show record counts and a two-row preview, but only if both loads succeeded
if not (movies_df is None or books_df is None):
    print("\n📊 Dataset Summary:")
    print(f"Movies: {len(movies_df)} records", f"Books: {len(books_df)} records", sep="\n")

    print("\n🎬 Sample Movie Data:", movies_df.head(2), sep="\n")

    print("\n📚 Sample Book Data:", books_df.head(2), sep="\n")


Loading datasets...
✅ Movies dataset loaded: 9997 records
✅ Books dataset loaded: 1539 records

📊 Dataset Summary:
Movies: 9997 records
Books: 1539 records

🎬 Sample Movie Data:
      id                        title                 genre  \
0    278     The Shawshank Redemption           Drama,Crime   
1  19404  Dilwale Dulhania Le Jayenge  Comedy,Drama,Romance   

                                            overview  popularity  vote_average  
0  Framed in the 1940s for the double murder of h...      94.075           8.7  
1  Raj is a rich, carefree, happy-go-lucky second...      25.408           8.7  

📚 Sample Book Data:
                                               title               name  \
0              Sapiens: A Brief History of Humankind  Yuval Noah Harari   
1  Guns, Germs, and Steel: The Fates of Human Soc...      Jared Diamond   

     genre  rating                                           synopsis  
0  history    4.39  100,000 years ago, at least six human species ... 

In [None]:
# Sentiment analysis function
def analyze_sentiment(text, tokenizer, model):
    """Classify the sentiment of ``text`` as Negative, Neutral or Positive.

    Args:
        text: Input string. ``None`` is treated as an empty string.
        tokenizer: Callable that turns the preprocessed text into model inputs
            (called with ``return_tensors='pt'``, ``max_length=512`` and
            ``truncation=True``).
        model: Callable taking those inputs as keyword arguments; the first
            element of its output holds the logits, one row per input.

    Returns:
        tuple: ``(predicted_sentiment, sentiment_scores)`` where
        ``sentiment_scores`` maps each label to its softmax probability.
    """

    def _mask_token(word):
        # Replace user mentions and links with generic placeholders
        if word.startswith('@') and len(word) > 1:
            return '@user'
        if word.startswith('http'):
            return "http"
        return word

    if text is None:
        text = ""
    tweet_proc = " ".join(_mask_token(word) for word in text.split(' '))

    # Encode and predict
    encoded_tweet = tokenizer(tweet_proc, return_tensors='pt', max_length=512, truncation=True)

    # Keep inputs on the same device as the model so the call also works when
    # the model has been moved to a GPU.
    device = getattr(model, 'device', None)
    if device is not None:
        encoded_tweet = {name: tensor.to(device) for name, tensor in encoded_tweet.items()}

    with torch.no_grad():
        output = model(**encoded_tweet)

    # .cpu() is required before .numpy() whenever the logits live on a GPU
    scores = softmax(output[0][0].detach().cpu().numpy())

    # Map to sentiment labels (index order: 0=Negative, 1=Neutral, 2=Positive)
    labels = ['Negative', 'Neutral', 'Positive']
    sentiment_scores = {labels[i]: float(scores[i]) for i in range(len(labels))}
    predicted_sentiment = labels[int(scores.argmax())]

    return predicted_sentiment, sentiment_scores

print("✅ Sentiment analysis function defined!")


✅ Sentiment analysis function defined!


In [None]:
# Cell 4: Load Genre Classification Model (FIXED VERSION)
def load_genre_model():
    """Create the zero-shot genre classifier, with a smaller fallback model.

    The primary model is "facebook/bart-large-mnli", placed on the first GPU
    when CUDA is available and on the CPU otherwise. If building it fails, a
    distilled MNLI checkpoint is tried on the CPU. Zero-shot classification
    relies on a natural-language-inference model, so the fallback must be an
    NLI checkpoint as well (a dialogue/causal language model would load but
    give meaningless genre scores).

    Returns:
        The pipeline object, or None when both attempts fail (errors are
        printed rather than raised).
    """
    print("Loading genre classification model...")
    try:
        # Try primary model
        genre_classifier = pipeline(
            "zero-shot-classification",
            model="facebook/bart-large-mnli",
            device=0 if torch.cuda.is_available() else -1
        )
        print("✅ Genre model loaded successfully!")
        return genre_classifier
    except Exception as e:
        print(f"⚠️ Primary model failed: {e}")
        print("Trying alternative model...")
        try:
            # Fallback to a smaller NLI model
            genre_classifier = pipeline(
                "zero-shot-classification",
                model="valhalla/distilbart-mnli-12-1",
                device=-1  # Force CPU
            )
            print("✅ Alternative genre model loaded!")
            return genre_classifier
        except Exception as e2:
            print(f"❌ All models failed: {e2}")
            return None

# Load the genre model with error handling
genre_classifier = load_genre_model()



Loading genre classification model...


Device set to use cuda:0


✅ Genre model loaded successfully!


In [None]:
# Movie recommendation function
def _movies_in_genre(frame, genre):
    """Return the rows of ``frame`` whose 'genre' text contains ``genre`` (literal, case-insensitive)."""
    # regex=False: genre names are plain text, so characters such as '+' or
    # '(' must not be interpreted as regular-expression syntax.
    return frame[frame['genre'].str.contains(genre, na=False, case=False, regex=False)]


def recommend_movies_by_sentiment_and_genre(sentiment, detected_genre, movies_df, num_recommendations=5):
    """Recommend movies based on sentiment and detected genre.

    Selection order:
      1. Movies whose genre contains ``detected_genre``, if there are at least
         ``num_recommendations`` of them.
      2. Otherwise a sentiment-based pool: 'Positive' starts from movies with
         ``vote_average >= 7.0``; any other sentiment starts from all movies.
         The pool is narrowed to the first preferred genre (per sentiment)
         that still has at least ``num_recommendations`` movies.
      3. If the pool is empty, the most popular movies overall.

    The pool is sorted by popularity, then rating. When it holds more movies
    than requested, the result is a random sample from its top
    ``2 * num_recommendations`` rows, so repeated calls may differ.

    Args:
        sentiment: 'Positive', 'Negative', or anything else (treated as neutral).
        detected_genre: Genre name to match literally, ignoring case. ``None``,
            non-string or blank values skip step 1.
        movies_df: DataFrame with 'genre', 'popularity' and 'vote_average' columns.
        num_recommendations: Maximum number of rows to return.

    Returns:
        pandas.DataFrame: The recommended rows; empty when ``movies_df`` is
        None or empty.
    """
    if movies_df is None or movies_df.empty:
        return pd.DataFrame()

    wanted_genre = detected_genre.strip() if isinstance(detected_genre, str) else ''

    # First try to filter by detected genre
    genre_filtered = None
    if wanted_genre:
        genre_filtered = _movies_in_genre(movies_df, wanted_genre)

    if genre_filtered is not None and len(genre_filtered) >= num_recommendations:
        filtered_movies = genre_filtered
    else:
        # Fallback to sentiment-based filtering
        preferred_by_sentiment = {
            'Positive': ['Comedy', 'Family', 'Romance', 'Animation'],
            'Negative': ['Drama', 'Thriller', 'Horror', 'Crime', 'War'],
        }
        # 'Science Fiction' is the spelling used by the genre labels in
        # detect_genre; 'Sci-Fi' is kept for data that uses the short form.
        neutral_genres = ['Action', 'Adventure', 'Mystery', 'Sci-Fi', 'Science Fiction']

        if sentiment == 'Positive':
            filtered_movies = movies_df[movies_df['vote_average'] >= 7.0]
        else:
            filtered_movies = movies_df

        for genre in preferred_by_sentiment.get(sentiment, neutral_genres):
            genre_movies = _movies_in_genre(filtered_movies, genre)
            if len(genre_movies) >= num_recommendations:
                filtered_movies = genre_movies
                break

    if filtered_movies.empty:
        # Fallback to top popular movies
        return movies_df.nlargest(num_recommendations, 'popularity')

    # Sort by popularity and rating, then sample
    ranked = filtered_movies.sort_values(['popularity', 'vote_average'], ascending=[False, False])
    sample_size = min(num_recommendations, len(ranked))
    if len(ranked) > sample_size:
        return ranked.head(sample_size * 2).sample(n=sample_size)
    return ranked.head(sample_size)

print("✅ Movie recommendation function defined!")


✅ Movie recommendation function defined!


In [None]:
# Book recommendation function
def _book_genre_mask(frame, genre):
    """Boolean mask of rows in ``frame`` whose 'genre' text contains ``genre`` (literal, case-insensitive)."""
    # regex=False: genre names are plain text, not regular expressions
    return frame['genre'].str.contains(genre, na=False, case=False, regex=False)


def recommend_books_by_sentiment_and_genre(sentiment, detected_genre, books_df, num_recommendations=5):
    """Recommend books based on sentiment and detected genre.

    Selection order:
      1. Books whose genre contains ``detected_genre``, if there are at least
         ``num_recommendations`` of them.
      2. Otherwise a sentiment-based pool:
         - 'Positive': books rated >= 4.0, excluding horror, thriller and
           mystery genres.
         - 'Negative': all books, narrowed to the first of history,
           biography, psychology, philosophy, drama that has at least
           ``num_recommendations`` books.
         - anything else: all books.
      3. If the pool is empty, the top-rated books overall.

    The pool is sorted by rating. When it holds more books than requested,
    the result is a random sample from its top ``2 * num_recommendations``
    rows, so repeated calls may differ.

    Args:
        sentiment: 'Positive', 'Negative', or anything else (treated as neutral).
        detected_genre: Genre name to match literally, ignoring case. ``None``,
            non-string or blank values skip step 1.
        books_df: DataFrame with 'genre' and 'rating' columns.
        num_recommendations: Maximum number of rows to return.

    Returns:
        pandas.DataFrame: The recommended rows; empty when ``books_df`` is
        None or empty.
    """
    if books_df is None or books_df.empty:
        return pd.DataFrame()

    wanted_genre = detected_genre.strip() if isinstance(detected_genre, str) else ''

    # First try to filter by detected genre
    genre_filtered = None
    if wanted_genre:
        genre_filtered = books_df[_book_genre_mask(books_df, wanted_genre)]

    if genre_filtered is not None and len(genre_filtered) >= num_recommendations:
        filtered_books = genre_filtered
    else:
        # Fallback to sentiment-based filtering
        if sentiment == 'Positive':
            filtered_books = books_df[books_df['rating'] >= 4.0]
            avoid_genres = ['horror', 'thriller', 'mystery']
            for genre in avoid_genres:
                filtered_books = filtered_books[~_book_genre_mask(filtered_books, genre)]

        elif sentiment == 'Negative':
            filtered_books = books_df
            preferred_genres = ['history', 'biography', 'psychology', 'philosophy', 'drama']
            for genre in preferred_genres:
                genre_books = filtered_books[_book_genre_mask(filtered_books, genre)]
                if len(genre_books) >= num_recommendations:
                    filtered_books = genre_books
                    break

        else:  # Neutral
            filtered_books = books_df

    if filtered_books.empty:
        # Fallback to top rated books
        return books_df.nlargest(num_recommendations, 'rating')

    # Sort by rating and sample
    ranked = filtered_books.sort_values('rating', ascending=False)
    sample_size = min(num_recommendations, len(ranked))
    if len(ranked) > sample_size:
        return ranked.head(sample_size * 2).sample(n=sample_size)
    return ranked.head(sample_size)

print("✅ Book recommendation function defined!")


✅ Book recommendation function defined!


In [None]:
def get_recommendations(user_input, sentiment_tokenizer, sentiment_model, genre_classifier, movies_df, books_df,
                        num_recommendations=5):
    """Main function to get recommendations based on user input.

    Runs sentiment analysis and genre detection on ``user_input`` and uses
    both results to pick movies and books.

    Args:
        user_input: Free-text description from the user.
        sentiment_tokenizer: Tokenizer passed to ``analyze_sentiment``.
        sentiment_model: Model passed to ``analyze_sentiment``.
        genre_classifier: Classifier passed to ``detect_genre``.
        movies_df: Movie DataFrame passed to the movie recommender.
        books_df: Book DataFrame passed to the book recommender.
        num_recommendations: How many movies and how many books to request
            (defaults to 5, the value that was previously hard-coded).

    Returns:
        dict with keys 'sentiment', 'sentiment_scores', 'genre',
        'genre_scores', 'movies' and 'books'.
    """

    print("🔍 Analyzing your input...")
    print("-" * 50)

    # Analyze sentiment
    predicted_sentiment, sentiment_scores = analyze_sentiment(user_input, sentiment_tokenizer, sentiment_model)

    # Detect genre
    predicted_genre, genre_scores = detect_genre(user_input, genre_classifier)

    # Get recommendations
    recommended_movies = recommend_movies_by_sentiment_and_genre(
        predicted_sentiment, predicted_genre, movies_df, num_recommendations
    )
    recommended_books = recommend_books_by_sentiment_and_genre(
        predicted_sentiment, predicted_genre, books_df, num_recommendations
    )

    return {
        'sentiment': predicted_sentiment,
        'sentiment_scores': sentiment_scores,
        'genre': predicted_genre,
        'genre_scores': genre_scores,
        'movies': recommended_movies,
        'books': recommended_books
    }

print("✅ Main recommendation system defined!")


✅ Main recommendation system defined!


In [None]:
def display_results(results):
    """Display the analysis and recommendation results.

    Args:
        results: dict with the keys 'sentiment', 'sentiment_scores', 'genre',
            'genre_scores', 'movies' and 'books'. 'movies' and 'books' are
            DataFrames; the two score entries map label -> probability.

    Prints four sections to stdout: sentiment, genre, movies and books.
    Returns None.
    """

    print("🎭 SENTIMENT ANALYSIS RESULTS")
    print("=" * 50)
    print(f"Detected Sentiment: {results['sentiment']}")
    print(f"Confidence: {results['sentiment_scores'][results['sentiment']]:.2%}")
    print("\nAll Sentiment Scores:")
    for sentiment, score in results['sentiment_scores'].items():
        print(f"  {sentiment}: {score:.2%}")

    print("\n🎬 GENRE DETECTION RESULTS")
    print("=" * 50)
    print(f"Detected Genre: {results['genre']}")
    genre_scores = results['genre_scores']
    if genre_scores:
        # The score dict is not guaranteed to be ordered by score, so rank it
        # here and look the confidence up by the detected genre itself rather
        # than taking whatever entry happens to come first.
        ranked_genres = sorted(genre_scores.items(), key=lambda item: item[1], reverse=True)
        confidence = genre_scores.get(results['genre'], ranked_genres[0][1])
        print(f"Confidence: {confidence:.2%}")
        print("\nTop Genre Predictions:")
        for genre, score in ranked_genres[:3]:
            print(f"  {genre}: {score:.2%}")

    print("\n🎬 RECOMMENDED MOVIES")
    print("=" * 50)
    if not results['movies'].empty:
        for idx, (_, movie) in enumerate(results['movies'].iterrows(), 1):
            print(f"{idx}. {movie['title']}")
            print(f"   Genre: {movie['genre']}")
            print(f"   Rating: ⭐ {movie['vote_average']:.1f}")
            # Skip missing overviews instead of printing "nan..."
            if pd.notna(movie['overview']):
                print(f"   Overview: {str(movie['overview'])[:100]}...")
            print()
    else:
        print("No movies found.")

    print("📚 RECOMMENDED BOOKS")
    print("=" * 50)
    if not results['books'].empty:
        for idx, (_, book) in enumerate(results['books'].iterrows(), 1):
            print(f"{idx}. {book['title']}")
            print(f"   Author: {book['name']}")
            print(f"   Genre: {book['genre']}")
            print(f"   Rating: ⭐ {book['rating']:.1f}")
            if pd.notna(book['synopsis']):
                print(f"   Synopsis: {str(book['synopsis'])[:100]}...")
            print()
    else:
        print("No books found.")

print("✅ Display function defined!")


✅ Display function defined!


In [None]:
# Cell 8: Updated Genre Detection Function with Fallback
def _keyword_genre_guess(text):
    """Guess a genre by counting keyword substrings that occur in ``text``.

    Returns ``(best_genre, scores)`` where ``scores`` maps every genre with at
    least one hit to the fraction of its keywords found, ordered from highest
    to lowest score. With no hits at all the result is
    ``("Drama", {"Drama": 0.5})``.
    """
    # 'Science Fiction' matches the label used by the classifier path below,
    # so both paths name the genre the same way.
    genre_keywords = {
        'Romance': ['love', 'romantic', 'romance', 'heart', 'relationship', 'uplifting'],
        'Comedy': ['funny', 'laugh', 'comedy', 'humor', 'fun', 'great', 'happy'],
        'Action': ['action', 'fight', 'adventure', 'exciting', 'thrilling'],
        'Drama': ['sad', 'emotional', 'drama', 'serious', 'deep'],
        'Horror': ['scary', 'horror', 'fear', 'frightening'],
        'Science Fiction': ['science', 'future', 'space', 'technology']
    }

    text_lower = text.lower() if isinstance(text, str) else ""

    raw_scores = {}
    for genre, keywords in genre_keywords.items():
        score = sum(1 for keyword in keywords if keyword in text_lower) / len(keywords)
        if score > 0:
            raw_scores[genre] = score

    if not raw_scores:
        return "Drama", {"Drama": 0.5}

    # sorted() is stable, so ties keep the keyword-table order and the first
    # entry is the same genre max() would have picked.
    genre_scores = dict(sorted(raw_scores.items(), key=lambda item: item[1], reverse=True))
    best_genre = next(iter(genre_scores))
    return best_genre, genre_scores


def detect_genre(text, genre_classifier):
    """Detect genre preferences from input text with fallback.

    Args:
        text: User text to classify.
        genre_classifier: Callable invoked as
            ``genre_classifier(text, genre_labels)`` and expected to return a
            mapping with 'labels' and 'scores' lists (best first), or None to
            use the keyword-based fallback directly.

    Returns:
        tuple: ``(predicted_genre, scores)``. On the classifier path
        ``scores`` holds the first three labels with their scores; on the
        keyword path it holds every matching genre, best first. If the
        classifier raises, the error is printed and the keyword fallback is
        used instead.
    """
    if genre_classifier is None:
        return _keyword_genre_guess(text)

    # An earlier version had a branch guarded by
    # `callable(x) and not hasattr(x, '__call__')`, which can never be true;
    # that unreachable branch has been removed.

    # Define comprehensive genre labels
    genre_labels = [
        "Action", "Adventure", "Animation", "Biography", "Comedy", "Crime",
        "Documentary", "Drama", "Family", "Fantasy", "History", "Horror",
        "Music", "Mystery", "Romance", "Science Fiction", "Thriller", "War"
    ]

    try:
        # Use zero-shot classification to detect genre
        result = genre_classifier(text, genre_labels)

        # Get top genre and confidence scores
        predicted_genre = result['labels'][0] if result['labels'] else "Drama"
        genre_scores = {label: score for label, score in zip(result['labels'], result['scores'])}

        # Return top 3 genres with scores
        top_genres = dict(list(genre_scores.items())[:3])

        return predicted_genre, top_genres

    except Exception as e:
        print(f"⚠️ Genre detection error: {e}")
        # Fallback to keyword detection
        return _keyword_genre_guess(text)

print("✅ Updated genre detection function with fallback defined!")


✅ Updated genre detection function with fallback defined!


In [None]:
# Test all components before running Cell 13
print("🔍 Testing all components...")

# Check sentiment model
try:
    test_sentiment, _ = analyze_sentiment("I love her very much", sentiment_tokenizer, sentiment_model)
    print(f"✅ Sentiment model works: {test_sentiment}")
except Exception as e:
    print(f"❌ Sentiment model error: {e}")

# Check genre classifier
try:
    if genre_classifier:
        test_genre, _ = detect_genre("I want romance", genre_classifier)
        print(f"✅ Genre classifier works: {test_genre}")
    else:
        print("⚠️ No genre classifier available")
except Exception as e:
    print(f"❌ Genre classifier error: {e}")

# Check datasets. load_datasets() returns (None, None) on failure, so the
# names can exist while holding None; test the value, not just the name,
# otherwise len(None) raises TypeError here.
print(f"✅ Movies dataset: {len(movies_df) if globals().get('movies_df') is not None else 'Not loaded'}")
print(f"✅ Books dataset: {len(books_df) if globals().get('books_df') is not None else 'Not loaded'}")

print("\n🎯 All components ready for Cell 13!")


🔍 Testing all components...
✅ Sentiment model works: Positive
✅ Genre classifier works: Romance
✅ Movies dataset: 9997
✅ Books dataset: 1539

🎯 All components ready for Cell 13!
