In [1]:
!pip install spotipy transformers torch scikit-learn pandas numpy

Collecting spotipy
  Downloading spotipy-2.25.1-py3-none-any.whl.metadata (5.1 kB)
Collecting redis>=3.5.3 (from spotipy)
  Downloading redis-6.2.0-py3-none-any.whl.metadata (10 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)
  Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch)
  Downloading nvidia_c

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import json
import os
import re
from typing import Dict, List, Optional, Tuple
import warnings
warnings.filterwarnings('ignore')

try:
    from transformers import pipeline
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    TRANSFORMERS_AVAILABLE = False
    print("Warning: transformers library not available. Using keyword-based emotion detection.")

try:
    from langdetect import detect
    LANGDETECT_AVAILABLE = True
except ImportError:
    LANGDETECT_AVAILABLE = False
    print("Warning: langdetect library not available. Language detection will be basic.")

class EnhancedMoodMusicRecommender:
    def __init__(self, dataset_path: str = None):
        self.emotion_classifier = self._load_emotion_model()
        self.song_database = None
        self.dataset_path = dataset_path or '/kaggle/input/spotify/SpotifyFeatures.csv'
        self.scaler = StandardScaler()
        
        self.emotion_mapping = {
            'joy': {'valence': 0.8, 'energy': 0.7, 'danceability': 0.8, 'tempo': 120, 'loudness': -8},
            'happiness': {'valence': 0.8, 'energy': 0.7, 'danceability': 0.8, 'tempo': 120, 'loudness': -8},
            'sadness': {'valence': 0.2, 'energy': 0.3, 'danceability': 0.3, 'tempo': 70, 'loudness': -15},
            'anger': {'valence': 0.3, 'energy': 0.9, 'danceability': 0.5, 'tempo': 140, 'loudness': -5},
            'fear': {'valence': 0.2, 'energy': 0.6, 'danceability': 0.2, 'tempo': 100, 'loudness': -12},
            'surprise': {'valence': 0.6, 'energy': 0.7, 'danceability': 0.6, 'tempo': 110, 'loudness': -8},
            'disgust': {'valence': 0.3, 'energy': 0.4, 'danceability': 0.3, 'tempo': 90, 'loudness': -12},
            'love': {'valence': 0.9, 'energy': 0.5, 'danceability': 0.6, 'tempo': 80, 'loudness': -10},
            'calm': {'valence': 0.6, 'energy': 0.3, 'danceability': 0.4, 'tempo': 80, 'loudness': -12},
            'excited': {'valence': 0.8, 'energy': 0.9, 'danceability': 0.8, 'tempo': 130, 'loudness': -6},
            'melancholy': {'valence': 0.3, 'energy': 0.4, 'danceability': 0.3, 'tempo': 75, 'loudness': -14},
            'romantic': {'valence': 0.7, 'energy': 0.4, 'danceability': 0.5, 'tempo': 85, 'loudness': -10}
        }
        
        self.english_emotion_keywords = {
            'joy': ['happy', 'joyful', 'cheerful', 'upbeat', 'glad', 'delighted', 'elated', 'blissful'],
            'happiness': ['happy', 'joyful', 'cheerful', 'upbeat', 'glad', 'delighted', 'elated'],
            'sadness': ['sad', 'depressed', 'melancholy', 'down', 'blue', 'gloomy', 'sorrowful', 'heartbroken'],
            'anger': ['angry', 'mad', 'furious', 'irritated', 'annoyed', 'rage', 'frustrated', 'livid'],
            'fear': ['scared', 'afraid', 'anxious', 'worried', 'nervous', 'frightened', 'terrified'],
            'love': ['love', 'romantic', 'affection', 'adore', 'crush', 'heart', 'valentine', 'romance'],
            'calm': ['calm', 'peaceful', 'relaxed', 'serene', 'tranquil', 'chill', 'mellow', 'zen'],
            'excited': ['excited', 'thrilled', 'pumped', 'energetic', 'enthusiastic', 'hyper', 'ecstatic'],
            'melancholy': ['melancholy', 'wistful', 'nostalgic', 'pensive', 'contemplative', 'bittersweet']
        }
        
        self.hindi_emotion_keywords = {
            'joy': ['khushi', 'khush', 'prasanna', 'anand', 'harsha', 'ullas', 'mastikhana', 'maza'],
            'happiness': ['khushi', 'khush', 'prasanna', 'anand', 'harsha', 'sukh', 'santosh'],
            'sadness': ['udas', 'gam', 'dukh', 'pareshani', 'vishad', 'melancholic', 'rona', 'aansu'],
            'anger': ['gussa', 'krodh', 'naraaz', 'chidh', 'irritate', 'frustrate', 'rage'],
            'fear': ['dar', 'bhay', 'ghabrana', 'chinta', 'pareshani', 'nervous', 'tension'],
            'love': ['pyaar', 'mohabbat', 'ishq', 'prem', 'dil', 'romantic', 'romance', 'chahat'],
            'calm': ['shant', 'peaceful', 'sukoon', 'aram', 'relax', 'chill', 'shaanti'],
            'excited': ['josh', 'utsah', 'energy', 'pumped', 'thrill', 'excited', 'jalwa'],
            'melancholy': ['udaasi', 'yaadein', 'nostalgic', 'purane', 'beet gaye din', 'khamoshi']
        }
        
        self.hindi_indicators = [
            'bollywood', 'hindi', 'indian', 'bhangra', 'classical indian', 'devotional',
            'sufi', 'ghazal', 'qawwali', 'filmi', 'punjabi', 'regional indian'
        ]
        
        self.english_indicators = [
            'pop', 'rock', 'jazz', 'blues', 'country', 'folk', 'alternative', 'indie',
            'electronic', 'dance', 'hip-hop', 'rap', 'r&b', 'soul', 'funk', 'reggae'
        ]
    
    def _load_emotion_model(self):
        if not TRANSFORMERS_AVAILABLE:
            print("Using keyword-based emotion detection...")
            return None
        
        try:
            print("Loading emotion detection model...")
            return pipeline(
                "text-classification",
                model="j-hartmann/emotion-english-distilroberta-base",
                return_all_scores=True
            )
        except Exception as e:
            print(f"Could not load emotion model: {e}")
            print("Falling back to keyword-based emotion detection...")
            return None
    
    def detect_language(self, text: str) -> str:
        try:
            if LANGDETECT_AVAILABLE:
                detected = detect(text.lower())
                if detected in ['hi', 'ur']:  
                    return 'hindi'
                elif detected == 'en':
                    return 'english'
            
            text_lower = text.lower()
            hindi_count = sum(1 for keywords in self.hindi_emotion_keywords.values() 
                             for keyword in keywords if keyword in text_lower)
            english_count = sum(1 for keywords in self.english_emotion_keywords.values() 
                               for keyword in keywords if keyword in text_lower)
            
            if hindi_count > english_count:
                return 'hindi'
            else:
                return 'english'
                
        except Exception:
            return 'english'  
    
    def detect_emotion_keywords_multilingual(self, text: str, language: str = None) -> Dict[str, float]:
        if language is None:
            language = self.detect_language(text)
        
        text_lower = text.lower()
        emotion_scores = {}
        
        if language == 'hindi':
            keyword_sets = [self.hindi_emotion_keywords, self.english_emotion_keywords]
            print(f"Language detected: Hindi/Hinglish")
        else:
            keyword_sets = [self.english_emotion_keywords, self.hindi_emotion_keywords]
            print(f"Language detected: English")
        
        for i, keywords_dict in enumerate(keyword_sets):
            weight = 1.0 if i == 0 else 0.5  # Higher weight for primary language
            
            for emotion, keywords in keywords_dict.items():
                if emotion not in emotion_scores:
                    emotion_scores[emotion] = 0
                
                score = 0
                for keyword in keywords:
                    if keyword in text_lower:
                        score += 1
                
                emotion_scores[emotion] += (score / len(keywords)) * weight
        
        if not any(emotion_scores.values()):
            emotion_scores = {'joy': 0.5, 'calm': 0.5}
        
        total_score = sum(emotion_scores.values())
        if total_score > 0:
            emotion_scores = {k: min(v/total_score, 1.0) for k, v in emotion_scores.items()}
        
        return emotion_scores
    
    def classify_song_language(self, song_row: pd.Series) -> str:
        if 'genre' in song_row.index and pd.notna(song_row['genre']):
            genre_lower = str(song_row['genre']).lower()
            for indicator in self.hindi_indicators:
                if indicator in genre_lower:
                    return 'hindi'
            for indicator in self.english_indicators:
                if indicator in genre_lower:
                    return 'english'
        
        if 'artist' in song_row.index and pd.notna(song_row['artist']):
            artist_lower = str(song_row['artist']).lower()
            
            hindi_patterns = ['kumar', 'singh', 'sharma', 'khan', 'chopra', 'kapoor', 'arijit', 
                             'shreya', 'kishore', 'lata', 'asha', 'udit', 'alka', 'sunidhi']
            
            for pattern in hindi_patterns:
                if pattern in artist_lower:
                    return 'hindi'
        
        if 'name' in song_row.index and pd.notna(song_row['name']):
            name_lower = str(song_row['name']).lower()
            
            hindi_song_patterns = ['tere', 'mera', 'tera', 'dil', 'pyaar', 'ishq', 'mohabbat', 
                                  'saath', 'zindagi', 'sapna', 'khushi', 'gam']
            
            for pattern in hindi_song_patterns:
                if pattern in name_lower:
                    return 'hindi'
        
        return 'mixed'  
    
    def load_dataset(self, dataset_path: str = None) -> pd.DataFrame:
        if dataset_path:
            self.dataset_path = dataset_path
        
        if not self.dataset_path:
            raise ValueError("Dataset path not provided")
        
        if not os.path.exists(self.dataset_path):
            raise FileNotFoundError(f"Dataset file not found: {self.dataset_path}")
        
        try:
            print(f"Loading dataset from: {self.dataset_path}")
            
            encodings = ['utf-8', 'latin-1', 'cp1252']
            df = None
            
            for encoding in encodings:
                try:
                    df = pd.read_csv(self.dataset_path, encoding=encoding)
                    print(f"Successfully loaded dataset with {encoding} encoding")
                    break
                except UnicodeDecodeError:
                    continue
            
            if df is None:
                raise ValueError("Could not read CSV file with any encoding")
            
            print(f"Dataset loaded: {len(df)} tracks")
            
            df = self._preprocess_dataset(df)
            
            print("Classifying songs by language...")
            df['language'] = df.apply(self.classify_song_language, axis=1)
            
            # Count language distribution
            lang_counts = df['language'].value_counts()
            print(f"Language distribution:")
            for lang, count in lang_counts.items():
                print(f"   {lang.title()}: {count:,} songs")
            
            self.song_database = df
            
            print(f"Dataset processed successfully with multilingual support!")
            return self.song_database
            
        except Exception as e:
            print(f"Error loading dataset: {e}")
            raise
    
    def _preprocess_dataset(self, df: pd.DataFrame) -> pd.DataFrame:
        column_mapping = {
            'track_name': 'name',
            'track_id': 'id',
            'artists': 'artist',
            'track_genre': 'genre',
            'track_artist': 'artist',
            'artist_name': 'artist',
            'song_name': 'name',
            'title': 'name'
        }
        
        for old_name, new_name in column_mapping.items():
            if old_name in df.columns and new_name not in df.columns:
                df = df.rename(columns={old_name: new_name})
        
        required_cols = ['valence', 'energy', 'danceability', 'tempo']
        missing_cols = [col for col in required_cols if col not in df.columns]
        
        if missing_cols:
            defaults = {'valence': 0.5, 'energy': 0.5, 'danceability': 0.5, 'tempo': 120}
            for col in missing_cols:
                df[col] = defaults.get(col, 0.5)
        
        if 'name' not in df.columns:
            df['name'] = 'Unknown Track ' + df.index.astype(str)
        if 'artist' not in df.columns:
            df['artist'] = 'Unknown Artist'
        
        df = df.dropna(subset=['name'])
        df['artist'] = df['artist'].fillna('Unknown Artist')
        df = df.drop_duplicates(subset=['name', 'artist'])
        
        audio_features = ['valence', 'energy', 'danceability', 'acousticness', 
                         'instrumentalness', 'speechiness', 'liveness']
        
        for col in audio_features:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce')
                df[col] = df[col].fillna(0.5)
                df[col] = df[col].clip(0, 1)
        
        if 'tempo' in df.columns:
            df['tempo'] = pd.to_numeric(df['tempo'], errors='coerce')
            df['tempo'] = df['tempo'].fillna(120)
            df['tempo'] = df['tempo'].clip(60, 200)
        
        if 'loudness' in df.columns:
            df['loudness'] = pd.to_numeric(df['loudness'], errors='coerce')
            df['loudness'] = df['loudness'].fillna(-10)
            df['loudness'] = df['loudness'].clip(-60, 0)
        
        if 'id' not in df.columns:
            df['id'] = range(len(df))
        
        if 'genre' not in df.columns:
            df['genre'] = 'unknown'
        else:
            df['genre'] = df['genre'].fillna('unknown')
        
        return df.reset_index(drop=True)
    
    def detect_emotion(self, text: str) -> Dict[str, float]:
        language = self.detect_language(text)
        
        if self.emotion_classifier and language == 'english':
            try:
                results = self.emotion_classifier(text)
                return {result['label'].lower(): result['score'] for result in results[0]}
            except Exception as e:
                print(f"Error in AI emotion detection: {e}")
        
        return self.detect_emotion_keywords_multilingual(text, language)
    
    def recommend_songs(self, mood_text: str, n_recommendations: int = 10, 
                       language_preference: str = 'both', filter_genre: str = None, 
                       min_score: float = 0.0) -> pd.DataFrame:

        if self.song_database is None:
            raise ValueError("Dataset not loaded. Call load_dataset() first.")
        
        emotion_scores = self.detect_emotion(mood_text)
        dominant_emotion = max(emotion_scores, key=emotion_scores.get)
        detected_language = self.detect_language(mood_text)
        
        print(f"Detected emotion: {dominant_emotion.upper()}")
        print(f"Input language: {detected_language.title()}")
        print(f"Emotion confidence: {emotion_scores.get(dominant_emotion, 0):.3f}")
        
        df = self.song_database.copy()
        
        if language_preference != 'both':
            if language_preference == 'hindi':
                df = df[df['language'].isin(['hindi', 'mixed'])]
                print(f"Filtering for Hindi songs: {len(df)} available")
            elif language_preference == 'english':
                df = df[df['language'].isin(['english', 'mixed'])]
                print(f"Filtering for English songs: {len(df)} available")
        else:
            print(f"Using all languages: {len(df)} songs available")
        
        if language_preference == 'both' and detected_language:

            df['language_bonus'] = df['language'].apply(
                lambda x: 0.1 if x == detected_language else 0.05 if x == 'mixed' else 0
            )
        else:
            df['language_bonus'] = 0
        
        if filter_genre:
            genre_mask = df['genre'].str.contains(filter_genre, case=False, na=False)
            df = df[genre_mask]
            if len(df) == 0:
                print(f"No songs found for genre: {filter_genre}")
                df = self.song_database.copy()
            else:
                print(f"Filtered to {len(df)} songs for genre: {filter_genre}")
        
        match_scores = []
        for idx, song in df.iterrows():
            try:
                base_score = self.calculate_match_score(song, dominant_emotion)

                final_score = base_score + song.get('language_bonus', 0)
                match_scores.append(min(final_score, 1.0))  
            except Exception as e:
                match_scores.append(0.0)
        
        df['match_score'] = match_scores
        
        if min_score > 0:
            df = df[df['match_score'] >= min_score]
        
        if len(df) == 0:
            print(f"No songs found with score >= {min_score}. Lowering threshold...")
            df = self.song_database.copy()
            df['match_score'] = [self.calculate_match_score(song, dominant_emotion) 
                               for _, song in df.iterrows()]
        
        recommendations = df.nlargest(min(n_recommendations, len(df)), 'match_score')
        
        output_cols = ['name', 'artist', 'match_score', 'language']
        optional_cols = ['genre', 'valence', 'energy', 'danceability', 'tempo']
        
        for col in optional_cols:
            if col in recommendations.columns:
                output_cols.append(col)
        
        result = recommendations[output_cols].copy()
        
        if 'language_bonus' in result.columns:
            result = result.drop('language_bonus', axis=1)
        
        numeric_cols = result.select_dtypes(include=[np.number]).columns
        result[numeric_cols] = result[numeric_cols].round(3)
        
        result = result.reset_index(drop=True)
        
        if 'language' in result.columns:
            lang_dist = result['language'].value_counts()
            print(f"Recommendations by language: {dict(lang_dist)}")
        
        print(f"Found {len(result)} recommendations")
        return result
    
    def calculate_match_score(self, song: pd.Series, target_emotion: str) -> float:
        if target_emotion not in self.emotion_mapping:
            similar_emotions = {
                'happy': 'joy',
                'sad': 'sadness',
                'mad': 'anger',
                'peaceful': 'calm',
                'energetic': 'excited'
            }
            target_emotion = similar_emotions.get(target_emotion, 'joy')
        
        target = self.emotion_mapping[target_emotion]
        
        weights = {
            'valence': 0.35,
            'energy': 0.25,
            'danceability': 0.20,
            'tempo': 0.15,
            'loudness': 0.05
        }
        
        score = 0
        total_weight = 0
        for feature, weight in weights.items():
            if feature in song.index and not pd.isna(song[feature]):
                if feature == 'tempo':
                    target_tempo = target.get(feature, 120)
                    diff = abs(song[feature] - target_tempo) / 140
                    feature_score = max(0, 1 - diff)
                elif feature == 'loudness':
                    target_loudness = target.get(feature, -10)
                    diff = abs(song[feature] - target_loudness) / 60
                    feature_score = max(0, 1 - diff)
                else:
                    target_val = target.get(feature, 0.5)
                    feature_score = 1 - abs(song[feature] - target_val)
                
                score += feature_score * weight
                total_weight += weight
        
        if total_weight > 0:
            score = score / total_weight
        else:
            score = 0.5
        
        return min(max(score, 0), 1)
    
    def get_language_stats(self) -> Dict:
        if self.song_database is None:
            return {"error": "No dataset loaded"}
        
        stats = {
            "total_songs": len(self.song_database),
            "language_distribution": dict(self.song_database['language'].value_counts()),
            "hindi_genres": [],
            "english_genres": []
        }
        
        if 'genre' in self.song_database.columns:
            hindi_songs = self.song_database[self.song_database['language'] == 'hindi']
            english_songs = self.song_database[self.song_database['language'] == 'english']
            
            if len(hindi_songs) > 0:
                stats["hindi_genres"] = list(hindi_songs['genre'].value_counts().head(5).index)
            
            if len(english_songs) > 0:
                stats["english_genres"] = list(english_songs['genre'].value_counts().head(5).index)
        
        return stats


def create_enhanced_recommender(dataset_path: str) -> EnhancedMoodMusicRecommender:
    """Construct a recommender and load its dataset in one step.

    Args:
        dataset_path: Path to the tracks CSV.

    Returns:
        A recommender whose dataset has been loaded.

    Raises:
        Exception: Whatever construction or loading raised; the error is
            printed first and then re-raised unchanged.
    """
    try:
        instance = EnhancedMoodMusicRecommender(dataset_path)
        instance.load_dataset()
    except Exception as err:
        print(f"Error creating recommender: {err}")
        raise
    return instance

def get_multilingual_recommendations(recommender: EnhancedMoodMusicRecommender, 
                                   mood: str, count: int = 10, 
                                   language: str = 'both', genre: str = None) -> pd.DataFrame:
    """Convenience wrapper around ``recommender.recommend_songs``.

    Args:
        recommender: Recommender with a loaded dataset.
        mood: Free-form mood description.
        count: Maximum number of recommendations.
        language: ``'both'``, ``'english'`` or ``'hindi'``.
        genre: Optional genre filter.

    Returns:
        The DataFrame produced by ``recommend_songs``.
    """
    return recommender.recommend_songs(
        mood_text=mood,
        n_recommendations=count,
        language_preference=language,
        filter_genre=genre,
    )


def run_interactive_recommender(dataset_path: str):
    """Run the console loop that asks for a mood and prints recommendations.

    The loop ends when the user types quit/exit/bye/stop, declines another
    round, interrupts with Ctrl-C, or when standard input is exhausted
    (``EOFError``). It also stops after three consecutive unexpected errors,
    so an environment without a usable stdin cannot spin forever.

    Args:
        dataset_path: Path to the tracks CSV passed to
            ``create_enhanced_recommender``.

    Returns:
        None. Initialisation failures are reported on stdout, not raised.
    """
    print("ENHANCED MULTILINGUAL MUSIC RECOMMENDATION SYSTEM")
    print("Supporting English & Hindi Songs")
    print("=" * 60)

    # Back-to-back failures tolerated before the session is abandoned.
    max_consecutive_errors = 3
    consecutive_errors = 0

    try:
        print("Loading music database...")
        recommender = create_enhanced_recommender(dataset_path)

        lang_stats = recommender.get_language_stats()
        if "error" not in lang_stats:
            print(f"\n Dataset Statistics:")
            print(f"   Total songs: {lang_stats['total_songs']:,}")
            print(f"   Language distribution: {lang_stats['language_distribution']}")
            if lang_stats['hindi_genres']:
                print(f"   Top Hindi genres: {', '.join(lang_stats['hindi_genres'][:3])}")
            if lang_stats['english_genres']:
                print(f"   Top English genres: {', '.join(lang_stats['english_genres'][:3])}")

        print(f"\n{'='*60}")
        print("Tell me how you're feeling and I'll recommend music!")
        print("Examples:")
        print(" • English: 'I'm feeling happy and want to dance'")
        print(" • Hindi: 'Main udas hun, kuch sad songs chahiye'")
        print(" • Mixed: 'Feeling romantic, chahiye kuch pyaar wale gaane'")
        print(f"{'='*60}")

        while True:
            try:
                print(f"\n How are you feeling right now?")
                mood_input = input(" Your mood: ").strip()
                # Input is readable, so earlier failures were not a dead stdin.
                consecutive_errors = 0

                if not mood_input:
                    print("  Please describe your mood!")
                    continue

                if mood_input.lower() in ['quit', 'exit', 'bye', 'stop']:
                    print(" Thanks for using the music recommender! Goodbye!")
                    break

                print(f"\n Language preference:")
                print("1. Both English & Hindi")
                print("2. English only") 
                print("3. Hindi only")

                lang_choice = input(" Choose (1/2/3) or press Enter for both: ").strip()

                if lang_choice == '2':
                    language_pref = 'english'
                elif lang_choice == '3':
                    language_pref = 'hindi'
                else:
                    language_pref = 'both'

                try:
                    num_songs = input(" How many songs? (default: 5): ").strip()
                    num_recommendations = int(num_songs) if num_songs else 5
                    num_recommendations = max(1, min(num_recommendations, 20))  # Limit between 1-20
                except ValueError:
                    num_recommendations = 5

                genre_filter = input(" Any specific genre? (optional, press Enter to skip): ").strip()
                genre_filter = genre_filter if genre_filter else None

                print(f"\n{'='*60}")
                print(f"Analyzing your mood: '{mood_input}'")
                print(f"Language: {language_pref.title()}")
                if genre_filter:
                    print(f"Genre filter: {genre_filter}")
                print(f"{'='*60}")

                recommendations = get_multilingual_recommendations(
                    recommender, mood_input, num_recommendations, language_pref, genre_filter
                )

                if len(recommendations) > 0:
                    print(f"\nHERE ARE YOUR PERSONALIZED RECOMMENDATIONS:")
                    print("-" * 60)

                    for i, row in recommendations.iterrows():
                        if row.get('language') == 'hindi':
                            lang_flag = "🇮🇳"
                        elif row.get('language') == 'english':
                            lang_flag = "🇺🇸"
                        else:
                            lang_flag = "🌍"

                        print(f"{i+1:2d}. {lang_flag} {row['name']}")
                        print(f"     Artist: {row['artist']}")
                        print(f"     Match Score: {row['match_score']:.1%}")

                        if 'genre' in row.index and pd.notna(row['genre']):
                            print(f"     Genre: {row['genre']}")

                        audio_info = []
                        if 'valence' in row.index and pd.notna(row['valence']):
                            mood_desc = "Happy" if row['valence'] > 0.6 else "Sad" if row['valence'] < 0.4 else "Neutral"
                            audio_info.append(f"Mood: {mood_desc}")

                        if 'energy' in row.index and pd.notna(row['energy']):
                            energy_desc = "High Energy" if row['energy'] > 0.6 else "Low Energy" if row['energy'] < 0.4 else "Medium Energy"
                            audio_info.append(f"Energy: {energy_desc}")

                        if audio_info:
                            print(f"      {' | '.join(audio_info)}")

                        print()  

                    save_choice = input(" Save these recommendations to a file? (y/n): ").strip().lower()
                    if save_choice in ['y', 'yes']:
                        try:
                            filename = f"music_recommendations_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.csv"
                            recommendations.to_csv(filename, index=False)
                            print(f" Recommendations saved to {filename}")
                        except Exception as e:
                            print(f" Error saving file: {e}")

                else:
                    print(" Sorry, no recommendations found for your mood.")
                    print(" Try:")
                    print("  • Using different mood words")
                    print("  • Changing language preference")
                    print("  • Removing genre filter")

                print(f"\n{'='*60}")
                continue_choice = input(" Want more recommendations? (y/n): ").strip().lower()
                if continue_choice not in ['y', 'yes']:
                    print(" Thanks for using the music recommender! Enjoy your music!")
                    break

            except (KeyboardInterrupt, EOFError):
                # EOFError means stdin is closed; retrying could never succeed.
                print(f"\n\n Goodbye! Hope you found some great music!")
                break
            except Exception as e:
                consecutive_errors += 1
                print(f" Error: {e}")
                if consecutive_errors >= max_consecutive_errors:
                    print(" Too many consecutive errors; stopping the interactive session.")
                    break
                print(" Let's try again...")
                continue

    except Exception as e:
        print(f" Failed to initialize recommender: {e}")
        print("\n Troubleshooting:")
        print("1. Check if the dataset file exists")
        print("2. Ensure the CSV has required columns (valence, energy, etc.)")
        print("3. Verify file permissions")


def get_user_mood_input():
    """Prompt until the user enters a non-blank mood description.

    Returns:
        The entered text with surrounding whitespace removed.
    """
    intro_lines = (
        "\n Describe your current mood or what type of music you want:",
        " You can use English, Hindi, or mix both languages!",
        " Examples:",
        "  • 'I want upbeat dance music'",
        "  • 'Main romantic songs sunna chahta hun'",
        "  • 'Feeling sad, need some slow songs'",
        "  • 'Party mood mein hun, energetic music do'",
    )
    for line in intro_lines:
        print(line)

    mood = input("\n Your mood: ").strip()
    while not mood:
        # Blank answer: nudge the user and ask again.
        print("Please tell me how you're feeling!")
        mood = input("\n Your mood: ").strip()
    return mood


def display_mood_examples():
    examples = {
        " Happy/Energetic": [
            "I'm feeling super happy and energetic!",
            "Main bahut khush hun, dance karne ka mann hai",
            "Want some upbeat party music"
        ],
        " Sad/Melancholic": [
            "Feeling really sad today",
            "Dil udas hai, kuch slow songs chahiye", 
            "Need some emotional music"
        ],
        " Romantic/Love": [
            "In a romantic mood",
            "Pyaar mein hun, romantic gaane sunne ka mann hai",
            "Want some love songs"
        ],
        " Calm/Peaceful": [
            "Want to relax and chill",
            "Shaanti chahiye, kuch peaceful music do",
            "Need some calming music"
        ]
    }
    
    print("\n MOOD INPUT EXAMPLES:")
    print("=" * 40)
    for category, examples_list in examples.items():
        print(f"\n{category}:")
        for example in examples_list:
            print(f"   • '{example}'")
    print("=" * 40)


if __name__ == "__main__":
    # Entry point: start the interactive session when the dataset is present,
    # otherwise print setup instructions.
    DATASET_PATH = "/kaggle/input/spotify/SpotifyFeatures.csv"

    print("\n".join([
        "MULTILINGUAL MUSIC RECOMMENDATION SYSTEM",
        "Supporting English & Hindi Songs",
        "=" * 50,
    ]))

    if not os.path.exists(DATASET_PATH):
        print("\n".join([
            f" Dataset file not found: {DATASET_PATH}",
            "\n Setup Instructions:",
            "1. Download a Spotify tracks dataset from Kaggle",
            "2. Make sure it includes diverse genres (Bollywood, Hindi, Indian, etc.)",
            "3. Update the DATASET_PATH variable with your file path",
            "4. Install required packages:",
            "   pip install transformers torch scikit-learn pandas numpy langdetect",
            "5. Run the script again",
            "\n Alternative usage:",
            "recommender = create_enhanced_recommender('your/dataset/path.csv')",
            "recommendations = get_multilingual_recommendations(recommender, 'your mood', 5, 'both')",
        ]))
    else:
        display_mood_examples()

        run_interactive_recommender(DATASET_PATH)

2025-06-21 03:30:39.864894: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750476640.328158      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750476640.436308      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


MULTILINGUAL MUSIC RECOMMENDATION SYSTEM
Supporting English & Hindi Songs

 MOOD INPUT EXAMPLES:

 Happy/Energetic:
   • 'I'm feeling super happy and energetic!'
   • 'Main bahut khush hun, dance karne ka mann hai'
   • 'Want some upbeat party music'

 Sad/Melancholic:
   • 'Feeling really sad today'
   • 'Dil udas hai, kuch slow songs chahiye'
   • 'Need some emotional music'

 Romantic/Love:
   • 'In a romantic mood'
   • 'Pyaar mein hun, romantic gaane sunne ka mann hai'
   • 'Want some love songs'

 Calm/Peaceful:
   • 'Want to relax and chill'
   • 'Shaanti chahiye, kuch peaceful music do'
   • 'Need some calming music'
ENHANCED MULTILINGUAL MUSIC RECOMMENDATION SYSTEM
Supporting English & Hindi Songs
Loading music database...
Loading emotion detection model...


config.json:   0%|          | 0.00/1.00k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/329M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/294 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/329M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Device set to use cuda:0


Loading dataset from: /kaggle/input/spotify/SpotifyFeatures.csv
Successfully loaded dataset with utf-8 encoding
Dataset loaded: 232725 tracks
Classifying songs by language...
Language distribution:
   English: 101,882 songs
   Mixed: 73,360 songs
   Hindi: 1,271 songs
Dataset processed successfully with multilingual support!

 Dataset Statistics:
   Total songs: 176,513
   Language distribution: {'english': 101882, 'mixed': 73360, 'hindi': 1271}
   Top Hindi genres: Movie, Classical, Comedy
   Top English genres: Electronic, Alternative, Reggae

Tell me how you're feeling and I'll recommend music!
Examples:
 • English: 'I'm feeling happy and want to dance'
 • Hindi: 'Main udas hun, kuch sad songs chahiye'
 • Mixed: 'Feeling romantic, chahiye kuch pyaar wale gaane'

 How are you feeling right now?


 Your mood:  Im feeling motivated like creed



 Language preference:
1. Both English & Hindi
2. English only
3. Hindi only


 Choose (1/2/3) or press Enter for both:  2
 How many songs? (default: 5):  5
 Any specific genre? (optional, press Enter to skip):  



Analyzing your mood: 'Im feeling motivated like creed'
Language: English
🎭 Detected emotion: JOY
🌐 Input language: English
📊 Emotion confidence: 0.991
🎵 Filtering for English songs: 175242 available
Recommendations by language: {'english': 5}
Found 5 recommendations

HERE ARE YOUR PERSONALIZED RECOMMENDATIONS:
------------------------------------------------------------
 1. 🇺🇸 Talk
     Artist: Why Don't We
     Match Score: 99.6%
     Genre: Dance
      Mood: Happy | Energy: High Energy

 2. 🇺🇸 I Got This
     Artist: Jerrod Niemann
     Match Score: 99.3%
     Genre: Pop
      Mood: Happy | Energy: High Energy

 3. 🇺🇸 Suffering (Jacob Hemphill (SOJA) Remix)
     Artist: Rebelution
     Match Score: 99.2%
     Genre: Reggae
      Mood: Happy | Energy: High Energy

 4. 🇺🇸 London Loves - 2012 Remaster
     Artist: Blur
     Match Score: 99.2%
     Genre: Electronic
      Mood: Happy | Energy: High Energy

 5. 🇺🇸 Trapped In My Mind
     Artist: Kid Cudi
     Match Score: 99.0%
     Genr

 Save these recommendations to a file? (y/n):  n





 Want more recommendations? (y/n):  n


 Thanks for using the music recommender! Enjoy your music!
