# 🎵 Music Recommendation System Notebook

## 1. Load Dataset

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the track table from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer='spotify_tracks.csv')
# Preview the first five rows (the default row count of head)
df.head(n=5)

## 2. Explore Dataset

In [None]:
# Print the per-column summary of the track table
df.info()

# Count the missing entries in each column
df.isna().sum()

## 3. Preprocess Data

In [None]:
# Normalise genre labels to lowercase so they compare equal to the
# lowercase genre names used when filtering by genre
df['genre'] = df['genre'].str.lower()

# Drop exact duplicate rows, then renumber the index 0..n-1.
# Without the renumbering the index keeps gaps where rows were removed, so
# index labels stop matching row positions; code that takes a label from
# `.index` and then uses it positionally (`.iloc`, a row of a matrix built
# from the frame) would silently select the wrong rows.
df.drop_duplicates(inplace=True)
df.reset_index(drop=True, inplace=True)

## 4. Define Mood to Genre Mapping

In [None]:
# Lowercase genre labels associated with each listener mood.
# A genre may be listed under more than one mood.
mood_genres = {
    'Chill': [
        'acoustic', 'chill', 'ambient', 'indie', 'lo-fi',
    ],
    'Happy': [
        'pop', 'dance', 'funk', 'soul', 'reggae',
    ],
    'Energetic': [
        'edm', 'electronic', 'rock', 'metal', 'house', 'techno',
    ],
    'Romantic': [
        'rnb', 'soul', 'acoustic', 'ballad',
    ],
    'Sad': [
        'acoustic', 'indie', 'blues', 'piano', 'soft-rock',
    ],
    'Party': [
        'hip hop', 'trap', 'edm', 'reggaeton', 'pop',
    ],
    'Focus': [
        'classical', 'instrumental', 'ambient', 'lo-fi', 'piano',
    ],
}

## 5. Create Mood-Based Recommendation Function

In [None]:
def recommend_by_mood(df, mood, top_n=10):
    """Return the most popular tracks whose genre is listed for a mood.

    The mood name is matched against the keys of the module-level
    ``mood_genres`` mapping ignoring letter case and surrounding whitespace,
    so ``'happy'`` and ``' Happy '`` select the same genres as ``'Happy'``.

    Args:
        df: Track table with a ``genre`` column and a ``popularity`` column.
        mood: Mood name to look up in ``mood_genres``.
        top_n: Number of rows to keep, passed to ``DataFrame.head``.

    Returns:
        The rows of ``df`` whose ``genre`` value is listed for the mood,
        sorted by ``popularity`` in descending order and truncated with
        ``head(top_n)``. The result is empty when the mood is unknown.
    """
    # Normalise the requested mood so a casing or spacing difference does not
    # silently produce an empty recommendation list.
    wanted = str(mood).strip().casefold()
    genres = next(
        (names for key, names in mood_genres.items()
         if str(key).casefold() == wanted),
        [],
    )
    filtered = df[df['genre'].isin(genres)]
    ranked = filtered.sort_values(by='popularity', ascending=False)
    return ranked.head(top_n)

# Demo: most popular tracks in the genres mapped to the 'Happy' mood
recommend_by_mood(df, mood='Happy')

## 6. Content-Based Recommendation Model (Optional)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Combine textual features into one document per track.
# Missing values are replaced with an empty string first: concatenating a
# string with NaN yields NaN, and TfidfVectorizer rejects NaN documents.
df['text_features'] = (
    df['genre'].fillna('')
    + ' ' + df['artists'].fillna('')
    + ' ' + df['album'].fillna('')
)

# Vectorize: one TF-IDF row per track, in the same order as the rows of df
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['text_features'])

# Cosine similarity matrix: entry [i, j] compares the i-th and j-th rows of df.
# NOTE: this is a dense n x n array, so memory grows quadratically with the
# number of tracks.
cosine_sim = cosine_similarity(X, X)

def recommend_similar_songs(song_title, df=df, sim_matrix=cosine_sim, top_n=5):
    """Return the tracks most similar to the first track named ``song_title``.

    Args:
        song_title: Exact value to match in the ``name`` column of ``df``.
        df: Track table. The default is the module-level ``df`` as it was
            bound when this function was defined.
        sim_matrix: Square similarity matrix whose i-th row and column refer
            to the i-th row (by position) of ``df``. The default is the
            module-level ``cosine_sim`` bound at definition time.
        top_n: Maximum number of similar tracks to return; values below one
            yield an empty result.

    Returns:
        Up to ``top_n`` rows of ``df``, ordered from most to least similar,
        never including the query track itself.

    Raises:
        IndexError: If no row of ``df`` has ``song_title`` in ``name``.
    """
    # Work with row positions rather than index labels: sim_matrix and .iloc
    # are positional, and labels differ from positions whenever the index of
    # df is not 0..n-1 (for example after rows were dropped).
    positions = (df['name'] == song_title).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(f"no track named {song_title!r} in df['name']")
    idx = int(positions[0])

    scores = sim_matrix[idx]
    # Remove the query track explicitly instead of assuming it sorts first;
    # a track with identical text features ties with it at the top score.
    candidates = [pos for pos in range(len(scores)) if pos != idx]
    candidates.sort(key=lambda pos: scores[pos], reverse=True)
    return df.iloc[candidates[:max(top_n, 0)]]

# Example usage
# recommend_similar_songs('Acoustic')

## 7. Data Visualizations

In [None]:
# Bar chart of the ten genres with the most tracks
genre_counts = df['genre'].value_counts().head(10)
plt.figure(figsize=(10, 5))
genre_counts.plot.bar(color='skyblue')
plt.gca().set(
    title="Top 10 Genres by Track Count",
    ylabel="Number of Tracks",
)
plt.xticks(rotation=45)
plt.show()

In [None]:
# Histogram of track popularity with a kernel density estimate overlaid
sns.histplot(data=df, x='popularity', bins=30, kde=True, color='orange')
plt.gca().set(
    title="Distribution of Track Popularity",
    xlabel="Popularity",
    ylabel="Count",
)
plt.show()

In [None]:
# Histogram of track duration; the millisecond column is converted to
# minutes first (60000 ms per minute)
df['duration_min'] = df['duration_ms'].div(60000)
sns.histplot(data=df, x='duration_min', bins=30, kde=True, color='green')
plt.gca().set(
    title="Distribution of Track Duration",
    xlabel="Duration (minutes)",
    ylabel="Count",
)
plt.show()