In [2]:
pip install scikit-learn

Collecting scikit-learn
  Using cached scikit_learn-1.7.2-cp313-cp313-win_amd64.whl.metadata (11 kB)
Collecting scipy>=1.8.0 (from scikit-learn)
  Using cached scipy-1.16.2-cp313-cp313-win_amd64.whl.metadata (60 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.5.2-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Using cached scikit_learn-1.7.2-cp313-cp313-win_amd64.whl (8.7 MB)
Using cached joblib-1.5.2-py3-none-any.whl (308 kB)
Using cached scipy-1.16.2-cp313-cp313-win_amd64.whl (38.5 MB)
Using cached threadpoolctl-3.6.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, scipy, joblib, scikit-learn

   ---------- ----------------------------- 1/4 [scipy]
   ---------- ----------------------------- 1/4 [scipy]
   ---------- ----------------------------- 1/4 [scipy]
   ---------- ----------------------------- 1/4 [scipy]
   ---------- ---

In [51]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
import joblib

# --- 1. LOAD AND PREPARE THE DATASET ---
print("🎵 Loading the Spotify song dataset...")
dataset_path = 'spotify_dataset.csv'
df = pd.read_csv(dataset_path)

features_to_use = ['valence', 'energy', 'danceability', 'tempo', 'acousticness']
df.dropna(subset=features_to_use, inplace=True)

# Normalize the features
scaler = MinMaxScaler()
df_normalized = pd.DataFrame(scaler.fit_transform(df[features_to_use]), columns=features_to_use)

# <-- THE FIRST FIX: Convert the entire dataset to float32 before training
df_normalized = df_normalized.astype(np.float32)

print("✅ Song dataset prepared with 5 normalized features!")

# --- 2. CREATE MOOD CLUSTERS USING K-MEANS ---
n_clusters = 20
print(f"\n🤖 Creating {n_clusters} mood clusters with K-Means...")

kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
# Assign a cluster label to each song
df['cluster'] = kmeans.fit_predict(df_normalized)

print("✅ Clustering complete!")

# --- 3. SAVE THE RESULTS ---
clustered_dataset_path = 'spotify_dataset_with_clusters.csv'
df.to_csv(clustered_dataset_path, index=False)
print(f"💾 Clustered dataset saved to '{clustered_dataset_path}'")

joblib.dump(scaler, 'scaler.pkl')
joblib.dump(kmeans, 'kmeans.pkl')
print("💾 Scaler and K-Means model (with correct data type) saved to files.")

🎵 Loading the Spotify song dataset...
✅ Song dataset prepared with 5 normalized features!

🤖 Creating 20 mood clusters with K-Means...
✅ Clustering complete!
💾 Clustered dataset saved to 'spotify_dataset_with_clusters.csv'
💾 Scaler and K-Means model (with correct data type) saved to files.


In [55]:
# --- Re-import libraries and load saved models ---
import pandas as pd
import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input
import joblib
import os

# --- 1. LOAD ALL MODELS AND DATA ---
print("🧠 Loading models and clustered dataset...")
image_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')
scaler = joblib.load('scaler.pkl')
kmeans = joblib.load('kmeans.pkl')
df_clustered = pd.read_csv('spotify_dataset_with_clusters.csv')
print("✅ All models and data loaded successfully!")


# --- 2. IMAGE ANALYSIS FUNCTION ---
def extract_image_features(img_path):
    img = Image.open(img_path).resize((224, 224))
    if img.mode != "RGB":
        img = img.convert("RGB")
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    features = image_model.predict(x, verbose=0)
    return features.flatten()


# --- 3. FINAL RECOMMENDATION ENGINE ---
def recommend_songs(image_path, top_n=5):
    print(f"\n🖼️ Analyzing image: {image_path}")
    image_features = extract_image_features(image_path)
    
    raw_mood = np.array([[
        np.mean(image_features), np.median(image_features), np.std(image_features),
        np.max(image_features) - np.min(image_features), np.quantile(image_features, 0.75)
    ]])
    
    scaled_mood = scaler.transform(raw_mood)
    
    # <-- THE SECOND FIX: Ensure input is also float32 for safety
    scaled_mood = scaled_mood.astype(np.float32)
    
    predicted_cluster = kmeans.predict(scaled_mood)[0]
    print(f"✨ Image mapped to Mood Cluster #{predicted_cluster}")
    
    cluster_songs = df_clustered[df_clustered['cluster'] == predicted_cluster]
    
    return cluster_songs.sample(n=min(top_n, len(cluster_songs)))


# --- 4. HOW TO USE IT ---
test_image_path = 'shash.jpeg' # Change this to your test image

if os.path.exists(test_image_path):
    recommended_songs = recommend_songs(test_image_path)
    print("\n✨ Top 5 Song Recommendations from the Cluster:")
    display_cols = ['track_name', 'artists', 'valence', 'energy', 'danceability', 'cluster']
    print(recommended_songs[display_cols])
else:
    print(f"\n❌ ERROR: Test image '{test_image_path}' not found.")

🧠 Loading models and clustered dataset...
✅ All models and data loaded successfully!

🖼️ Analyzing image: shash.jpeg
✨ Image mapped to Mood Cluster #9

✨ Top 5 Song Recommendations from the Cluster:
                track_name                   artists  valence  energy  \
94842        see you leave  sorrow;Thomas Reid;Zaini    0.619   0.397   
108836    Jamás Retornarás    Miguel Calo;Raúl Berón    0.513   0.304   
93474   Парижские фантазии              Oleg Pogudin    0.660   0.339   
45036   Prisionero Del Mar             Los Tecolines    0.812   0.283   
105458        Mr Lightfoot            Riverside Park    0.424   0.124   

        danceability  cluster  
94842          0.695        9  
108836         0.700        9  
93474          0.739        9  
45036          0.711        9  
105458         0.673        9  




FEATURE 2 - FEEDBACK 

In [59]:
# --- Re-import libraries and load saved models ---
import pandas as pd
import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input
import joblib
import os

# --- 1. LOAD ALL MODELS AND DATA ---
print("🧠 Loading models and clustered dataset...")
image_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')
scaler = joblib.load('scaler.pkl')
kmeans = joblib.load('kmeans.pkl')
df_clustered = pd.read_csv('spotify_dataset_with_clusters.csv')
print("✅ All models and data loaded successfully!")

# --- 2. SETUP FEEDBACK DATABASE ---
FEEDBACK_FILE = 'user_feedback.csv'

def save_feedback(user_id, track_id, feedback):
    """Saves user feedback to a CSV file."""
    new_feedback = pd.DataFrame([{'user_id': user_id, 'track_id': track_id, 'feedback': feedback}])
    if not os.path.exists(FEEDBACK_FILE):
        new_feedback.to_csv(FEEDBACK_FILE, index=False)
    else:
        new_feedback.to_csv(FEEDBACK_FILE, mode='a', header=False, index=False)
    print(f"👍 Feedback saved: User {user_id} {'liked' if feedback == 1 else 'disliked'} track {track_id}")

def get_user_likes(user_id):
    """Gets a list of all track IDs a user has liked."""
    if not os.path.exists(FEEDBACK_FILE):
        return []
    feedback_df = pd.read_csv(FEEDBACK_FILE)
    user_feedback = feedback_df[feedback_df['user_id'] == user_id]
    liked_tracks = user_feedback[user_feedback['feedback'] == 1]['track_id'].tolist()
    return liked_tracks

# --- 3. IMAGE ANALYSIS FUNCTION ---
def extract_image_features(img_path):
    img = Image.open(img_path).resize((224, 224))
    if img.mode != "RGB":
        img = img.convert("RGB")
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    features = image_model.predict(x, verbose=0)
    return features.flatten()

# --- 4. RECOMMENDATION ENGINE WITH FEEDBACK ---
def recommend_songs(image_path, user_id, top_n=5):
    print(f"\n🖼️ Analyzing image for User #{user_id}: {image_path}")
    image_features = extract_image_features(image_path)
    
    raw_mood = np.array([[
        np.mean(image_features), np.median(image_features), np.std(image_features),
        np.max(image_features) - np.min(image_features), np.quantile(image_features, 0.75)
    ]])
    scaled_mood = scaler.transform(raw_mood).astype(np.float32)
    
    predicted_cluster = kmeans.predict(scaled_mood)[0]
    print(f"✨ Image mapped to Mood Cluster #{predicted_cluster}")
    
    # Get all songs from the predicted cluster
    cluster_songs = df_clustered[df_clustered['cluster'] == predicted_cluster]
    
    # --- PERSONALIZATION LOGIC ---
    # Get the list of songs this user has previously liked
    user_liked_songs = get_user_likes(user_id)
    
    # Separate the cluster songs into "liked" and "not liked"
    liked_in_cluster = cluster_songs[cluster_songs['track_id'].isin(user_liked_songs)]
    other_songs = cluster_songs[~cluster_songs['track_id'].isin(user_liked_songs)]
    
    # Combine them, putting the liked songs first, then fill with other random songs
    final_recommendations = pd.concat([liked_in_cluster, other_songs.sample(frac=1)]).head(top_n)
    
    return final_recommendations

🧠 Loading models and clustered dataset...
✅ All models and data loaded successfully!


In [58]:
# --- 5. SIMULATION & USAGE ---
# Define a test user and image
test_user_id = 1
test_image_path = 'mountain.jpeg' # Use an image in your /notebooks folder

# 1. Get initial recommendations for the user
print("--- FIRST RECOMMENDATION (NO FEEDBACK YET) ---")
initial_recs = recommend_songs(test_image_path, user_id=test_user_id)
print("\n✨ Top 5 Song Recommendations:")
display_cols = ['track_id', 'track_name', 'artists']
print(initial_recs[display_cols])

# 2. Simulate the user "liking" the second recommended song
if not initial_recs.empty:
    song_to_like = initial_recs.iloc[1] # Let's say they like the second song
    save_feedback(user_id=test_user_id, track_id=song_to_like['track_id'], feedback=1) # 1 for "like"
else:
    print("\nNo recommendations were generated to provide feedback on.")

# 3. Get recommendations for the SAME image AGAIN
print("\n\n--- SECOND RECOMMENDATION (AFTER LIKING A SONG) ---")
new_recs = recommend_songs(test_image_path, user_id=test_user_id)
print("\n✨ Top 5 Song Recommendations (Personalized):")
print(new_recs[display_cols])

--- FIRST RECOMMENDATION (NO FEEDBACK YET) ---

🖼️ Analyzing image for User #1: mountain.jpeg
✨ Image mapped to Mood Cluster #19

✨ Top 5 Song Recommendations:
                      track_id  \
79200   1Ic5yiebfz4GfOTfJnzZ3W   
28706   3eWjl48K6Kgn5Vr7iKWsh9   
102351  32YHCNItnCdiYaPbw0uTo8   
102585  4FN82sgnqneYuQnW9VFygo   
73809   0S0zgiheqNBkRjEMo7pnig   

                                               track_name  \
79200   I Know You - From The "Fifty Shades Of Grey" S...   
28706                                           Right Now   
102351  I Put A Spell On You (Fifty Shades of Grey) - ...   
102585                        Won't Be Home For Christmas   
73809                               Lightning Over Heaven   

                                     artists  
79200                            Skylar Grey  
28706                           NURKO;Misdom  
102351                          Annie Lennox  
102585  Hootie & The Blowfish;Abigail Hodges  
73809                      Amelie



VIDEO ANALYSIS - FEATURE 3

In [60]:
pip install opencv-python

Collecting opencv-python
  Downloading opencv_python-4.12.0.88-cp37-abi3-win_amd64.whl.metadata (19 kB)
Collecting numpy<2.3.0,>=2 (from opencv-python)
  Using cached numpy-2.2.6-cp313-cp313-win_amd64.whl.metadata (60 kB)
Downloading opencv_python-4.12.0.88-cp37-abi3-win_amd64.whl (39.0 MB)
   ---------------------------------------- 0.0/39.0 MB ? eta -:--:--
   -- ------------------------------------- 2.9/39.0 MB 14.9 MB/s eta 0:00:03
   ------ --------------------------------- 6.0/39.0 MB 14.7 MB/s eta 0:00:03
   --------- ------------------------------ 8.9/39.0 MB 14.7 MB/s eta 0:00:03
   ------------ --------------------------- 11.8/39.0 MB 14.7 MB/s eta 0:00:02
   --------------- ------------------------ 14.7/39.0 MB 14.7 MB/s eta 0:00:02
   ------------------ --------------------- 17.8/39.0 MB 14.7 MB/s eta 0:00:02
   ------------------- -------------------- 19.4/39.0 MB 13.6 MB/s eta 0:00:02
   -------------------- ------------------- 20.2/39.0 MB 12.6 MB/s eta 0:00:02
   ------

  You can safely remove it manually.
  You can safely remove it manually.


In [61]:
import pandas as pd
import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input
import joblib
import os
import cv2  # The OpenCV library for video processing

# --- 1. LOAD ALL MODELS AND DATA ---
print("🧠 Loading all models and the clustered dataset...")
image_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')
scaler = joblib.load('scaler.pkl')
kmeans = joblib.load('kmeans.pkl')
df_clustered = pd.read_csv('spotify_dataset_with_clusters.csv')
print("✅ All models and data loaded successfully!")

# --- 2. FEEDBACK SYSTEM FUNCTIONS ---
FEEDBACK_FILE = 'user_feedback.csv'
def save_feedback(user_id, track_id, feedback):
    new_feedback = pd.DataFrame([{'user_id': user_id, 'track_id': track_id, 'feedback': feedback}])
    if not os.path.exists(FEEDBACK_FILE):
        new_feedback.to_csv(FEEDBACK_FILE, index=False)
    else:
        new_feedback.to_csv(FEEDBACK_FILE, mode='a', header=False, index=False)
    print(f"👍 Feedback saved for User {user_id}")

def get_user_likes(user_id):
    if not os.path.exists(FEEDBACK_FILE): return []
    feedback_df = pd.read_csv(FEEDBACK_FILE)
    user_likes = feedback_df[(feedback_df['user_id'] == user_id) & (feedback_df['feedback'] == 1)]
    return user_likes['track_id'].tolist()

# --- 3. MEDIA PROCESSING FUNCTIONS ---
def extract_image_features(img):
    """Takes a PIL Image object and returns its feature vector."""
    img = img.resize((224, 224))
    if img.mode != "RGB":
        img = img.convert("RGB")
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    features = image_model.predict(x, verbose=0)
    return features.flatten()

def extract_frames_from_video(video_path, num_frames=5):
    """Extracts a set number of evenly spaced frames from a video."""
    frames = []
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_frames < num_frames:
        print("⚠️ Video is too short; using all available frames.")
        num_frames = total_frames
    
    frame_indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
    
    for i in frame_indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = cap.read()
        if ret:
            # Convert frame from OpenCV's BGR format to PIL's RGB format
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(Image.fromarray(frame_rgb))
            
    cap.release()
    print(f"📹 Extracted {len(frames)} frames from video.")
    return frames

# --- 4. THE FINAL RECOMMENDATION ENGINE ---
def get_recommendations(file_path, user_id, top_n=5):
    print(f"\n▶️ Analyzing file for User #{user_id}: {file_path}")
    
    # Determine if the file is an image or video
    video_extensions = ['.mp4', '.mov', '.avi', '.mkv']
    file_ext = os.path.splitext(file_path)[1].lower()
    
    # --- Feature Extraction ---
    if file_ext in video_extensions:
        # It's a video: extract frames and average their features
        frames = extract_frames_from_video(file_path)
        if not frames:
            print("❌ Could not extract frames from video.")
            return pd.DataFrame()
        
        frame_features = [extract_image_features(frame) for frame in frames]
        final_features = np.mean(frame_features, axis=0)
    else:
        # It's an image: extract features directly
        img = Image.open(file_path)
        final_features = extract_image_features(img)
        
    # --- Mood Prediction & Recommendation (same as before) ---
    raw_mood = np.array([[
        np.mean(final_features), np.median(final_features), np.std(final_features),
        np.max(final_features) - np.min(final_features), np.quantile(final_features, 0.75)
    ]])
    scaled_mood = scaler.transform(raw_mood).astype(np.float32)
    predicted_cluster = kmeans.predict(scaled_mood)[0]
    print(f"✨ File mapped to Mood Cluster #{predicted_cluster}")
    
    cluster_songs = df_clustered[df_clustered['cluster'] == predicted_cluster]
    
    # --- Personalization ---
    user_liked_songs = get_user_likes(user_id)
    liked_in_cluster = cluster_songs[cluster_songs['track_id'].isin(user_liked_songs)]
    other_songs = cluster_songs[~cluster_songs['track_id'].isin(user_liked_songs)]
    final_recommendations = pd.concat([liked_in_cluster, other_songs.sample(frac=1)]).head(top_n)
    
    return final_recommendations

🧠 Loading all models and the clustered dataset...
✅ All models and data loaded successfully!


In [63]:
# --- 5. USAGE EXAMPLE ---
test_user_id = 1

# --- Test with an Image ---
test_image_path = 'grass.jpeg' # Make sure you have an image file
if os.path.exists(test_image_path):
    image_recs = get_recommendations(test_image_path, user_id=test_user_id)
    print("\n✨ Top 5 Recommendations for the Image:")
    print(image_recs[['track_name', 'artists', 'cluster']])
else:
    print(f"⚠️ Test image '{test_image_path}' not found.")
    
print("\n" + "="*50 + "\n") # Separator

# --- Test with a Video ---
test_video_path = 'trek.mp4' # Add a short video file to your folder
if os.path.exists(test_video_path):
    video_recs = get_recommendations(test_video_path, user_id=test_user_id)
    print("\n✨ Top 5 Recommendations for the Video:")
    print(video_recs[['track_name', 'artists', 'cluster']])
else:
    print(f"⚠️ Test video '{test_video_path}' not found.")


▶️ Analyzing file for User #1: grass.jpeg
✨ File mapped to Mood Cluster #19

✨ Top 5 Recommendations for the Image:
                                              track_name  \
28706                                          Right Now   
29558                                          Right Now   
79200  I Know You - From The "Fifty Shades Of Grey" S...   
99134                                     Christmas Time   
40504                                Teu Toque - Ao Vivo   

                         artists  cluster  
28706               NURKO;Misdom       19  
29558               NURKO;Misdom       19  
79200                Skylar Grey       19  
99134                Bryan Adams       19  
40504  Gabi Sampaio;Nívea Soares       19  



▶️ Analyzing file for User #1: trek.mp4




📹 Extracted 5 frames from video.
✨ File mapped to Mood Cluster #19

✨ Top 5 Recommendations for the Video:
                                              track_name        artists  \
28706                                          Right Now   NURKO;Misdom   
29558                                          Right Now   NURKO;Misdom   
79200  I Know You - From The "Fifty Shades Of Grey" S...    Skylar Grey   
40834                                 Esperar É Caminhar   Palavrantiga   
62512                                            ひまわりの約束  Motohiro Hata   

       cluster  
28706       19  
29558       19  
79200       19  
40834       19  
62512       19  




OPTION 1 - USING HUGGING FACE MODAL

In [64]:
pip install transformers torch torchvision

Collecting transformers
  Using cached transformers-4.56.2-py3-none-any.whl.metadata (40 kB)
Collecting torch
  Using cached torch-2.8.0-cp313-cp313-win_amd64.whl.metadata (30 kB)
Collecting torchvision
  Downloading torchvision-0.23.0-cp313-cp313-win_amd64.whl.metadata (6.1 kB)
Collecting filelock (from transformers)
  Using cached filelock-3.19.1-py3-none-any.whl.metadata (2.1 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)
  Downloading huggingface_hub-0.35.1-py3-none-any.whl.metadata (14 kB)
Collecting regex!=2019.12.17 (from transformers)
  Using cached regex-2025.9.18-cp313-cp313-win_amd64.whl.metadata (41 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)
  Using cached tokenizers-0.22.1-cp39-abi3-win_amd64.whl.metadata (6.9 kB)
Collecting safetensors>=0.4.3 (from transformers)
  Using cached safetensors-0.6.2-cp38-abi3-win_amd64.whl.metadata (4.1 kB)
Collecting fsspec>=2023.5.0 (from huggingface-hub<1.0,>=0.34.0->transformers)
  Using cached fsspec-2

In [76]:
import pandas as pd
import numpy as np
from PIL import Image
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
from transformers import ViTFeatureExtractor, ViTForImageClassification
import warnings

# Suppress verbose warnings from scikit-learn
warnings.filterwarnings('ignore', category=FutureWarning)

# --- 1. LOAD DATA AND BUILD MODELS ON-THE-FLY ---
print("🧠 Initializing session...")

# Load the base dataset
df_clustered = pd.read_csv('spotify_dataset_with_clusters.csv')

# Prepare the features for clustering (using float64, the default)
features_to_use = ['valence', 'energy', 'danceability', 'tempo', 'acousticness']
df_clustered.dropna(subset=features_to_use, inplace=True)
scaler = MinMaxScaler()
song_features_normalized = scaler.fit_transform(df_clustered[features_to_use])

# Train the K-Means model in memory
n_clusters = 20
kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
df_clustered['cluster'] = kmeans.fit_predict(song_features_normalized)

# Load the image tagging model
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')
tagging_model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')
print("✅ All models and data are ready!")


# --- 2. THE MOOD DICTIONARY ---
mood_map = {
    ('party', 'celebration', 'concert', 'crowd'): {'valence': 0.3, 'energy': 0.2, 'danceability': 0.3},
    ('smile', 'joy', 'laughing'): {'valence': 0.4, 'energy': 0.1},
    ('beach', 'seashore', 'coast'): {'valence': 0.2, 'energy': -0.2, 'acousticness': 0.1},
    ('sunset', 'sunrise', 'landscape'): {'valence': 0.1, 'energy': -0.3, 'acousticness': 0.3},
    ('forest', 'nature', 'mountain'): {'valence': 0.1, 'energy': -0.2, 'acousticness': 0.4},
    ('sad', 'gloomy', 'rain'): {'valence': -0.4, 'energy': -0.3},
    ('dark', 'night'): {'energy': -0.2},
    ('car', 'driving', 'road'): {'energy': 0.2, 'tempo': 0.1},
    ('sports', 'running'): {'energy': 0.3, 'tempo': 0.2},
}

# --- 3. HELPER & RECOMMENDATION FUNCTIONS ---
def get_image_tags(img, top_k=5):
    """Uses the Vision Transformer to get descriptive tags for an image."""
    inputs = feature_extractor(images=img, return_tensors="pt")
    outputs = tagging_model(**inputs)
    logits = outputs.logits
    top_indices = logits[0].topk(top_k).indices.tolist()
    tags = [tagging_model.config.id2label[i] for i in top_indices]
    return tags

def get_recommendations_smarter(file_path, top_n=5):
    print(f"\n🖼️ Analyzing image: {file_path}")
    img = Image.open(file_path)
    
    tags = get_image_tags(img)
    print(f"✨ Detected Tags: {tags}")
    
    # Create the mood vector with the required float64 type
    target_mood = np.array([0.5, 0.5, 0.5, 0.5, 0.5], dtype=np.float64)
    
    for tag in tags:
        for keywords, mood_values in mood_map.items():
            if any(keyword in tag for keyword in keywords):
                target_mood[0] += mood_values.get('valence', 0.0)
                target_mood[1] += mood_values.get('energy', 0.0)
                target_mood[2] += mood_values.get('danceability', 0.0)
                target_mood[3] += mood_values.get('tempo', 0.0)
                target_mood[4] += mood_values.get('acousticness', 0.0)
                
    target_mood = np.clip(target_mood, 0, 1)
    
    # Scale the target mood using the scaler we created in this session
    scaled_mood = scaler.transform(target_mood.reshape(1, -1))
    
    # Predict the cluster using the model we created in this session
    predicted_cluster = kmeans.predict(scaled_mood)[0]
    print(f"✨ Image mapped to Mood Cluster #{predicted_cluster}")
    
    cluster_songs = df_clustered[df_clustered['cluster'] == predicted_cluster]
    return cluster_songs.sample(n=min(top_n, len(cluster_songs)))


# --- 4. HOW TO USE IT ---
test_image_path = 'mountain.jpeg' # Change to your image file

if os.path.exists(test_image_path):
    recommended_songs = get_recommendations_smarter(test_image_path)
    print("\n✨ Top 5 'Smarter' Song Recommendations:")
    display_cols = ['track_name', 'artists', 'valence', 'energy', 'danceability', 'cluster']
    print(recommended_songs[display_cols])
else:
    print(f"\n❌ ERROR: Test image '{test_image_path}' not found.")

🧠 Initializing session...
✅ All models and data are ready!

🖼️ Analyzing image: mountain.jpeg
✨ Detected Tags: ['valley, vale', 'worm fence, snake fence, snake-rail fence, Virginia fence', 'lakeside, lakeshore', 'alp', 'hay']
✨ Image mapped to Mood Cluster #11

✨ Top 5 'Smarter' Song Recommendations:
          track_name             artists  valence  energy  danceability  \
21464   Gvnman Shift               Skeng    0.368   0.570         0.911   
7801         Get Sad  Greensky Bluegrass    0.488   0.400         0.531   
62417          Lemon       Kenshi Yonezu    0.446   0.661         0.532   
112657    ROADRUNNER    Jefe;Bar B;Bekom    0.441   0.464         0.809   
3383      Automobile               KALEO    0.356   0.553         0.692   

        cluster  
21464        11  
7801         11  
62417        11  
112657       11  
3383         11  




BOTH IMAGE AND VIDEO

In [77]:
import pandas as pd
import numpy as np
from PIL import Image
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
from transformers import ViTFeatureExtractor, ViTForImageClassification
import warnings
import cv2 # <-- NEW: Import OpenCV for video processing

# Suppress verbose warnings
warnings.filterwarnings('ignore', category=FutureWarning)

# --- 1. LOAD DATA AND BUILD MODELS ON-THE-FLY ---
print("🧠 Initializing session...")
df_clustered = pd.read_csv('spotify_dataset_with_clusters.csv')
features_to_use = ['valence', 'energy', 'danceability', 'tempo', 'acousticness']
df_clustered.dropna(subset=features_to_use, inplace=True)
scaler = MinMaxScaler()
song_features_normalized = scaler.fit_transform(df_clustered[features_to_use])
n_clusters = 20
kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
df_clustered['cluster'] = kmeans.fit_predict(song_features_normalized)
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')
tagging_model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')
print("✅ All models and data are ready!")


# --- 2. THE MOOD DICTIONARY ---
mood_map = {
    ('party', 'celebration', 'concert', 'crowd'): {'valence': 0.3, 'energy': 0.2, 'danceability': 0.3},
    ('smile', 'joy', 'laughing'): {'valence': 0.4, 'energy': 0.1},
    ('beach', 'seashore', 'coast'): {'valence': 0.2, 'energy': -0.2, 'acousticness': 0.1},
    ('sunset', 'sunrise', 'landscape'): {'valence': 0.1, 'energy': -0.3, 'acousticness': 0.3},
    ('forest', 'nature', 'mountain'): {'valence': 0.1, 'energy': -0.2, 'acousticness': 0.4},
    ('sad', 'gloomy', 'rain'): {'valence': -0.4, 'energy': -0.3},
    ('dark', 'night'): {'energy': -0.2},
    ('car', 'driving', 'road'): {'energy': 0.2, 'tempo': 0.1},
    ('sports', 'running'): {'energy': 0.3, 'tempo': 0.2},
}

# --- 3. HELPER & RECOMMENDATION FUNCTIONS ---
def get_image_tags(img, top_k=3): # Get fewer tags per frame to find the main subject
    """Uses the Vision Transformer to get descriptive tags for a single image (or frame)."""
    inputs = feature_extractor(images=img, return_tensors="pt")
    outputs = tagging_model(**inputs)
    logits = outputs.logits
    top_indices = logits[0].topk(top_k).indices.tolist()
    tags = [tagging_model.config.id2label[i] for i in top_indices]
    return tags

def extract_frames_from_video(video_path, num_frames=5):
    """Extracts a set number of evenly spaced frames from a video."""
    frames = []
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    
    if not cap.isOpened():
        print("❌ Error opening video file")
        return frames

    frame_indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
    
    for i in frame_indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = cap.read()
        if ret:
            # Convert frame from OpenCV's BGR format to PIL's RGB format
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(Image.fromarray(frame_rgb))
            
    cap.release()
    print(f"📹 Extracted {len(frames)} frames from video.")
    return frames

def get_recommendations(file_path, top_n=5):
    print(f"\n▶️ Analyzing file: {file_path}")
    
    # --- Step A: Get tags from Image or Video ---
    video_extensions = ['.mp4', '.mov', 'avi', 'mkv']
    file_ext = os.path.splitext(file_path)[1].lower()
    
    all_tags = []
    if file_ext in video_extensions:
        frames = extract_frames_from_video(file_path)
        if not frames:
            print("Could not process video. Aborting recommendation.")
            return
        for frame in frames:
            all_tags.extend(get_image_tags(frame))
    else: # Assume it's an image
        img = Image.open(file_path)
        all_tags = get_image_tags(img, top_k=5) # Get more tags for a single image

    print(f"✨ Detected Tags: {list(set(all_tags))}") # Show unique tags
    
    # --- Step B: Calculate Mood (same logic as before) ---
    target_mood = np.array([0.5, 0.5, 0.5, 0.5, 0.5], dtype=np.float64)
    for tag in all_tags:
        for keywords, mood_values in mood_map.items():
            if any(keyword in tag for keyword in keywords):
                target_mood[0] += mood_values.get('valence', 0.0)
                target_mood[1] += mood_values.get('energy', 0.0)
                target_mood[2] += mood_values.get('danceability', 0.0)
                target_mood[3] += mood_values.get('tempo', 0.0)
                target_mood[4] += mood_values.get('acousticness', 0.0)
    
    target_mood = np.clip(target_mood, 0, 1)
    scaled_mood = scaler.transform(target_mood.reshape(1, -1))
    
    # --- Step C: Predict and Recommend (same logic as before) ---
    predicted_cluster = kmeans.predict(scaled_mood)[0]
    print(f"✨ Media mapped to Mood Cluster #{predicted_cluster}")
    cluster_songs = df_clustered[df_clustered['cluster'] == predicted_cluster]
    return cluster_songs.sample(n=min(top_n, len(cluster_songs)))

# --- 4. HOW TO USE IT ---
# Test with an image
test_image_path = 'grass.jpeg' # Change to your image file
if os.path.exists(test_image_path):
    recommended_songs = get_recommendations(test_image_path)
    print("\n✨ Top 5 Recommendations for the IMAGE:")
    display_cols = ['track_name', 'artists', 'valence', 'energy', 'danceability', 'cluster']
    print(recommended_songs[display_cols])
else:
    print(f"\n❌ ERROR: Test image '{test_image_path}' not found.")

print("\n" + "="*50 + "\n") # Separator

# Test with a video
test_video_path = 'trek.mp4' # Change to your video file
if os.path.exists(test_video_path):
    recommended_songs = get_recommendations(test_video_path)
    print("\n✨ Top 5 Recommendations for the VIDEO:")
    display_cols = ['track_name', 'artists', 'valence', 'energy', 'danceability', 'cluster']
    print(recommended_songs[display_cols])
else:
    print(f"\n❌ ERROR: Test video '{test_video_path}' not found.")

🧠 Initializing session...
✅ All models and data are ready!

▶️ Analyzing file: grass.jpeg
✨ Detected Tags: ['rapeseed', 'balloon', 'hay', 'flagpole, flagstaff', 'pole']
✨ Media mapped to Mood Cluster #11

✨ Top 5 Recommendations for the IMAGE:
            track_name        artists  valence  energy  danceability  cluster
94826        about you  sadeyes;Powfu    0.390   0.496         0.566       11
84763          Şakalar          Flört    0.503   0.481         0.638       11
24013              Aja     Jay Daniel    0.225   0.464         0.790       11
67513          Algodón          Lasso    0.444   0.552         0.741       11
99620  Stop This Train     John Mayer    0.408   0.437         0.619       11



▶️ Analyzing file: trek.mp4
📹 Extracted 5 frames from video.




✨ Detected Tags: ['mountain bike, all-terrain bike, off-roader', 'lakeside, lakeshore', 'valley, vale', 'maze, labyrinth', 'greenhouse, nursery, glasshouse', 'cliff, drop, drop-off', 'balloon', 'hay', 'volcano']
✨ Media mapped to Mood Cluster #6

✨ Top 5 Recommendations for the VIDEO:
                            track_name                  artists  valence  \
79882                    Zero O' Clock             Smyang Piano    0.165   
15549                    Healing Touch                DaniSogen    0.100   
15229             i still think of you  the bootleg boy;Maberry    0.308   
41093  Det tog så lång tid att bli ung          Håkan Hellström    0.362   
45042             Qué Manera de Perder             Cuco Sánchez    0.257   

       energy  danceability  cluster  
79882  0.0727         0.469        6  
15549  0.2250         0.670        6  
15229  0.1930         0.575        6  
41093  0.2970         0.405        6  
45042  0.0909         0.490        6  


