# Song Recommendation System
Extract playlist ‚Üí Search Reddit for recommendations ‚Üí Use ChatGPT ‚Üí Return Spotify song objects

## Step 1: Setup & Import Libraries

In [1]:
# Install dependencies
%pip install spotipy praw python-dotenv pandas openai

Collecting openai
  Downloading openai-2.14.0-py3-none-any.whl (1.1 MB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.1/1.1 MB[0m [31m31.7 MB/s[0m eta [36m0:00:00[0m
Collecting tqdm>4
  Using cached tqdm-4.67.1-py3-none-any.whl (78 kB)
Collecting jiter<1,>=0.10.0
  Downloading jiter-0.12.0-cp310-cp310-macosx_11_0_arm64.whl (319 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m319.8/319.8 kB[0m [31m31.6 MB/s[0m eta [36m0:00:00[0m
Collecting pydantic<3,>=1.9.0
  Downloading pydantic-2.12.5-py3-none-any.whl (463 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m463.6/463.6 kB[0m [31m46.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting anyio<5,>=3.5.0
  Downloading anyio-4.12.0-py3-none-any.whl (113

In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import praw
import pandas as pd
import json
import os
from dotenv import load_dotenv
from datetime import datetime
from openai import OpenAI

# Load environment variables
load_dotenv()

# Initialize Spotify API (Client Credentials - no user login)
spotify_client_credentials = SpotifyClientCredentials(
    client_id=os.getenv('SPOTIFY_CLIENT_ID'),
    client_secret=os.getenv('SPOTIFY_CLIENT_SECRET')
)
sp = spotipy.Spotify(client_credentials_manager=spotify_client_credentials)

# Initialize Reddit API
reddit = praw.Reddit(
    client_id=os.getenv('REDDIT_CLIENT_ID'),
    client_secret=os.getenv('REDDIT_CLIENT_SECRET'),
    username=os.getenv('REDDIT_USERNAME'),
    password=os.getenv('REDDIT_PASSWORD'),
    user_agent=os.getenv('REDDIT_USER_AGENT')
)

# Initialize OpenAI
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

print("‚úÖ All APIs initialized")
print(f"   - Spotify: Connected (Read-only)")
print(f"   - Reddit: Connected (Read-only: {reddit.read_only})")
print(f"   - OpenAI: Connected")

‚úÖ All APIs initialized
   - Spotify: Connected (Read-only)
   - Reddit: Connected (Read-only: False)
   - OpenAI: Connected


In [3]:
# Configuration
PLAYLIST_URL = "https://open.spotify.com/playlist/3XyDvjoxiae0oWpfJ4kga9?si=d2f57623799b4ebb"
SUBREDDIT_NAME = "music"
MAX_REDDIT_POSTS_PER_QUERY = 20
MAX_COMMENTS_PER_POST = 30
NUM_RECOMMENDATIONS = 5

print(f"Configuration:")
print(f"  Playlist: {PLAYLIST_URL}")
print(f"  Subreddit: r/{SUBREDDIT_NAME}")
print(f"  Max Reddit posts per query: {MAX_REDDIT_POSTS_PER_QUERY}")
print(f"  Recommendations to generate: {NUM_RECOMMENDATIONS}")

Configuration:
  Playlist: https://open.spotify.com/playlist/3XyDvjoxiae0oWpfJ4kga9?si=d2f57623799b4ebb
  Subreddit: r/music
  Max Reddit posts per query: 20
  Recommendations to generate: 5


## Step 2: Extract Playlist Data from Spotify

In [5]:
def get_playlist_id(url):
    """Extract playlist ID from URL"""
    return url.split('playlist/')[1].split('?')[0]

# Get playlist data
playlist_id = get_playlist_id(PLAYLIST_URL)
playlist = sp.playlist(playlist_id)

print("=" * 80)
print("PLAYLIST INFORMATION")
print("=" * 80)
print(f"Name: {playlist['name']}")
print(f"Owner: {playlist['owner']['display_name']}")
print(f"Total Tracks: {playlist['tracks']['total']}")
print(f"Description: {playlist['description']}")
print("=" * 80)

# Extract tracks
tracks_data = []
results = sp.playlist_tracks(playlist_id)

for idx, item in enumerate(results['items'], 1):
    track = item['track']
    if track:
        track_info = {
            'name': track['name'],
            'artists': [artist['name'] for artist in track['artists']],
            'artist_names': ', '.join([artist['name'] for artist in track['artists']]),
            'album': track['album']['name'],
            'id': track['id'],
            'uri': track['uri'],
            'popularity': track['popularity'],
            'preview_url': track['preview_url'],
            'external_url': track.get('external_urls', {}).get('spotify', None),
            'album_image': track['album']['images'][0]['url'] if track['album']['images'] else None
        }
        tracks_data.append(track_info)
        print(f"[{idx}] {track_info['name']} - {track_info['artist_names']}")

print(f"\n‚úÖ Extracted {len(tracks_data)} tracks from playlist")

# Store for logging
playlist_data = {
    'name': playlist['name'],
    'owner': playlist['owner']['display_name'],
    'total_tracks': len(tracks_data),
    'tracks': tracks_data
}

PLAYLIST INFORMATION
Name: Summer Nights
Owner: Malik
Total Tracks: 69
Description: 
[1] Everlasting Love - Carl Carlton
[2] So Lonely - The Police
[3] I Only Have Eyes for You - The Flamingos
[4] I Never Thought I'd See the Day - Sade
[5] I'm Still In Love With You - New Edition
[6] Silver Springs - 2004 Remaster - Fleetwood Mac
[7] Footsteps in the Dark, Pts. 1 & 2 - The Isley Brothers
[8] Stop! In The Name Of Love - The Supremes
[9] Monday, Monday - Single Version - The Mamas & The Papas
[10] Ooo Baby Baby - Smokey Robinson & The Miracles
[11] Your Love Is King - Sade
[12] Take A Chance On Me - ABBA
[13] I'll Be There - The Jackson 5
[14] Love the One You're With - The Isley Brothers
[15] The Tracks Of My Tears - Smokey Robinson & The Miracles
[16] All I Do Is Think Of You - The Jackson 5
[17] Earth Angel - The Penguins
[18] Somethin' Stupid - Frank Sinatra, Nancy Sinatra
[19] Lover, You Should've Come Over - Jeff Buckley
[20] I Know It's Over - 2011 Remaster - The Smiths
[21] Got T

## Step 3: Search Reddit for Recommendations

In [6]:
def search_reddit_for_recommendations(query, subreddit_name, max_posts=20, max_comments=30):
    """
    Search Reddit for recommendation posts/comments
    Focus on: "recommend", "similar to", "if you like"
    """
    subreddit = reddit.subreddit(subreddit_name)
    recommendations = []
    
    try:
        # Search for posts
        search_results = subreddit.search(query, limit=max_posts)
        
        for post in search_results:
            # Look for recommendation keywords in title or body
            text = f"{post.title} {post.selftext}".lower()
            
            if any(keyword in text for keyword in ['recommend', 'similar', 'if you like', 'check out', 'you might like', 'fans of']):
                post_data = {
                    'title': post.title,
                    'body': post.selftext,
                    'score': post.score,
                    'url': f"https://reddit.com{post.permalink}",
                    'comments': []
                }
                
                # Get comments
                try:
                    post.comments.replace_more(limit=0)
                    for comment in post.comments.list()[:max_comments]:
                        comment_text = comment.body.lower()
                        if any(keyword in comment_text for keyword in ['recommend', 'similar', 'if you like', 'check out', 'you might like', 'try']):
                            post_data['comments'].append({
                                'body': comment.body,
                                'score': comment.score,
                                'author': str(comment.author) if comment.author else '[deleted]'
                            })
                except Exception as e:
                    pass
                
                if post_data['comments'] or any(keyword in text for keyword in ['recommend', 'similar']):
                    recommendations.append(post_data)
    
    except Exception as e:
        print(f"   ‚ö†Ô∏è Error searching Reddit: {e}")
    
    return recommendations

# Search for recommendations based on playlist tracks
print("=" * 80)
print("SEARCHING REDDIT FOR RECOMMENDATIONS")
print("=" * 80)

all_reddit_data = []

# Search for top 5 tracks + top 3 artists
top_tracks = sorted(tracks_data, key=lambda x: x['popularity'], reverse=True)[:5]
all_artists = list(set([artist for track in tracks_data for artist in track['artists']]))[:3]

print(f"\nüîç Searching for recommendations based on:")
print(f"   - Top {len(top_tracks)} tracks")
print(f"   - Top {len(all_artists)} artists")
print()

# Search by track name
for idx, track in enumerate(top_tracks, 1):
    print(f"[{idx}/{len(top_tracks)}] Searching: '{track['name']}'")
    query = f"{track['name']} {track['artist_names']} recommend"
    results = search_reddit_for_recommendations(query, SUBREDDIT_NAME, MAX_REDDIT_POSTS_PER_QUERY, MAX_COMMENTS_PER_POST)
    
    if results:
        all_reddit_data.extend(results)
        print(f"         ‚úÖ Found {len(results)} recommendation posts/threads")
    else:
        print(f"         ‚ÑπÔ∏è  No recommendations found")

# Search by artist
for idx, artist in enumerate(all_artists, 1):
    print(f"[Artist {idx}/{len(all_artists)}] Searching: '{artist}'")
    query = f"{artist} recommend similar"
    results = search_reddit_for_recommendations(query, SUBREDDIT_NAME, MAX_REDDIT_POSTS_PER_QUERY, MAX_COMMENTS_PER_POST)
    
    if results:
        all_reddit_data.extend(results)
        print(f"         ‚úÖ Found {len(results)} recommendation posts/threads")
    else:
        print(f"         ‚ÑπÔ∏è  No recommendations found")

print(f"\n‚úÖ Total Reddit data collected: {len(all_reddit_data)} posts with recommendations")
print(f"   Total comments: {sum(len(post['comments']) for post in all_reddit_data)}")

SEARCHING REDDIT FOR RECOMMENDATIONS

üîç Searching for recommendations based on:
   - Top 5 tracks
   - Top 3 artists

[1/5] Searching: 'DAISIES'
         ‚úÖ Found 12 recommendation posts/threads
[2/5] Searching: 'Running Up That Hill (A Deal With God)'
         ‚úÖ Found 1 recommendation posts/threads
[3/5] Searching: 'Lover, You Should've Come Over'
         ‚úÖ Found 3 recommendation posts/threads
[4/5] Searching: 'Silver Springs - 2004 Remaster'
         ‚úÖ Found 2 recommendation posts/threads
[5/5] Searching: 'Knockin' On Heaven's Door'
         ‚úÖ Found 5 recommendation posts/threads
[Artist 1/3] Searching: 'Prince'
         ‚úÖ Found 20 recommendation posts/threads
[Artist 2/3] Searching: 'Led Zeppelin'
         ‚úÖ Found 20 recommendation posts/threads
[Artist 3/3] Searching: 'The Flamingos'
         ‚úÖ Found 1 recommendation posts/threads

‚úÖ Total Reddit data collected: 64 posts with recommendations
   Total comments: 211


## Step 4: Format Data for ChatGPT

In [7]:
# Create comprehensive prompt for ChatGPT
print("=" * 80)
print("CREATING CHATGPT PROMPT")
print("=" * 80)

# Build playlist summary
playlist_summary = f"Playlist: {playlist_data['name']}\n"
playlist_summary += f"Total Tracks: {playlist_data['total_tracks']}\n\n"
playlist_summary += "Top Tracks:\n"
for i, track in enumerate(top_tracks[:10], 1):
    playlist_summary += f"{i}. {track['name']} - {track['artist_names']}\n"

# Build Reddit recommendations summary
reddit_summary = "\nReddit Community Recommendations:\n\n"
for idx, post in enumerate(all_reddit_data[:15], 1):  # Limit to avoid token overflow
    reddit_summary += f"Post {idx}: {post['title']}\n"
    if post['body']:
        reddit_summary += f"Content: {post['body'][:300]}...\n"
    
    # Add top comments
    if post['comments']:
        reddit_summary += "Top Comments:\n"
        for comment in post['comments'][:3]:
            reddit_summary += f"  - {comment['body'][:200]}...\n"
    reddit_summary += "\n"

# Create the prompt
chatgpt_prompt = f"""You are a music recommendation expert. Based on a user's Spotify playlist and Reddit community recommendations, suggest 5 songs they will likely enjoy.

USER'S PLAYLIST:
{playlist_summary}

REDDIT RECOMMENDATIONS FROM r/{SUBREDDIT_NAME}:
{reddit_summary}

TASK:
Analyze the user's music taste from their playlist and the Reddit community recommendations. Recommend 5 NEW songs (not in the original playlist) that the user will love.

IMPORTANT: Return ONLY a JSON array with exactly {NUM_RECOMMENDATIONS} songs in this format:
[
  {{"song": "Song Name", "artist": "Artist Name"}},
  {{"song": "Song Name", "artist": "Artist Name"}},
  ...
]

Do NOT include any explanation, just the JSON array. Make sure songs are real and can be found on Spotify."""

print(f"‚úÖ Prompt created")
print(f"   Playlist tracks included: {len(top_tracks)}")
print(f"   Reddit posts included: {min(len(all_reddit_data), 15)}")
print(f"   Total prompt length: {len(chatgpt_prompt)} characters")
print(f"\nüìù Prompt preview (first 500 chars):")
print("-" * 80)
print(chatgpt_prompt[:500] + "...")
print("-" * 80)

CREATING CHATGPT PROMPT
‚úÖ Prompt created
   Playlist tracks included: 5
   Reddit posts included: 15
   Total prompt length: 9445 characters

üìù Prompt preview (first 500 chars):
--------------------------------------------------------------------------------
You are a music recommendation expert. Based on a user's Spotify playlist and Reddit community recommendations, suggest 5 songs they will likely enjoy.

USER'S PLAYLIST:
Playlist: Summer Nights
Total Tracks: 69

Top Tracks:
1. DAISIES - Justin Bieber
2. Running Up That Hill (A Deal With God) - Kate Bush
3. Lover, You Should've Come Over - Jeff Buckley
4. Silver Springs - 2004 Remaster - Fleetwood Mac
5. Knockin' On Heaven's Door - Guns N' Roses


REDDIT RECOMMENDATIONS FROM r/music:

Reddit Comm...
--------------------------------------------------------------------------------


## Step 5: Get Recommendations from ChatGPT

In [8]:
print("=" * 80)
print("CALLING CHATGPT API")
print("=" * 80)

try:
    response = openai_client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a music recommendation expert. Always return valid JSON."},
            {"role": "user", "content": chatgpt_prompt}
        ],
        temperature=0.7,
        max_tokens=500
    )
    
    gpt_response = response.choices[0].message.content
    print(f"‚úÖ ChatGPT Response received")
    print(f"\nüìù Raw response:")
    print("-" * 80)
    print(gpt_response)
    print("-" * 80)
    
    # Parse JSON response
    gpt_recommendations = json.loads(gpt_response)
    
    print(f"\n‚úÖ Parsed {len(gpt_recommendations)} recommendations:")
    for idx, rec in enumerate(gpt_recommendations, 1):
        print(f"   {idx}. {rec['song']} - {rec['artist']}")
    
except Exception as e:
    print(f"‚ùå Error calling ChatGPT: {e}")
    gpt_recommendations = []

CALLING CHATGPT API
‚úÖ ChatGPT Response received

üìù Raw response:
--------------------------------------------------------------------------------
[
  {"song": "Love Yourself", "artist": "Justin Bieber"},
  {"song": "What Do You Mean", "artist": "Justin Bieber"},
  {"song": "Brutalism", "artist": "The Drums"},
  {"song": "Ain't No Sunshine", "artist": "Bill Withers"},
  {"song": "Worth It For The Feeling", "artist": "Rebecca Black"}
]
--------------------------------------------------------------------------------

‚úÖ Parsed 5 recommendations:
   1. Love Yourself - Justin Bieber
   2. What Do You Mean - Justin Bieber
   3. Brutalism - The Drums
   4. Ain't No Sunshine - Bill Withers
   5. Worth It For The Feeling - Rebecca Black


## Step 6: Search Spotify for Recommended Songs

In [9]:
def search_spotify_song(song_name, artist_name):
    """Search Spotify for a song and return full track object"""
    try:
        query = f"track:{song_name} artist:{artist_name}"
        results = sp.search(q=query, type='track', limit=1)
        
        if results['tracks']['items']:
            track = results['tracks']['items'][0]
            return {
                'name': track['name'],
                'artist': ', '.join([a['name'] for a in track['artists']]),
                'album': track['album']['name'],
                'release_date': track['album']['release_date'],
                'popularity': track['popularity'],
                'duration_ms': track['duration_ms'],
                'duration_readable': f"{track['duration_ms'] // 60000}:{(track['duration_ms'] % 60000) // 1000:02d}",
                'preview_url': track['preview_url'],
                'external_url': track['external_urls']['spotify'],
                'uri': track['uri'],
                'album_art': track['album']['images'][0]['url'] if track['album']['images'] else None,
                'id': track['id']
            }
    except Exception as e:
        print(f"   ‚ö†Ô∏è Error searching for '{song_name}': {e}")
    
    return None

print("=" * 80)
print("SEARCHING SPOTIFY FOR RECOMMENDATIONS")
print("=" * 80)

final_recommendations = []

for idx, rec in enumerate(gpt_recommendations, 1):
    print(f"\n[{idx}/{len(gpt_recommendations)}] Searching: {rec['song']} - {rec['artist']}")
    
    spotify_track = search_spotify_song(rec['song'], rec['artist'])
    
    if spotify_track:
        final_recommendations.append(spotify_track)
        print(f"         ‚úÖ Found on Spotify!")
        print(f"            Album: {spotify_track['album']}")
        print(f"            Popularity: {spotify_track['popularity']}/100")
        print(f"            URL: {spotify_track['external_url']}")
    else:
        print(f"         ‚ùå Not found on Spotify")

print(f"\n‚úÖ Successfully found {len(final_recommendations)}/{len(gpt_recommendations)} recommendations on Spotify")

SEARCHING SPOTIFY FOR RECOMMENDATIONS

[1/5] Searching: Love Yourself - Justin Bieber
         ‚úÖ Found on Spotify!
            Album: Purpose (Deluxe)
            Popularity: 86/100
            URL: https://open.spotify.com/track/50kpGaPAhYJ3sGmk6vplg0

[2/5] Searching: What Do You Mean - Justin Bieber
         ‚úÖ Found on Spotify!
            Album: Purpose (Deluxe)
            Popularity: 84/100
            URL: https://open.spotify.com/track/4B0JvthVoAAuygILe3n4Bs

[3/5] Searching: Brutalism - The Drums
         ‚úÖ Found on Spotify!
            Album: Brutalism
            Popularity: 24/100
            URL: https://open.spotify.com/track/6TlawN3gIL0ECeXdz2zcYj

[4/5] Searching: Ain't No Sunshine - Bill Withers
         ‚úÖ Found on Spotify!
            Album: Just As I Am
            Popularity: 83/100
            URL: https://open.spotify.com/track/1k1Bqnv2R0uJXQN4u6LKYt

[5/5] Searching: Worth It For The Feeling - Rebecca Black
         ‚úÖ Found on Spotify!
            Album

## Step 7: Display Final Recommendations

In [10]:
print("\n" + "=" * 80)
print("üéµ FINAL SONG RECOMMENDATIONS")
print("=" * 80)

if final_recommendations:
    for idx, track in enumerate(final_recommendations, 1):
        print(f"\n{idx}. {track['name']}")
        print(f"   Artist: {track['artist']}")
        print(f"   Album: {track['album']}")
        print(f"   Release: {track['release_date']}")
        print(f"   Duration: {track['duration_readable']}")
        print(f"   Popularity: {track['popularity']}/100")
        print(f"   üéß Listen: {track['external_url']}")
        if track['album_art']:
            print(f"   üñºÔ∏è  Album Art: {track['album_art']}")
        if track['preview_url']:
            print(f"   ‚ñ∂Ô∏è  Preview: {track['preview_url']}")
        print(f"   URI: {track['uri']}")
else:
    print("No recommendations found.")

print("\n" + "=" * 80)


üéµ FINAL SONG RECOMMENDATIONS

1. Love Yourself
   Artist: Justin Bieber
   Album: Purpose (Deluxe)
   Release: 2015-11-13
   Duration: 3:53
   Popularity: 86/100
   üéß Listen: https://open.spotify.com/track/50kpGaPAhYJ3sGmk6vplg0
   üñºÔ∏è  Album Art: https://i.scdn.co/image/ab67616d0000b273f46b9d202509a8f7384b90de
   URI: spotify:track:50kpGaPAhYJ3sGmk6vplg0

2. What Do You Mean?
   Artist: Justin Bieber
   Album: Purpose (Deluxe)
   Release: 2015-11-13
   Duration: 3:25
   Popularity: 84/100
   üéß Listen: https://open.spotify.com/track/4B0JvthVoAAuygILe3n4Bs
   üñºÔ∏è  Album Art: https://i.scdn.co/image/ab67616d0000b273f46b9d202509a8f7384b90de
   URI: spotify:track:4B0JvthVoAAuygILe3n4Bs

3. Brutalism
   Artist: The Drums
   Album: Brutalism
   Release: 2019-04-05
   Duration: 3:49
   Popularity: 24/100
   üéß Listen: https://open.spotify.com/track/6TlawN3gIL0ECeXdz2zcYj
   üñºÔ∏è  Album Art: https://i.scdn.co/image/ab67616d0000b273aae312c942585a942d391b24
   URI: spotify

## Step 8: View as DataFrame

In [11]:
if final_recommendations:
    recommendations_df = pd.DataFrame(final_recommendations)
    
    # Display simplified version
    display_df = recommendations_df[['name', 'artist', 'album', 'popularity', 'duration_readable', 'external_url']]
    
    print("üìä Recommendations Table:\n")
    display(display_df)
else:
    print("No recommendations to display")

üìä Recommendations Table:



Unnamed: 0,name,artist,album,popularity,duration_readable,external_url
0,Love Yourself,Justin Bieber,Purpose (Deluxe),86,3:53,https://open.spotify.com/track/50kpGaPAhYJ3sGm...
1,What Do You Mean?,Justin Bieber,Purpose (Deluxe),84,3:25,https://open.spotify.com/track/4B0JvthVoAAuygI...
2,Brutalism,The Drums,Brutalism,24,3:49,https://open.spotify.com/track/6TlawN3gIL0ECeX...
3,Ain't No Sunshine,Bill Withers,Just As I Am,83,2:06,https://open.spotify.com/track/1k1Bqnv2R0uJXQN...
4,Worth It for the Feeling,Rebecca Black,Worth It for the Feeling,40,3:07,https://open.spotify.com/track/0dKCtdoAH23AWJ5...


## Step 9: Export All Data (Logging)

In [None]:
# Create comprehensive log of entire process
complete_log = {
    'metadata': {
        'timestamp': datetime.now().isoformat(),
        'playlist_url': PLAYLIST_URL,
        'subreddit': SUBREDDIT_NAME,
        'num_recommendations_requested': NUM_RECOMMENDATIONS
    },
    'step_1_playlist_data': {
        'name': playlist_data['name'],
        'owner': playlist_data['owner'],
        'total_tracks': playlist_data['total_tracks'],
        'tracks': tracks_data
    },
    'step_2_top_tracks_used': [
        {'name': t['name'], 'artist': t['artist_names'], 'popularity': t['popularity']}
        for t in top_tracks
    ],
    'step_3_top_artists_used': all_artists,
    'step_4_reddit_data': {
        'total_posts_found': len(all_reddit_data),
        'total_comments': sum(len(post['comments']) for post in all_reddit_data),
        'posts_sample': all_reddit_data[:5]  # Save sample to avoid huge file
    },
    'step_5_chatgpt_prompt': chatgpt_prompt,
    'step_6_chatgpt_response': gpt_recommendations if gpt_recommendations else [],
    'step_7_spotify_search_results': final_recommendations,
    'summary': {
        'playlist_tracks_analyzed': len(tracks_data),
        'reddit_posts_found': len(all_reddit_data),
        'gpt_recommendations_generated': len(gpt_recommendations) if gpt_recommendations else 0,
        'spotify_tracks_found': len(final_recommendations),
        'success_rate': f"{len(final_recommendations)}/{NUM_RECOMMENDATIONS}"
    }
}

# Save to JSON
log_filename = f"recommendation_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
with open(log_filename, 'w', encoding='utf-8') as f:
    json.dump(complete_log, f, indent=2, ensure_ascii=False)

print("=" * 80)
print("üìÅ DATA EXPORT & LOGGING")
print("=" * 80)
print(f"‚úÖ Complete log saved to: {log_filename}")
print(f"   File size: {os.path.getsize(log_filename) / 1024:.2f} KB")

# Also save just the recommendations as a separate file
if final_recommendations:
    recommendations_filename = f"recommendations_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    with open(recommendations_filename, 'w', encoding='utf-8') as f:
        json.dump({
            'playlist': playlist_data['name'],
            'recommendations': final_recommendations
        }, f, indent=2, ensure_ascii=False)
    print(f"‚úÖ Recommendations saved to: {recommendations_filename}")
    
    # Save DataFrame as CSV
    csv_filename = f"recommendations_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    recommendations_df.to_csv(csv_filename, index=False)
    print(f"‚úÖ CSV saved to: {csv_filename}")

print("=" * 80)

## Step 10: Summary Statistics

In [None]:
print("\n" + "=" * 80)
print("üìä RECOMMENDATION SYSTEM SUMMARY")
print("=" * 80)

print(f"\nüéµ INPUT:")
print(f"   Playlist: {playlist_data['name']}")
print(f"   Tracks Analyzed: {len(tracks_data)}")
print(f"   Top Tracks Used: {len(top_tracks)}")
print(f"   Top Artists Used: {len(all_artists)}")

print(f"\nüí¨ REDDIT DATA:")
print(f"   Subreddit: r/{SUBREDDIT_NAME}")
print(f"   Posts Found: {len(all_reddit_data)}")
print(f"   Total Comments: {sum(len(post['comments']) for post in all_reddit_data)}")
print(f"   Recommendation-focused Posts: {len(all_reddit_data)}")

print(f"\nü§ñ CHATGPT:")
print(f"   Prompt Length: {len(chatgpt_prompt)} characters")
print(f"   Recommendations Generated: {len(gpt_recommendations) if gpt_recommendations else 0}")

print(f"\nüéµ OUTPUT:")
print(f"   Spotify Tracks Found: {len(final_recommendations)}/{NUM_RECOMMENDATIONS}")
if final_recommendations:
    avg_popularity = sum(t['popularity'] for t in final_recommendations) / len(final_recommendations)
    print(f"   Average Popularity: {avg_popularity:.1f}/100")

print(f"\n‚úÖ SUCCESS RATE: {len(final_recommendations)}/{NUM_RECOMMENDATIONS} ({len(final_recommendations)/NUM_RECOMMENDATIONS*100:.0f}%)")

print("\n" + "=" * 80)
print("üéâ RECOMMENDATION SYSTEM COMPLETE!")
print("=" * 80)