In [2]:
from dotenv import load_dotenv
load_dotenv()

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
from pprint import pprint
from dataclasses import dataclass
from typing import *
from pprint import pprint
import jsons

import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Set-up: create the Spotify client and parse the survey responses.
# SpotifyClientCredentials() is built without arguments, so the credentials are
# presumably picked up from the environment (see load_dotenv above) -- TODO confirm.
credentials = SpotifyClientCredentials()
spotify = spotipy.Spotify(client_credentials_manager=credentials)

# Read CSV columns 1 and 2 as 'bsides' and 'tt'. Because explicit names are passed,
# the first row of the file is read as data; it is dropped right after.
df = (
    pd.read_csv('../resources/bubbleflexe-rv.csv', usecols=[1, 2], names=['bsides', 'tt'])
    .drop(labels=0, axis=0)
    .reset_index(drop=True)
)

# Every cell becomes a list of the ';'-separated entries of its string form.
for c in df.columns:
    df[c] = df[c].map(lambda cell: str(cell).split(';'))

# Unify the responded songs: per response, the two lists concatenated.
df['unified'] = df['bsides'] + df['tt']

In [4]:
# Distinct song names mentioned in each survey column.
# set().union(*lists) merges all per-response lists at once and also works
# when the column is empty.
unique_bsides = set().union(*df['bsides'])
unique_tt = set().union(*df['tt'])

# Every distinct name across both columns.
unique_songs = unique_bsides | unique_tt
# 'nan' is a placeholder rather than a song title (presumably an empty answer
# stringified during parsing). discard() is used instead of remove() so this
# cell does not raise KeyError when no response produced the placeholder.
unique_songs.discard('nan')

## Questions
1. For a given song, how often does each other song appear alongside it in the responses? -> Done
2. How should the size of each individual response be taken into account? -> **Not yet added**
3. How can we normalize for song popularity? -> Using percentages, Done

In [17]:
class Song:
    """A track identified by name and artist, with its Spotify id and audio features.

    Missing ``id`` / ``spotify_af`` values are fetched through the module-level
    ``spotify`` client when the object is created.

    Hashing and equality are deliberately loose:
      * the hash is the hash of ``name`` alone, so a plain name string can be used
        to look up a Song key in a dict or set;
      * two Songs are equal when their names are equal (artist is ignored);
      * a Song equals a string when the names match case-insensitively.
    Because the hash is case-sensitive, hash-based lookups by string only succeed
    for exactly-cased names even though ``==`` is case-insensitive.
    """

    # todo make init based on name and artist make a spotify req to populate the object's name and artist
    def __init__(self, name: str, artist: str = 'Red Velvet', id: Optional[str] = None,
                 spotify_af: Optional[dict] = None, tags: Optional[list] = None):
        """Create a Song, querying Spotify for any of ``id`` / ``spotify_af`` that is falsy.

        Args:
            name: track title.
            artist: artist name, also used to narrow the Spotify search.
            id: Spotify track id; looked up by search when not given.
            spotify_af: audio-feature dict; fetched for ``id`` when not given.
            tags: optional free-form tags, stored as-is.

        Raises:
            IndexError: the Spotify search returned no track.
            LookupError: Spotify returned no audio features for the track id.
        """
        self.name = name
        self.artist = artist
        self.id = id or self.__get_track_id()
        self.spotify_af = spotify_af or self.__get_audio_analysis()
        self.tags = tags
        # self.freq_list = self.__get_freq_list()
        # self.total_listens = self.freq_list[self.name]
        # self.percent_list = None # after all songs are created, then we can populate the "chance" list

    def __get_track_id(self):
        """Return the id of the first track Spotify finds for this name and artist."""
        r = spotify.search(q=f'{self.name} artist:{self.artist}', type='track')
        items = r['tracks']['items']
        if not items:
            # Still an IndexError (as indexing the empty list would be), but with a
            # message that says which song could not be found.
            raise IndexError(f'no Spotify track found for {self.name!r} by {self.artist!r}')
        return items[0]['id']

    def __get_audio_analysis(self):
        """Return the track's audio features without the bookkeeping fields."""
        results = spotify.audio_features(self.id)
        features = results[0] if results else None
        if features is None:
            raise LookupError(f'no audio features available for track id {self.id!r}')
        delkeys = ('type', 'id', 'uri', 'track_href', 'analysis_url')
        # Build a filtered copy: no side-effect comprehension, no KeyError when a
        # field is absent, and the client's response dict is left untouched.
        return {key: value for key, value in features.items() if key not in delkeys}

    def __repr__(self):
        # TODO see if this is safe
        return f'{self.name} - {self.artist} - {self.tags}'

    def __str__(self):
        # Same text as __repr__.
        return repr(self)

    def __key(self):
        return self.name # TODO THIS IS ONLY DUE TO WORKING WITH RV CONTENT.
        # TODO find out a better way to make SongCollection like a dictionary but with regex matching for song name; make easier to extract from responses

    def __hash__(self):
        # note that we are heavily breaking convention here
        return hash(self.__key())

    def __eq__(self, o):
        if isinstance(o, Song):
            # if self.name == o.name and self.artist == o.artist:
            #     return True
            return self.__key() == o.__key()
        if isinstance(o, str):
            # this is more useful for when there are multiple artists. we are only working with RV songs, so it's looser
            # if o == str(self)[:str(self).rfind('-')-1]:
            #     return True
            return o.lower() == self.name.lower()
        # Unrelated type: let Python try the other operand; `==` still ends up False.
        return NotImplemented

class SongCollection:
    """A list of Song objects together with the survey responses that mention them.

    ``responses`` is indexed with ``'unified'``; each entry of that column is
    iterated as a list of song-name strings. The literal string ``'nan'`` is
    skipped when counting.
    """

    def __init__(self, songs: list, responses: pd.DataFrame):
        self.songs = songs # TODO create songs from responses
        self.responses = responses

    def get(self, match) -> 'Song':
        """Return the single Song that compares equal to ``match``.

        Raises:
            Exception: when no Song or more than one Song matches.
        """
        return self.__get_by_eq(match)

    def __get_by_name(self, name):
        """Return the first Song whose ``name`` equals ``name`` exactly."""
        for s in self.songs:
            if name == s.name:
                return s
        raise Exception('no matching Song found')

    def __get_by_eq(self, obj):
        """Return the only Song for which ``song == obj`` holds."""
        r = [s for s in self.songs if s == obj]
        if len(r) > 1:
            raise Exception('too many songs matched')
        elif len(r) == 0:
            raise Exception('no matching Song found')
        return r[0]

    # def get_song_names(self) -> list[str]:
    #     return [s.name for s in self.songs]

    def __count_mentions(self, containing=None) -> dict:
        """Tally mentions per Song, optionally only over responses that list ``containing``.

        Every Song of the collection starts at 0. A response name that belongs to
        no Song of the collection raises KeyError.
        """
        # Explicit name -> Song index, so counting does not depend on a Song
        # hashing like its name string.
        by_name = {song.name: song for song in self.songs}
        counts = {song: 0 for song in self.songs}
        for names in self.responses['unified']:
            if containing is not None and containing not in names:
                continue
            for name in names:
                if name == 'nan':
                    continue
                counts[by_name[name]] += 1
        return counts

    @staticmethod
    def __sorted_by_value(mapping) -> dict:
        """Return a copy of ``mapping`` ordered by ascending value (ties keep their order)."""
        return dict(sorted(mapping.items(), key=lambda item: item[1]))

    def get_count_list(self) -> 'dict[Song, int]':
        '''Return dict representing # of times a Song was mentioned in responses for all Songs in the collection'''
        return self.__sorted_by_value(self.__count_mentions())

    def get_song_count_list(self, songmatch) -> 'dict[Song, int]':
        '''Return sorted dict of the count of other songs in the responses for a given Song (det. by songmatch)'''
        song = self.get(songmatch)
        # Only responses that list this song's exact name are tallied.
        return self.__sorted_by_value(self.__count_mentions(containing=song.name))

    def get_song_inbound_percent_list(self, songmatch, dig=2) -> 'dict[Song, float]':
        '''
        Return the inbound percents for a song.
        i.e. for each Song s in a given Song t's count list, divide s's value in t's count list by the count of s in the responses
        This gives us a way to see the percent of s's listeners that listen to t.
        A Song s that is never mentioned gets 0.0 instead of raising ZeroDivisionError.
        Values are rounded to `dig` digits and the result is sorted ascending.
        '''
        co_counts = self.get_song_count_list(songmatch)
        totals = self.get_count_list()
        percents = {}
        for s, count in co_counts.items():
            total = totals[s]
            ratio = count / total if total else 0.0
            percents[s] = round(ratio, dig)
        return self.__sorted_by_value(percents)

    def get_song_outbound_percent_list(self, songmatch, dig=2) -> 'dict[Song, float]':
        '''
        Return the outbound percents for a song
        i.e. for each Song s in a given Song t's count list, divide s's value in t's count list by the count of t in the responses
        This gives us a way to see the percent of t's listeners that listen to s.
        When t itself is never mentioned every value is 0.0 instead of raising ZeroDivisionError.
        Values are rounded to `dig` digits and the result is sorted ascending.
        '''
        song = self.get(songmatch)
        co_counts = self.get_song_count_list(song)
        root_listens = self.get_count_list()[song]
        percents = {}
        for s, count in co_counts.items():
            ratio = count / root_listens if root_listens else 0.0
            percents[s] = round(ratio, dig)
        return self.__sorted_by_value(percents)

    # The three methods below are unimplemented placeholders. They now take `self`
    # so that calling them on an instance no longer raises TypeError.
    def update(self):
        pass
    def add(self):
        pass
    def pop(self):
        pass
    
    # @staticmethod
    # def create_songs(names, artist):
    #     # move to song collection
    #     songdict = {}
    #     for s in names:
    #         songdict[s] = Song(s, artist)
    #     for s in songdict.values():
    #         s.construct_percent_list()
    #     # for s in songs
        
    
    



In [10]:
# One Song per distinct title. Song() is given only a name here, so each
# construction looks up the track id and audio features through the Spotify client.
# Iterating in sorted order keeps `songs` (and the tie order of every sorted
# count/percent list built from it) the same across kernel restarts; plain set
# iteration order for strings can change between interpreter sessions.
songs = [Song(name) for name in sorted(unique_songs)]

In [18]:
# Pair the Song objects with the parsed survey responses.
col = SongCollection(songs=songs, responses=df)

In [161]:
# Cache the collection in the file 'songcol'.
# Serialize before opening the file: open(..., 'w') truncates immediately, so
# dumping inside the `with` block would wipe a previously saved cache whenever
# jsons.dumps raises.
serialized = jsons.dumps(col)
with open('songcol', 'w') as f:
    f.write(serialized)

In [6]:
# TODO fix unhashable type dict 'for s in songs'
# Read the cached text from 'songcol', then rebuild the SongCollection from it.
with open('songcol', 'r') as f:
    payload = f.read()
col = jsons.loads(payload, SongCollection)

## Update
We now have inbound data (x% of Bad Boy listeners listen to Aitai-tai). What about the reverse (what do Aitai-tai listeners listen to)?
Note that the relationship is not necessarily symmetric: listeners of song A might be very likely to listen to song B, while a certain number of song B's listeners don't like song A.

In [30]:
# This is the inbound data the update above talks about: for each song s,
# the share of s's mentions that occur in responses which also list 'Aitai-tai'.
col.get_song_inbound_percent_list(songmatch='Aitai-tai')

{Psycho - Red Velvet - None: 0.2,
 Bad Boy - Red Velvet - None: 0.21,
 Peek-A-Boo - Red Velvet - None: 0.23,
 Monster - Red Velvet - None: 0.24,
 Sunny Side Up! - Red Velvet - None: 0.24,
 Kingdom Come - Red Velvet - None: 0.24,
 Red Flavor - Red Velvet - None: 0.25,
 Russian Roulette - Red Velvet - None: 0.25,
 In & Out - Red Velvet - None: 0.25,
 Power Up - Red Velvet - None: 0.27,
 Zimzalabim - Red Velvet - None: 0.27,
 Dumb Dumb - Red Velvet - None: 0.27,
 Naughty - Red Velvet - None: 0.27,
 You Better Know - Red Velvet - None: 0.27,
 Automatic - Red Velvet - None: 0.28,
 One of These Nights - Red Velvet - None: 0.28,
 Umpah Umpah - Red Velvet - None: 0.28,
 Ice Cream Cake - Red Velvet - None: 0.28,
 RBB - Red Velvet - None: 0.28,
 Happiness - Red Velvet - None: 0.29,
 Be Natural - Red Velvet - None: 0.3,
 Remember Forever - Red Velvet - None: 0.3,
 Eyes Locked Hands Locked - Red Velvet - None: 0.3,
 La Rouge - Red Velvet - None: 0.3,
 I Just - Red Velvet - None: 0.3,
 Love Is The 

## Graphing, Network Analysis