In [36]:
import pandas as pd
import spotipy  
from spotipy.oauth2 import SpotifyClientCredentials
import os
from dotenv import load_dotenv
from musixmatch import Musixmatch
import nltk
from nltk.corpus import stopwords
import random
from tqdm import tqdm
import matplotlib.pyplot as plt

CLIENT_ID = os.getenv('CLIENT_ID')
SECRET_ID = os.getenv('SECRET_ID')
ACCESS_TOKEN = os.getenv('ACCESS_TOKEN')
MUSICXMATCH = os.getenv('MUSICXMATCH')



musixmatch = Musixmatch(MUSICXMATCH)

client_credentials_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=SECRET_ID)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)


# load environment variables from .env file
load_dotenv()




palabras = stopwords.words('spanish')
#I suspect that the word 'no' is a very important word when we are talking about love or hate
#so I will remove it from the stopwords list
palabras.remove('no')




In [46]:


class Song:
    def __init__(self, name, artist, label, genre = None):
        self.name = name
        self.artist = artist
        self.genre = genre
        self.label = label

    def __str__(self):
        return self.name + " by " + self.artist


    def get_lyrics(self,):
        song = musixmatch.matcher_track_get(self.name,self.artist)
        track_id = song['message']['body']['track']['track_id']
        lyrics = musixmatch.track_lyrics_get(track_id=track_id)

        if lyrics['message']['body'] == []:
            lyrics = ''

        else:
            lyrics = lyrics['message']['body']['lyrics']['lyrics_body'].split("...")[0]
        return lyrics


    def get_name(self):
        return self.name

    def get_artist(self):
        return self.artist

    def get_words(self):
       
        words = nltk.word_tokenize(self.get_lyrics())
        words = [word.lower() for word in words if word.isalpha()]
        words = [word for word in words if word not in palabras]
        return words
    
    def get_label(self):
        return self.label
    

    def get_id(self):
        results = sp.search(self.name + " " + self.artist)
        return results['tracks']['items'][0]['id']






class Playlist:

    all_playlists = []

    def __init__(self,url,label, genre=None):
        self.url = url
        self.genre = genre
        self.label = label
        self.__class__.all_playlists.append(self)
  
    def get_songs(self):
        results = sp.playlist(self.url)["tracks"]
        tracks = results['items']
        while results['next']:
            results = sp.next(results)
            tracks.extend(results['items'])
    
        SongList = []
        for track in tracks:
            SongList.append(Song(track['track']['name'], track['track']['artists'][0]['name'],self.label,genre=self.genre))

        return SongList

    
    def info(self):
        print(" Genre: " +self.genre,"- Size: " + str(len(self.get_songs())),"- Label: ",self.label)

    @classmethod
    def total_length(cls):
        total = 0
        for playlist in cls.all_playlists:
            total += len(playlist.get_songs())
        return total
    


#Define the MusicDataset class with and class variable that will be a list of all the instances of playlist
#and add_playlist method to add a playlist to the list that receives an object playlist
#and a method that returns the total number of songs in all the playlists, this method will be a class method
#and will be called total_length, also a method that get a random song from all the playlists

class MusicDataset:
    all_playlists = []

    def __init__(self):
        pass

    def add_playlist(self,playlist):
        self.__class__.all_playlists.append(playlist)

    def get_playlists(self):
        return MusicDataset.all_playlists
    
    def total_length(self):
        total = 0
        for playlist in self.__class__.all_playlists:
            total += len(playlist.get_songs())
        return total
    
    def get_all_songs(self):
        all_songs = []
        for playlist in self.__class__.all_playlists:
            all_songs.extend(playlist.get_songs())
        return all_songs

    @classmethod
    def get_random_song(cls):
        random_playlist = random.choice(cls.all_playlists)
        return random.choice(random_playlist.get_songs())
   




#ranch_1 = Playlist('https://open.spotify.com/playlist/1oj5pzkKVfNDg6MDhk2XHf?si=b279724e3aaf45ab',1,'ranchera')
regga_1 = Playlist('https://open.spotify.com/playlist/5d2djDHRUTDOgTuTjDGpk5',1,'reggaeton')
#mixed_1 = Playlist('https://open.spotify.com/playlist/0nCQsteTsCEno08krzKai6',1,'mixed')
#mixed_2 = Playlist('https://open.spotify.com/playlist/4bPJZRZ3K7mVqoOoHQDipu',1,'mixed')
#mixed_3 = Playlist('https://open.spotify.com/playlist/7fnKiS1dem6SlOhl9DM1Bd',1,'mixed')
#regga_3 = Playlist('https://open.spotify.com/playlist/1wylHL6nSxcYHlGf5mDzR2',1,'reggaeton')
#ranch_2 = Playlist('https://open.spotify.com/playlist/1mwDUXYSjQWKnbvWvu11TP',1,'ranchera')
#ranch_3 = Playlist('https://open.spotify.com/playlist/7DhZIOgM52v1LHOly8icGU',1,'ranchera')
#bacha_1 = Playlist('https://open.spotify.com/playlist/1hTrF2j2ckMGrjM5myfBBE',1,'bachata')

musicDataset = MusicDataset()


for playlist in Playlist.all_playlists:
    musicDataset.add_playlist(playlist)





#Make a dataset with the songs from the musicDataset object.
#The oclumns will be the song id, the song name, the artist name, the genre, the label and the lyrics

dataset = []

allsongs = musicDataset.get_all_songs()
allsongs

lyricsLength = []

# for song in tqdm(allsongs):
#     dataset.append([song.get_id(),song.get_name(),song.get_artist(),playlist.genre,playlist.label,song.get_lyrics()])

# for song in tqdm(allsongs):
#     lyricsLength.append((song.get_name(),len(song.get_words())))

# lyricsLength = pd.DataFrame(lyricsLength,columns=['song','words'])

for song in allsongs[45:50]


[<__main__.Song at 0x7ff02e6dc350>,
 <__main__.Song at 0x7ff02e6dc390>,
 <__main__.Song at 0x7ff02e6dc3d0>,
 <__main__.Song at 0x7ff02e6dc410>,
 <__main__.Song at 0x7ff02e6dc450>]

In [43]:


#Plot the histogram of the number of words in the lyrics of the songs

plt.figure(figsize=(10,5))
plt.hist(lyricsLength['words'],bins=30)
plt.title('Histogram of the number of words in the lyrics of the songs')


TypeError: list indices must be integers or slices, not str

<Figure size 1000x500 with 0 Axes>