In [95]:
import pandas as pd
import random as rd
import numpy as np 
import datetime
import matplotlib.pyplot as plt
from chatbot import Chatbot
import nltk
from nltk.stem import WordNetLemmatizer

In [108]:
df = pd.read_csv('music_genre.csv')
df.head()

Unnamed: 0,instance_id,artist_name,track_name,popularity,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,obtained_date,valence,music_genre
0,32894.0,Röyksopp,Röyksopp's Night Out,27.0,0.00468,0.652,-1.0,0.941,0.792,A#,0.115,-5.201,Minor,0.0748,100.889,4-Apr,0.759,Electronic
1,46652.0,Thievery Corporation,The Shining Path,31.0,0.0127,0.622,218293.0,0.89,0.95,D,0.124,-7.043,Minor,0.03,115.002,4-Apr,0.531,Electronic
2,30097.0,Dillon Francis,Hurricane,28.0,0.00306,0.62,215613.0,0.755,0.0118,G#,0.534,-4.617,Major,0.0345,127.994,4-Apr,0.333,Electronic
3,62177.0,Dubloadz,Nitro,34.0,0.0254,0.774,166875.0,0.7,0.00253,C#,0.157,-4.498,Major,0.239,128.014,4-Apr,0.27,Electronic
4,24907.0,What So Not,Divide & Conquer,32.0,0.00465,0.638,222369.0,0.587,0.909,F#,0.157,-6.266,Major,0.0413,145.036,4-Apr,0.323,Electronic


In [109]:
def clean_up_data(df):
    #We get rid of the useless parameters 
    df = df.drop(['acousticness','instrumentalness','key','liveness','loudness','mode','speechiness','obtained_date','valence'], axis=1)
    df = df.dropna()

    # instance_id should be an integer
    df["instance_id"] = [int(x) for x in df["instance_id"]]

    #modifie the df["danceability"] to be in %
    df["danceability"] = round(df["danceability"]*100)

    #drop the  line if df["duration_ms"] is Nan and modifie the df["duration_ms"] to be in minutes
    df = df[df["duration_ms"].notnull()]
    df["duration_ms"] = df["duration_ms"].apply(lambda x: f'{round(x/1000)//60}:{round(x/1000)%60}')
    df = df.rename(columns={"duration_ms": "duration"})

    #modifie the df["energy"] to be in %
    df["energy"] = round(df["energy"]*100)

    #replace all ? by 0 in the df['tempo']
    df['tempo'] = df['tempo'].replace('?',0)
    #convert the tempo to float and round it
    df['tempo'] = df['tempo'].apply(lambda x: round(float(x)))

    return df

In [110]:
df = clean_up_data(df)
df.head()

Unnamed: 0,instance_id,artist_name,track_name,popularity,danceability,duration,energy,tempo,music_genre
0,32894,Röyksopp,Röyksopp's Night Out,27.0,65.0,0:0,94.0,101,Electronic
1,46652,Thievery Corporation,The Shining Path,31.0,62.0,3:38,89.0,115,Electronic
2,30097,Dillon Francis,Hurricane,28.0,62.0,3:36,76.0,128,Electronic
3,62177,Dubloadz,Nitro,34.0,77.0,2:47,70.0,128,Electronic
4,24907,What So Not,Divide & Conquer,32.0,64.0,3:42,59.0,145,Electronic


In [97]:
df_random = df.sample(n=1)
df_random

Unnamed: 0,instance_id,artist_name,track_name,popularity,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,obtained_date,valence,music_genre
8446,84921.0,Ling tosite sigure,Turbocharger ON,18.0,0.003,0.246,317667.0,0.882,0.845,F,0.367,-2.887,Minor,0.076,131.558,4-Apr,0.275,Anime


In [98]:
#display all the genre
df['music_genre'].unique()

array(['Electronic', 'Anime', nan, 'Jazz', 'Alternative', 'Country',
       'Rap', 'Blues', 'Rock', 'Classical', 'Hip-Hop'], dtype=object)

In [65]:
def clean_up_sentence(sentence): # clean up the sentence by tokenizing it to words and lemmatizing the words 
    sentence_words = nltk.word_tokenize(sentence) # tokenize the sentence into words
    sentence_words = [WordNetLemmatizer().lemmatize(word.lower()) for word in sentence_words] # lemmatize the words inside the sentence_words
    return sentence_words # return the sentence_words list 

def bag_of_words(sentence, words): # create the bag of words for the sentence
    sentence_words = clean_up_sentence(sentence)  # clean up the sentence
    bag = [0] * len(words) # create an empty list with the length of the words list
    for s in sentence_words: # loop through the sentence_words tokenized into words and lemmatized
        for i, word in enumerate(words): # loop through the words list
            if word == s: # if the word is equal to the word in the sentence_words list then 0 is replace by 1 in the bag list
                bag[i] = 1
    return np.array(bag) # return the bag list in a numpy array format

In [66]:
sentence = "could you give me a random rock or rap music with a high energy?"
sentence_2 = "could you give me a random music from the genre rock or rap?"
sentence_3 = "could you give me a random music with a high energy and a high tempo?"
words = clean_up_sentence(sentence_2)
print(words)


['could', 'you', 'give', 'me', 'a', 'random', 'music', 'from', 'the', 'genre', 'rock', 'or', 'rap', '?']


In [67]:
def clean_up_genre(genre): # clean up the genre by tokenizing it to words
    genres = []
    genre_words = nltk.word_tokenize(genre) # tokenize the genre into words
    #list of all music_genre from df
    music_genre = df['music_genre'].unique()
    #list of all music_genre from df in lower case
    music_genre_lower = [x.lower() for x in music_genre]
    #look the word inside the genre_words list and if it is in the music_genre_lower list then return the genre
    for word in genre_words:
        if word.lower() in music_genre_lower:
            genres.append(word)

    return genres # return the genres' list

In [81]:
genres = clean_up_genre(sentence)
print(genres)

#check if genres is empty
if len(genres) == 0:
    genres = None 

if genres is not None:
    df_random = df[df["music_genre"].str.lower().isin(genres)]
    df_random = df_random.sample(n=1)

df_random

['rock', 'rap']


Unnamed: 0,instance_id,artist_name,track_name,popularity,danceability,duration,energy,tempo,music_genre
38779,85702,WALK THE MOON,Avalanche,58.0,59.0,3:39,86.0,138,Rock


In [86]:
music_genre_chosen = df_random["music_genre"].values[0]
text = f"Here is a random {music_genre_chosen} music:"
print(text)

Here is a random Rock music:


In [44]:
if 'genre' in words:
    if words[words.index('genre')+2] == 'or':
        genre = [words[words.index('genre')+1],words[words.index('genre')+3]]
    genre = [words[words.index('genre') + 1]]
    print(genre)
    df_random = df[df['music_genre'].str.lower() == genre[0].lower()]
    df_random = df_random.sample(n=1)
    print(df_random)

rock
       instance_id        artist_name      track_name  popularity  \
38801        41909  Twenty One Pilots  Oh Ms Believer        66.0   

       danceability duration  energy  tempo music_genre  
38801          50.0     3:37    33.0      0        Rock  


In [99]:
def clean_up_random_infos(infos):
    features = {}
    #tokemize the string
    infos_words = nltk.word_tokenize(infos)
    # if energy is in the string look for the word just before and return add to the energy key the value of that word into features
    if 'energy' in infos_words:
        energy_index = infos_words.index('energy')
        if energy_index > 0:
            features['energy'] = infos_words[energy_index - 1]
    # if tempo is in the string look for the word just before and return add to the tempo key the value of that word into features
    if 'tempo' in infos_words:
        tempo_index = infos_words.index('tempo')
        if tempo_index > 0:
            features['tempo'] = infos_words[tempo_index - 1]
    # if popularity is in the string look for the word just before and return add to the pupolarity key the value of that word into features
    if 'popularity' in infos_words:
        popularity_index = infos_words.index('popularity')
        if popularity_index > 0:
            features['popularity'] = infos_words[popularity_index - 1]
    # if danceability is in the string look for the word just before and return add to the danceability key the value of that word into features
    if 'danceability' in infos_words:
        danceability_index = infos_words.index('danceability')
        if danceability_index > 0:
            features['danceability'] = infos_words[danceability_index - 1]
    return features

def random_music(**kwargs):
    text, embed = None, None
    infos = kwargs.get('information',None)
    genre, artist, danceability, energy, tempo, popularity = clean_up_random_infos(infos)

    if infos == None:
        df_random = df.sample(n=1)
    else:
        df_random = df
        df_random = df_random[df_random['music_genre'].str.lower() == genre.lower()] if genre != None else df_random
        df_random = df_random[df_random['artist_name'].str.lower() == artist.lower()] if artist != None else df_random
        df_random = df_random[df_random['danceability'] >= danceability['min']][df_random['danceability']<= danceability['max']] if danceability != None else df_random
        df_random = df_random[df_random['energy'] >= energy['min']][df_random['energy']<= energy['max']] if energy != None else df_random
        df_random = df_random[df_random['tempo'] >= tempo['min']][df_random['tempo']<= tempo['max']] if tempo != None else df_random
        df_random = df_random[df_random['popularity'] >= popularity['min']][df_random['popularity']<= popularity['max']] if popularity != None else df_random
        df_random = df_random.sample(n=1)

In [100]:
def set_up_random_feature(features, feature_name):
    feature = {'min': 0, 'max': 100}
    if features[f'{feature_name}'] == 'high':
       feature['min'] = df[f'{feature_name}'].mean() 
       feature['max'] = df[f'{feature_name}'].max()
    elif features[f'{feature_name}'] == 'low':
        feature['min'] = df[f'{feature_name}'].min()
        feature['max'] = df[f'{feature_name}'].mean()
    elif features[f'{feature_name}'] == 'medium':
        feature['min'] = df[f'{feature_name}'].quantile(0.25)
        feature['max'] = df[f'{feature_name}'].quantile(0.75)

    return feature
    

In [107]:
#test set_up_random_feature
features = {'energy': 'high', 'tempo': 'low', 'popularity': 'medium'}
set_up_random_feature(features, 'tempo')

{'min': 0, 'max': 108.00916}

In [92]:
#test the function clean_up_random_infos
string = "a low energy, a high tempo, a low danceability, a high popularity"
test_5 = clean_up_random_infos(string)
print(test_5)

{'energy': 'low', 'tempo': 'high', 'popularity': 'high', 'danceability': 'low'}


In [8]:
df = pd.read_csv('music_genre.csv')

In [9]:
df = df.drop(['acousticness','instrumentalness','key','liveness','loudness','mode','speechiness','obtained_date','valence'], axis=1)
df.head()

Unnamed: 0,instance_id,artist_name,track_name,popularity,danceability,duration_ms,energy,tempo,music_genre
0,32894.0,Röyksopp,Röyksopp's Night Out,27.0,0.652,-1.0,0.941,100.889,Electronic
1,46652.0,Thievery Corporation,The Shining Path,31.0,0.622,218293.0,0.89,115.002,Electronic
2,30097.0,Dillon Francis,Hurricane,28.0,0.62,215613.0,0.755,127.994,Electronic
3,62177.0,Dubloadz,Nitro,34.0,0.774,166875.0,0.7,128.014,Electronic
4,24907.0,What So Not,Divide & Conquer,32.0,0.638,222369.0,0.587,145.036,Electronic


In [16]:
# pick up the artist name of the music name equal Hurricane
artist = df[df['track_name'] == 'Hurricane']
artist['artist_name'].values


array(['Dillon Francis', 'Eprom', 'Theory of a Deadman', 'Grizfolk',
       'Dustin Lynch', 'The Band Of Heathens', 'Luke Combs',
       'Possessed by Paul James', 'The Band Of Heathens',
       'Panic! At The Disco'], dtype=object)

In [13]:
for data in df:
    if data['track_name'] == 'Hurricane':
        print(data)

TypeError: string indices must be integers

In [23]:
def test(a,b):
    return{'sum': a+b,'sub': a-b,'mul': a*b,'div': a/b}

In [29]:
test(3,10)['sum']

13

In [2]:
message = 'bonjour je suis ici'

#print the message and his type
print(message)
print(type(message))


bonjour je suis ici
<class 'str'>


In [44]:
def test_2(name,age):
    return_list = []
    rd_number = rd.randint(0,1)
    print(rd_number)
    return_list.append(f"your name is {name} and you are {age} years old")
    return_list.append(f"you are named {name} and you are {age}")
    return return_list[rd_number]
test_2('benjamin', '20')

0


'your name is benjmain and you are 20 years old'

In [5]:
datas = {"music" : [
                    "who is the artist of (.*)",
                    "who whote (.*)"
                ]}
def test_3(datas):
    for i, data in datas.items():
        print(i, data)
test_3(datas)    

music ['who is the artist of (.*)', 'who whote (.*)']


In [6]:
A  = 'bonjour'
class color:
   PURPLE = '\033[95m'
   CYAN = '\033[96m'
   DARKCYAN = '\033[36m'
   BLUE = '\033[94m'
   GREEN = '\033[92m'
   YELLOW = '\033[93m'
   RED = '\033[91m'
   BOLD = '\033[1m'
   UNDERLINE = '\033[4m'
   END = '\033[0m'

print(color.BOLD, f'bonjour {A}', color.GREEN)
print('hello world !')

[1m bonjour bonjour [92m
hello world !


In [1]:
#how to count in a dataframe te number of rows where music is equal to Hurricane and artist is equal to the artist name
df[(df['music'] == 'Hurricane') & (df['artist_name'] == 'The Chainsmokers')] 

# convert tempo into float
df['tempo'] = df['tempo'].astype(float)

NameError: name 'df' is not defined

In [5]:
#how to convert a dict into a dataframe
import pandas as pd
d = {'a': [1,2,3], 'b': [2,3,4], 'c': [3,4,5]}
df = pd.DataFrame(d)

df
#add a line instance to my dataframe
df.loc[len(df)] = [4,5,6]
#how to add a column to my dataframe
df['d'] = [4,5,6]

#how to append a 

Unnamed: 0,a,b,c
0,1,2,3
1,2,3,4
2,3,4,5


In [7]:
#create a list of dict of random data
import random as rd
list_of_dict = []
for i in range(10):
    list_of_dict.append({'a': i, 'b': i*2, 'c': i*3})

#shuffle the list
rd.shuffle(list_of_dict)
print (list_of_dict)

[{'a': 3, 'b': 6, 'c': 9}, {'a': 9, 'b': 18, 'c': 27}, {'a': 2, 'b': 4, 'c': 6}, {'a': 7, 'b': 14, 'c': 21}, {'a': 1, 'b': 2, 'c': 3}, {'a': 6, 'b': 12, 'c': 18}, {'a': 0, 'b': 0, 'c': 0}, {'a': 5, 'b': 10, 'c': 15}, {'a': 4, 'b': 8, 'c': 12}, {'a': 8, 'b': 16, 'c': 24}]


In [9]:
def clean_up_string(string):
    #clean string drop the potential question mark at the end of the music name
    if ' ?' in string:
        string = string.replace(' ?','')
    elif '?' in string:
        string = string.replace('?','')
    return string

In [11]:
test = clean_up_string('Hurricane wrote by Dillon Francis ?')
test

'Hurricane wrote by Dillon Francis'

In [2]:
a, b, c = 0, 0, 0

print(a, b, c)

0 0 0


In [3]:
def test_4(d):
    a = d**2
    b = d**3
    c = d**4
    return a, b, c

a, b, c = test_4(2)
print(a, b, c)

4 8 16


true
