# Genrify Project
## Music genre prediction

Henri Toussaint<br>
Victor Saint Guilhem<br>
Benoît Lafon<br>

The project sets out to predict the genre of a music using the Spotify API.

## Data Loading

In [1]:
import csv
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook

with open('music_collection.csv', newline='') as csvfile:
    spamreader = csv.reader(csvfile)
    data = []
    for row in spamreader:
        data.append(row)

ndata = np.array(data)
data_attributes = ndata[1::, 2:14]
data_target = ndata[1::, 14]
print("Number of instances: " + str(np.shape(data_attributes)[0]))
print("Number of features: " + str(np.shape(data_attributes)[1]))
print(ndata[0, 2:14])


Number of instances: 200
Number of features: 12
['acousticness' 'danceability' 'duration_ms' 'energy' 'instrumentalness'
 'key' 'liveness' 'loudness' 'speechiness' 'tempo' 'time_signature'
 'valence']


# Data Loading Using Pandas

In [2]:
import pandas as pd

data = pd.read_csv("music_collection.csv")
data_attributes = data.loc[:,'acousticness':'valence']

print('Number of instances: ' + str(data.shape[0]))
print('Number of attributes: ' + str(data_attributes.shape[1]))
print('Attributes:')
for i in data_attributes.columns.values:
    print('\t'+str(i))

Number of instances: 200
Number of attributes: 12
Attributes:
	acousticness
	danceability
	duration_ms
	energy
	instrumentalness
	key
	liveness
	loudness
	speechiness
	tempo
	time_signature
	valence


In [3]:
data.head()

Unnamed: 0,artist,track,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,speechiness,tempo,time_signature,valence,genre
0,Keith Jarrett,Everything I Love,0.876,0.385,273973,0.31,0.942,0,0.127,-17.769,0.0351,105.816,4.0,0.38,jazz
1,Ella Fitzgerald,It's Only A Paper Moon,0.958,0.732,156933,0.138,0.0,10,0.107,-14.28,0.0431,118.44,4.0,0.627,jazz
2,Charles Mingus,Self-Portrait In Three Colours,0.98,0.224,186707,0.0937,0.784,10,0.107,-15.641,0.0343,168.418,3.0,0.098,jazz
3,Eric Reed,I Still Believe in You,0.964,0.605,285000,0.0223,0.907,1,0.0851,-22.559,0.0499,67.851,4.0,0.218,jazz
4,Ornette Coleman,Lonely Woman,0.412,0.467,299507,0.316,0.000351,9,0.094,-12.072,0.0297,81.013,4.0,0.26,jazz


# Target variable

In [4]:
#plt.figure('Genres')
data['genre'] = data['genre'].astype("category")
data_target = data.loc[:,'genre']
count_genre = data.groupby("genre").size()
count_genre.plot(kind='bar',title='Genres', figsize=[13,8])

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f4acc25f2b0>

# Acousticness attribute

In [5]:
acousticness = data[['acousticness']]
acousticness.plot(kind='bar',figsize=[13,8])

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f4a9fbb5cc0>

In [6]:
acousticness.describe()

Unnamed: 0,acousticness
count,200.0
mean,0.26437
std,0.327115
min,7e-06
25%,0.01085
50%,0.0998
75%,0.4765
max,0.996


# Danceability attribute

In [7]:
danceability = data[['danceability']]
danceability.plot(kind='bar',figsize=[13,8])

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f4a9f68feb8>

In [8]:
danceability.describe()

Unnamed: 0,danceability
count,200.0
mean,0.54135
std,0.174454
min,0.092
25%,0.43675
50%,0.5465
75%,0.6635
max,0.93


# Duration attribute

In [9]:
duration_ms = data[['duration_ms']]
duration_ms.plot(kind='bar',figsize=[13,8])

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f4a9f293320>

In [10]:
duration_ms.describe()

Unnamed: 0,duration_ms
count,200.0
mean,242882.105
std,72957.991941
min,107893.0
25%,192847.0
50%,231033.0
75%,276560.0
max,519360.0


# Energy attribute

In [11]:
energy = data[['energy']]
energy.plot(kind='bar',figsize=[13,8])

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f4a9eea3e48>

In [12]:
energy.describe()

Unnamed: 0,energy
count,200.0
mean,0.643409
std,0.251341
min,0.0223
25%,0.49275
50%,0.6845
75%,0.842
max,0.995


# Instrumentalness attribute

In [13]:
instrumentalness = data[['instrumentalness']]
instrumentalness.plot(kind='bar',figsize=[13,8])

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f4a9ea00e10>

In [14]:
instrumentalness.describe()

Unnamed: 0,instrumentalness
count,200.0
mean,0.172061
std,0.318767
min,0.0
25%,1e-06
50%,0.000615
75%,0.11175
max,0.969


# Key attribute

In [15]:
key = data[['key']]
key.plot(kind='bar',figsize=[13,8])

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f4a9e68e630>

In [16]:
key.describe()

Unnamed: 0,key
count,200.0
mean,5.165
std,3.521331
min,0.0
25%,2.0
50%,5.0
75%,8.0
max,11.0


# Liveness attribute

In [17]:
liveness = data[['liveness']]
liveness.plot(kind='bar',figsize=[13,8])

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f4a9e207f28>

In [18]:
liveness.describe()

Unnamed: 0,liveness
count,200.0
mean,0.195631
std,0.170654
min,0.0288
25%,0.09395
50%,0.1255
75%,0.234
max,0.975


# Loudness attribute

In [19]:
loudness = data[['loudness']]
loudness.plot(kind='bar',figsize=[13,8])

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f4a9de8deb8>

In [20]:
loudness.describe()

Unnamed: 0,loudness
count,200.0
mean,-8.32282
std,5.043612
min,-30.937
25%,-9.84875
50%,-6.918
75%,-4.96775
max,-1.145


# Speechiness attribute

In [21]:
speechiness = data[['speechiness']]
speechiness.plot(kind='bar',figsize=[13,8])

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f4a9da12f28>

In [22]:
speechiness.describe()

Unnamed: 0,speechiness
count,200.0
mean,0.070822
std,0.062198
min,0.0245
25%,0.036725
50%,0.04775
75%,0.0766
max,0.451


# Tempo attribute

In [23]:
tempo = data[['tempo']]
tempo.plot(kind='bar',figsize=[13,8])

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f4a9d68af98>

In [24]:
tempo.describe()

Unnamed: 0,tempo
count,200.0
mean,122.23844
std,29.130569
min,63.152
25%,100.2965
50%,121.9485
75%,138.018
max,207.986


# Time Signature attribute

In [25]:
time_signature = data[['time_signature']]
time_signature.plot(kind='bar',figsize=[13,8])

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f4a9d209588>

In [26]:
time_signature.describe()



Unnamed: 0,time_signature
count,199.0
mean,3.929648
std,0.275379
min,3.0
25%,
50%,
75%,
max,5.0


# Valence attribute

In [27]:
valence = data[['valence']]
valence.plot(kind='bar',figsize=[13,8])

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f4a9ce8a2e8>

In [28]:
valence.describe()

Unnamed: 0,valence
count,200.0
mean,0.490206
std,0.251072
min,0.0392
25%,0.2765
50%,0.4955
75%,0.676
max,0.967
