In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import spotipy
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

In [None]:
# Load the track table from "data.csv"; the relative path is resolved against
# the kernel's current working directory.
data = pd.read_csv("data.csv")

In [None]:
# Work on an explicit, independent copy. `pd.DataFrame(data)` does not promise
# a copy, and a later cell adds a 'features' column to `data` in place; copying
# keeps the frame used for inspection and plotting unaffected by that.
df_data = data.copy()

In [None]:
# Preview the first rows of the table (head() defaults to five rows).
df_data.head()

In [None]:
# List the column labels available in the table.
print(df_data.columns)

In [None]:
# Report the table dimensions as (rows, columns).
print(f"df_data: {df_data.shape}")

In [None]:
# DataFrame.info() writes its report itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
df_data.info()

In [None]:
# Summary statistics of the numeric columns. Left as the cell's last expression
# so the notebook renders it as a rich table instead of plain printed text.
df_data.describe()

In [None]:
# Drop the identifier / text columns before computing correlations
# (presumably everything that remains is numeric -- confirm against the CSV).
df = df_data.drop(columns=['id','name','artists','release_date'])
# fillna() returns a new frame; without the assignment the zero-fill was
# silently thrown away and `df` kept its missing values.
df = df.fillna(0)
# Pairwise correlation matrix, shown as the cell output.
df.corr()

In [None]:
# Dimensions of the reduced frame, then the number of missing entries per column.
print(df.shape)
df.isna().sum()

In [None]:
# Draw the boolean missing-value mask of `df` as a heatmap (no colour bar),
# which makes any gaps in the table visible at a glance.
plt.figure(figsize=(5, 3))
sns.set(font_scale=1)
sns.heatmap(data=df.isna(), cmap='viridis', cbar=False)
plt.show()

In [None]:
# Number of missing entries in each column of the full table.
df_data.isna().sum()

In [None]:
# Same missing-value heatmap as for the reduced frame, here over every column
# of `df_data` and with the 'magma' palette.
plt.figure(figsize=(5, 3))
sns.set(font_scale=1)
sns.heatmap(data=df_data.isna(), cmap='magma', cbar=False)
plt.show()

In [None]:
#Scale down the dataset
from sklearn.preprocessing import MinMaxScaler

datatypes = ['int16','int32','int64','float16','float32','float64']
# Exclude a previously assigned cluster-label column (if this cell is re-run
# after clustering) so labels are never fed back in as an input feature.
normalization = data.drop(columns=['features'], errors='ignore').select_dtypes(include=datatypes)
# Actually fit and apply the scaler. Merely constructing MinMaxScaler(...)
# transforms nothing, so the numeric columns were previously left unscaled.
# Each column is mapped to the [0, 1] range; labels and index are preserved.
normalization = pd.DataFrame(
    MinMaxScaler().fit_transform(normalization),
    columns=normalization.columns,
    index=normalization.index,
)

In [None]:
from sklearn.cluster import KMeans

# k-means starts from random centroids; a fixed seed keeps the cluster ids
# reproducible across Restart & Run All.
kmeans = KMeans(n_clusters=10, random_state=42)
features = kmeans.fit_predict(normalization)
# Store each track's cluster id next to its other columns.
# The ids are kept as-is: they are nominal labels, and constructing a
# MinMaxScaler around the column (without fit/transform) would not change them.
data['features'] = features

In [None]:
from tqdm import tqdm

class music_recommendation():
    """Recommend tracks by summed absolute (L1) difference to a query track.

    The dataset must contain a 'name' column (used to look up the query) and
    an 'artists' column (returned together with 'name').
    """

    def __init__(self, dataset):
        """Keep a reference to the track table.

        Args:
            dataset: pandas DataFrame holding one track per row.
        """
        self.dataset = dataset

    @staticmethod
    def _as_float(value):
        """Return ``float(value)``, or None when the value is not numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    def recommend(self, song_name, amount=1, skip_columns=(1, 6, 12, 14, 18)):
        """Return the ``amount`` tracks closest to ``song_name``.

        The distance of a candidate is the sum, over all compared columns, of
        the absolute difference between the candidate's value and the query
        track's value. Values that cannot be converted to float contribute 0.

        Args:
            song_name: Track title to look up; matching is case-insensitive
                and the first matching row is used as the query.
            amount: Number of recommendations to return.
            skip_columns: Positional indices of columns left out of the
                distance. The default is the list that used to be hard-coded
                here (presumably the text/identifier columns of the source
                CSV -- confirm against the actual column order).

        Returns:
            DataFrame with the columns 'artists' and 'name', ordered from the
            smallest to the largest distance, at most ``amount`` rows.

        Raises:
            IndexError: If no track with that name exists in the dataset.
        """
        names = self.dataset['name'].str.lower()
        wanted = song_name.lower()

        matches = self.dataset[names == wanted]
        if matches.empty:
            # Same exception type an empty lookup raised before, now with a
            # message that says what actually went wrong.
            raise IndexError(f"song {song_name!r} was not found in the dataset")
        song = matches.iloc[0]
        # Every track whose title differs from the query is a candidate.
        rec = self.dataset[names != wanted]

        skipped = set(skip_columns)
        distance = np.zeros(len(rec), dtype=float)
        for pos in range(rec.shape[1]):
            if pos in skipped:
                continue
            reference = self._as_float(song.iloc[pos])
            if reference is None:
                # Non-numeric query value: the column adds nothing for any row.
                continue
            column = rec.iloc[:, pos]
            if pd.api.types.is_numeric_dtype(column):
                # Vectorised path for ordinary numeric columns.
                distance += np.abs(reference - column.astype(float).to_numpy())
            else:
                # Mixed/object column: convert value by value and let entries
                # that are not numeric contribute zero.
                distance += np.array(
                    [
                        0.0 if value is None else abs(reference - value)
                        for value in map(self._as_float, column)
                    ],
                    dtype=float,
                )

        # assign() builds a new frame, so neither the stored dataset nor a
        # filtered view of it is modified.
        ranked = rec.assign(distance=distance).sort_values('distance')
        columns = ['artists', 'name']
        return ranked[columns][:amount]


In [None]:
# Build the recommender over the full track table, then request the five
# tracks ranked closest to "Lovers Rock".
recommendation = music_recommendation(dataset=data)
recommendation.recommend(song_name="Lovers Rock", amount=5)

In [None]:
# Ten tracks ranked closest to "Danny Boy".
recommendation.recommend(song_name="Danny Boy", amount=10)

In [None]:
# Ten tracks ranked closest to "When Irish Eyes Are Smiling".
recommendation.recommend(song_name="When Irish Eyes Are Smiling", amount=10)

In [None]:
# Audio features whose evolution over time is plotted.
h = ['acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'valence']

# The table has one row per track, so many rows can share a year. Feeding those
# rows straight into a line chart connects them in row order; averaging per
# year first gives exactly one point per year and feature.
yearly_means = df_data.groupby('year', as_index=False)[h].mean()

# Plotly renders its own figure, so no matplotlib figure is opened here
# (one would only show up as an empty extra output).
fig = px.line(yearly_means, x='year', y=h, color_discrete_sequence=px.colors.qualitative.Dark2)
fig.update_layout(template='plotly_white')
fig.show()

In [None]:
plt.figure(figsize=(5,5))
sns.set(font_scale=1)

# seaborn does not accept a list of column names for `y`: it treats the list as
# a data vector, which only "worked" because the list happened to have as many
# entries as the ten selected rows. Reshape to long form instead, one row per
# (track, feature), so every feature value is plotted against its year.
# NOTE(review): the intended chart is inferred from the original arguments --
# confirm that a year-vs-value histogram coloured by feature is what is wanted.
top10_long = df_data.nlargest(10, 'popularity').melt(
    id_vars='year',
    value_vars=['valence','energy','danceability','acousticness','instrumentalness',
       'liveness', 'loudness', 'mode','speechiness', 'tempo'],
    var_name='feature',
    value_name='value',
)
ax = sns.histplot(data=top10_long, x='year', y='value', hue='feature')
ax.set_facecolor('none')
plt.show()