**Getting the data**

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', None)
%matplotlib inline

In [None]:
# Load the Spotify songs CSV (path is relative to the notebook's working directory).
spotify_songs_df = pd.read_csv(
    '../input/spotify-songs/spotify_songs.csv'
)

In [None]:
# Preview the first 20 rows.
# (An alternative load path, '/content/spotify_songs.csv', used to be kept here
# as commented-out code.)
spotify_songs_df.head(n=20)

In [None]:
# Report the dataset dimensions (rows x columns).
row_count, feature_count = spotify_songs_df.shape
print('Dataset: ', row_count, 'Rows with', feature_count, 'Features')

**Exploratory data analysis**

In [None]:
# Summary of the frame: column names, non-null counts and dtypes.
spotify_songs_df.info()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, min/max) of the numeric columns.
spotify_songs_df.describe()

In [None]:
# Count the missing values in each column.
spotify_songs_df.isna().sum()

In [None]:
# List the rows whose track_artist is missing, transposed so that each record
# is displayed as a column.
missing_artist_mask = spotify_songs_df["track_artist"].isna()
spotify_songs_df[missing_artist_mask].T

In [None]:
# Count the rows that exactly repeat an earlier row (all columns equal).
exact_duplicate_mask = spotify_songs_df.duplicated()
exact_duplicate_mask.sum()

In [None]:
# Looking at the most popular songs reveals that the dataset does contain
# duplicated songs.
songs_by_popularity = spotify_songs_df.sort_values(
    by='track_popularity',
    ascending=False,
)
songs_by_popularity.head()

In [None]:
# Those duplicates don't necessarily share the same numeric values and may come
# from different albums.
is_dance_monkey = spotify_songs_df['track_name'].eq('Dance Monkey')
spotify_songs_df[is_dance_monkey]

**Adding Columns**

In [None]:
# Derive the track duration in seconds, and from that in minutes, out of the
# millisecond column.
MS_PER_SECOND = 1000
SECONDS_PER_MINUTE = 60
spotify_songs_df['duration_s'] = spotify_songs_df['duration_ms'].div(MS_PER_SECOND)
spotify_songs_df['duration_min'] = spotify_songs_df['duration_s'].div(SECONDS_PER_MINUTE)

**Visualizing numerical columns**

In [None]:
# Global seaborn look for the plots below: "Set2" colour cycle on a white background.
palette = sns.color_palette(palette="Set2")
sns.set_palette(palette=palette)
sns.set_style(style='white')

In [None]:
# One histogram per numeric column, drawn on a single 15x15 inch figure.
spotify_songs_df.hist(figsize = (15,15))
plt.show()

**Visualizing categorical features**

In [None]:
# Ten most frequent values of each categorical column, one horizontal bar chart
# per column on a 3x3 grid (seven columns, so the last two slots stay empty).
categorical_columns = [
    'track_album_name',
    'track_name',
    'track_artist',
    'track_album_release_date',
    'playlist_subgenre',
    'playlist_genre',
    'playlist_name',
]

fig2 = plt.figure(figsize=(20,15))
for grid_position, column in enumerate(categorical_columns, start=1):
    ax1 = fig2.add_subplot(3, 3, grid_position)
    # Pass the axes explicitly rather than relying on pyplot's "current axes",
    # so each chart is guaranteed to land in the subplot created for it.
    spotify_songs_df[column].value_counts().head(10).plot(kind='barh', ax=ax1)
    ax1.set_title(column)

plt.tight_layout()

**Heatmap**

In [None]:
# Frame used for the correlation heatmap: drop the millisecond and second
# duration columns so that only one duration column (duration_min) remains.
redundant_duration_columns = ['duration_ms', 'duration_s']
df_corr = spotify_songs_df.drop(columns=redundant_duration_columns)

In [None]:
# Pairwise correlations between the numeric features, saved to heatmap.png.
# The frame also holds text columns (e.g. track_name, playlist_genre); pandas 2.0+
# raises on non-numeric data in corr() instead of silently dropping it, so the
# numeric columns are selected explicitly.
numeric_corr = df_corr.select_dtypes(include='number').corr()

plt.figure(figsize=(16, 8))
sns.heatmap(numeric_corr, annot_kws={'weight':'bold'},linewidths=.5, cmap="Greens", annot=True)
plt.savefig("heatmap.png")

**Histograms again, this time with KDE overlays**

In [None]:
# Density histogram with a KDE curve on top for each numeric feature, laid out on
# a 4x3 grid.
# NOTE(review): the original author reported this figure looking blurry; dpi and
# figsize are left exactly as they were.
sns.set_theme()
fig, axes = plt.subplots(4, 3, figsize=(18, 10),dpi=200)
plt.subplots_adjust(hspace = 0.5)
plt.subplots_adjust(wspace = 0.25)

# One entry per subplot, in row-major grid order:
# (column, extra histplot kwargs, extra kdeplot kwargs).
distribution_specs = [
    ('track_popularity', {'bins': 25}, {'clip': (0, 100)}),
    ('danceability', {'bins': 25}, {'clip': (0, 1)}),
    ('energy', {'bins': 25}, {'clip': (0, 1)}),
    # 11 equal-width bins over the integers 0..11 would pool keys 10 and 11 into
    # a single bar; discrete=True draws one bar per integer value instead.
    # (Assumes `key` holds integer values 0-11, as the clip range suggests.)
    ('key', {'discrete': True}, {'clip': (0, 11), 'bw_method': 0.2}),
    ('loudness', {'bins': 50}, {'clip': (-46.5, 1.3)}),
    ('speechiness', {'bins': 25}, {'clip': (0, 1)}),
    ('acousticness', {'bins': 25}, {'clip': (0, 1)}),
    ('instrumentalness', {'bins': 10}, {'clip': (0, 1)}),
    ('liveness', {'bins': 25}, {'clip': (0, 1)}),
    ('valence', {'bins': 25}, {'clip': (0, 1), 'bw_method': 0.2}),
    ('tempo', {'bins': 25}, {'bw_method': 0.2}),
    ('duration_s', {'bins': 25}, {}),
]

for ax, (column, hist_kwargs, kde_kwargs) in zip(axes.flat, distribution_specs):
    sns.histplot(ax=ax, data=spotify_songs_df, x=column, kde=False,
                 color='g', edgecolor='g', stat='density', **hist_kwargs)
    sns.kdeplot(data=spotify_songs_df, x=column, color='black', ax=ax, **kde_kwargs)

In [None]:
# Mean track popularity for each playlist genre, drawn as horizontal bars.
bp = sns.barplot(
    data=spotify_songs_df,
    x="track_popularity",
    y="playlist_genre",
    estimator=np.mean,
)
bp.set_title('Playlist Genre vs Avg Track Popularity')
axis_label_size = 12
bp.set_xlabel("Average Track Popularity", fontsize=axis_label_size)
bp.set_ylabel("Playlist Genre", fontsize=axis_label_size)

In [None]:
# Mean track popularity for each playlist sub-genre.
# DataFrame.plot opens a figure of its own when it is not given an axes, which
# left the 20x10 figure empty and drew the chart at the default size. Create the
# axes up front and pass them in so the requested size is actually used.
fig_subgenre, ax_subgenre = plt.subplots(figsize=(20,10))
subgenre_popularity = (
    spotify_songs_df[['track_popularity','playlist_subgenre']]
    .groupby(['playlist_subgenre'])
    .mean()
)
subgenre_popularity.plot.barh(ax=ax_subgenre)
ax_subgenre.set_title("Track Popularity vs Playlist Sub Genre",fontsize=15)
plt.show()