In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Input data files are available in the read-only "../input/" directory
# Note: this cell contains no file-listing code; the CSV files are read directly by path in later cells

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the Spotify tracks CSV from the Kaggle input directory into a DataFrame.
df_tracks = pd.read_csv(
    filepath_or_buffer=r'/kaggle/input/spotify-datasets/tracks.csv',
)

In [None]:
# Preview the first five rows of the tracks table.
df_tracks.head(n=5)

In [None]:
# Count missing values in each column.
df_tracks.isna().sum()

In [None]:
# Print a summary of the frame: column names, non-null counts, dtypes and memory use.
df_tracks.info()

In [None]:
# Ten rows with the lowest popularity: sort ascending, keep the first ten.
sorted_df = (
    df_tracks
    .sort_values(by='popularity', ascending=True)
    .head(10)
)
sorted_df

In [None]:
# Summary statistics, transposed so each column of the data becomes a row.
df_tracks.describe().T

In [None]:
# Tracks with popularity above 90, ordered from most to least popular.
above_90 = df_tracks.query('popularity>90')
most_popular = above_90.sort_values(by='popularity', ascending=False)
# Show the first ten of them.
most_popular.head(10)

In [None]:
# Index the tracks by release date, parsed to datetimes, so later cells can
# derive the release year from the index.
# Guarded so the cell can be re-run: after the first run "release_date" is no
# longer a column, and an unguarded set_index would raise KeyError.
if "release_date" in df_tracks.columns:
    df_tracks = df_tracks.set_index("release_date")
# NOTE(review): pandas >= 2.0 parses with one inferred format; if release_date
# mixes precisions (e.g. year-only values) this may need format="ISO8601" —
# confirm against the data.
df_tracks.index = pd.to_datetime(df_tracks.index)
df_tracks.head()

In [None]:
# Look at the "artists" field of the row at position 18.
artists_only = df_tracks.loc[:, ["artists"]]
artists_only.iloc[18]

In [None]:
# Convert track length from milliseconds to whole seconds, replacing the
# "duration_ms" column with "duration".
# Guarded so that re-running the cell after the column has been dropped is a
# no-op instead of a KeyError.
if "duration_ms" in df_tracks.columns:
    # Vectorised equivalent of round(x / 1000) applied per row
    # (both forms round halves to the nearest even integer).
    df_tracks["duration"] = (df_tracks["duration_ms"] / 1000).round().astype("int64")
    df_tracks = df_tracks.drop(columns="duration_ms")

In [None]:
# Check the first few values of the new duration column.
df_tracks["duration"].head()

In [None]:
# Pearson correlation between the numeric track features, drawn as a heatmap.
# "key", "mode" and "explicit" are left out of the matrix.
# Numeric columns are selected explicitly: since pandas 2.0, DataFrame.corr()
# no longer silently skips non-numeric columns and raises on them instead.
numeric_features = (
    df_tracks
    .drop(columns=["key", "mode", "explicit"])
    .select_dtypes(include="number")
)
corr_df = numeric_features.corr(method="pearson")

plt.figure(figsize=(14, 6))
heatmap = sns.heatmap(
    corr_df,
    annot=True,        # write the coefficient in every cell
    fmt=".1g",         # one significant digit keeps the annotations compact
    vmin=-1,
    vmax=1,
    center=0,          # colour scale is centred on "no correlation"
    cmap="mako",       # a darker colormap
    linewidths=1,
    linecolor="black"
)
heatmap.set_title("Correlation Heatmap Between Variables", fontsize=16)
heatmap.set_xticklabels(heatmap.get_xticklabels(), rotation=90, fontsize=10)
heatmap.set_yticklabels(heatmap.get_yticklabels(), fontsize=10)
plt.show()

In [None]:
# Draw a 0.4% random subset of the tracks for the regression plots below.
# random_state pins the draw so a fresh "Run All" reproduces the same figures.
sample_df = df_tracks.sample(n=int(0.004 * len(df_tracks)), random_state=42)

In [None]:
# Report how many rows ended up in the sample.
print(sample_df.shape[0])

In [None]:
# Scatter plot with a fitted regression line: energy (x) against loudness (y),
# drawn from the sampled tracks.
fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(
    x="energy",
    y="loudness",
    data=sample_df,
    color="#8B0000",
    ax=ax,
)
ax.set(title="loudness vs Energy")

In [None]:
# Scatter plot with a fitted regression line: acousticness (x) against
# popularity (y), drawn from the sampled tracks.
fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(
    x="acousticness",
    y="popularity",
    data=sample_df,
    color="c",
    ax=ax,
)
ax.set(title="popularity vs acousticness")

In [None]:
# Copy the release-date index into a regular "dates" column (as datetimes),
# then pull out the calendar year of every track.
df_tracks['dates'] = pd.to_datetime(
    df_tracks.index.get_level_values('release_date')
)
years = df_tracks['dates'].dt.year

In [None]:
# Histogram of release years; discrete=True gives one bin per year.
year_grid = sns.displot(
    years,
    kind="hist",
    discrete=True,
    height=5,
    aspect=2,
    color="darkgreen",
)
year_grid.set(title="Number of Songs per Year")
plt.show()

In [None]:
# Bar chart of the mean track duration per release year.
# The per-year mean is computed once with pandas and only those means are
# plotted. The previous version passed every row to sns.barplot and hid the
# bootstrapped error bars with errwidth=False, i.e. a boolean where a line
# width is expected, while still paying for the bootstrap.
# Relies on `years` sharing df_tracks' index.
total_dr = df_tracks.duration
mean_dr_by_year = (
    total_dr.groupby(years).mean()
    .rename_axis("year")
    .reset_index(name="duration")
    .astype({"year": int})  # keeps tick labels as e.g. 1990 rather than 1990.0
)

fig_dims = (18, 7)
fig, ax = plt.subplots(figsize=fig_dims)

# One value per year, so seaborn has nothing to draw an error bar from.
sns.barplot(data=mean_dr_by_year, x="year", y="duration", ax=ax, color="#2E8B57")
ax.set(xlabel="Release year", ylabel="Mean duration (s)")
plt.title("Year vs Duration")
plt.xticks(rotation=90)

plt.show()


In [None]:
# Line chart of track duration against release year. seaborn aggregates the
# repeated observations for each year into a single line.
sns.set_style("whitegrid")
total_dr = df_tracks['duration']

fig_dims = (10, 5)
fig, ax = plt.subplots(figsize=fig_dims)
sns.lineplot(
    x=years,
    y=total_dr,
    color="#FF4500",
    ax=ax,
)
ax.set_title("Year vs Duration")
plt.xticks(rotation=60)
plt.show()

In [None]:
# Read the second dataset (tracks with a genre column) from the Kaggle input directory.
df_genre = pd.read_csv(
    filepath_or_buffer=r'/kaggle/input/ultimate-spotify-tracks-db/SpotifyFeatures.csv',
)

In [None]:
# Preview the first five rows of the genre table.
df_genre.head(n=5)

In [None]:
# Horizontal bars of duration_ms per genre, using the dark "mako" palette.
genre_ax = sns.barplot(
    data=df_genre,
    x='duration_ms',
    y='genre',
    palette="mako",
)
genre_ax.set_title("Duration of the Songs in Different Genres")
genre_ax.set_xlabel("Duration in milliseconds")
genre_ax.set_ylabel("Genres")
plt.show()


In [None]:
# Same genre/duration chart as a horizontal bar plot, this time with the
# gradient-like "viridis" palette.
genre_ax = sns.barplot(
    data=df_genre,
    x='duration_ms',
    y='genre',
    palette="viridis",
)
genre_ax.set_title("Duration of the Songs in Different Genres")
genre_ax.set_xlabel("Duration in milliseconds")
genre_ax.set_ylabel("Genres")
plt.show()


In [None]:
# Top ten genres ranked by their mean track popularity.
# Popularity is averaged per genre BEFORE taking the top ten. Sorting the raw
# rows and calling head(10) selects the ten most popular *tracks*, so the chart
# would show only the few genres those tracks belong to.
sns.set_style(style="whitegrid")
plt.figure(figsize=(10, 5))
famous = (
    df_genre
    .groupby("genre", as_index=False)["popularity"]
    .mean()
    .sort_values("popularity", ascending=False)
    .head(10)
)
sns.barplot(y='genre', x='popularity', data=famous, palette="viridis").set(
    title="Top 10 Genres by Popularity",
    xlabel="Mean popularity",
)
plt.show()
