In [70]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the dataset from the uploaded file
file_path = 'spotify.csv'
spotify_df = pd.read_csv(file_path)

# Convert the 'track_album_release_date' column to datetime format, handling mixed formats
spotify_df['track_album_release_date'] = pd.to_datetime(spotify_df['track_album_release_date'], errors='coerce') 

# Filter the dataset for tracks released in 2017
spotify_2017 = spotify_df[spotify_df['track_album_release_date'].dt.year == 2017]

# Group the data by genre and calculate the average values for the specified columns
genre_averages = spotify_2017.groupby('playlist_genre').agg({
    'loudness': 'mean',
    'mode': 'mean',
    'speechiness': 'mean',
    'acousticness': 'mean',
    'instrumentalness': 'mean',
    'liveness': 'mean',
    'valence': 'mean',
    'tempo': 'mean',
    'duration_ms': lambda x: x.mean() / 60000  # convert milliseconds to minutes
}).reset_index()

# Save the findings into a .csv file
output_path = 'Output.csv'
genre_averages.to_csv(output_path, index=False)

# Display the calculated averages
print(genre_averages)

# Investigating the correlation of danceability with other factors
correlation_matrix = spotify_2017[['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']].corr()

correlation_matrix

  playlist_genre  loudness      mode  speechiness  acousticness  \
0            edm -5.488184  0.546392     0.080881      0.093323   
1          latin -6.054218  0.589873     0.096266      0.179929   
2            pop -6.097230  0.535714     0.076634      0.189251   
3            r&b -7.674685  0.521552     0.144754      0.305785   
4            rap -6.700654  0.523810     0.218910      0.204430   
5           rock -7.042250  0.577586     0.064947      0.174181   

   instrumentalness  liveness   valence       tempo  duration_ms  
0          0.171589  0.188008  0.408136  123.911472     3.762918  
1          0.051904  0.178064  0.567343  117.825025     3.528350  
2          0.049959  0.170986  0.492012  118.333970     3.587487  
3          0.036085  0.168233  0.441277  115.150198     3.723384  
4          0.052506  0.184626  0.424720  123.851310     3.442068  
5          0.117893  0.201316  0.425194  129.680991     4.213990  


Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
danceability,1.0,-0.010399,0.116584,0.110413,-0.037089,-0.007956,-0.1247,0.326548,-0.117292
energy,-0.010399,1.0,0.659693,-0.028423,-0.522086,0.002908,0.147612,0.259415,0.134594
loudness,0.116584,0.659693,1.0,-0.070235,-0.360073,-0.254785,0.042839,0.241392,0.066852
speechiness,0.110413,-0.028423,-0.070235,1.0,0.060963,-0.103047,0.08179,0.026041,0.046718
acousticness,-0.037089,-0.522086,-0.360073,0.060963,1.0,-0.034078,-0.070064,0.001391,-0.116627
instrumentalness,-0.007956,0.002908,-0.254785,-0.103047,-0.034078,1.0,-0.018791,-0.152963,0.008136
liveness,-0.1247,0.147612,0.042839,0.08179,-0.070064,-0.018791,1.0,-0.016513,0.011765
valence,0.326548,0.259415,0.241392,0.026041,0.001391,-0.152963,-0.016513,1.0,-0.054993
tempo,-0.117292,0.134594,0.066852,0.046718,-0.116627,0.008136,0.011765,-0.054993,1.0
