Our project is to uncover patterns in music on Spotify between 2010 and 2019. We'll examine musical trends and genres through the years, which artists appeared most frequently, and how beats per minute correlate with danceability.

What is the most popular genre?

Who is the most popular artist?

Did artists make the top ten in multiple years or in just one?

Do beats per minute correlate with ‘danceability’?

What is the statistical analysis for beats per minute?

Are any of the songs outliers?

Tasks: collect the data; clean and merge the data; explore the data; create the analysis; create the presentation.



In [None]:
# Import Dependencies
import pandas as pd
import matplotlib.pyplot as plt
import os
import csv
import numpy as np
import scipy.stats as st
from scipy.stats import iqr
from scipy.stats import linregress


In [None]:
# Path to the source CSV (relative path, resolved against the current working directory).
# Nothing is loaded here; the file is read by the pd.read_csv call that uses this name.
music_data_to_load = "top10s.csv"

In [None]:
# Load the CSV into a pandas DataFrame.
# The file is decoded as latin-1; the delimiter is the read_csv default (comma).
music_data_pd = pd.read_csv(music_data_to_load, encoding="latin-1")
music_data_pd.head()

In [None]:
# Show how many rows carry each distinct value of the "top genre" column
music_data_pd["top genre"].value_counts()

In [None]:
# Hold the per-genre row counts in a one-column DataFrame ("total") and preview it
top_genre_df = music_data_pd["top genre"].value_counts().to_frame(name="total")
top_genre_df.head()

In [None]:
# Determine most popular genre overall
# Build a two-column table (genre, total) ordered from most to least frequent.
genre_counts = music_data_pd["top genre"].value_counts()
top_genre_df = (
    pd.DataFrame({"total": genre_counts})
    .sort_values("total", ascending=False)
    .reset_index()  # move the genre labels out of the index into a column
)
top_genre_df.columns = ["genre", "total"]
top_genre_df

In [None]:
# Graph popular genre overall
# Bar chart of song count per genre, using the genre/total table built above.
plt.figure(figsize=[15,10])
x_values = top_genre_df['genre']
y_values = top_genre_df['total']

plt.bar(x_values, y_values, color='g')
plt.xlabel('genre', fontsize=12)
plt.xticks(rotation='vertical')

plt.ylabel('total', fontsize=12)
plt.title('Most Popular Genre Overall')

# Save the plot as .png BEFORE showing it: plt.show() can release the current
# figure (it does under the notebook inline backend), so a savefig() issued
# afterwards writes an empty image.
os.makedirs("images", exist_ok=True)  # savefig does not create missing directories
plt.savefig("images/Most Popular Genre Overall.png")
plt.show()


In [None]:
# Determine most popular genre by year
# Count rows per (year, genre) pair; after count() every remaining column holds
# the number of non-null values in that group.
top_genre_year_df = music_data_pd.groupby(["year", "top genre"]).count()
top_genre_year_df = top_genre_year_df.sort_values(["year", "Unnamed: 0"], ascending=False)

# Keep only the "Unnamed: 0" count column. Selecting it directly (instead of
# dropping a hard-coded list of every other column) keeps this cell working if
# the CSV gains or loses unrelated columns.
clean_by_year_df = top_genre_year_df[["Unnamed: 0"]]

# Keep (year, genre) pairs with more than 11 songs, then flatten the
# (year, top genre) index into ordinary columns. reset_index() returns a new
# frame, so the filtered slice is never mutated in place.
cleaner_by_year_df = clean_by_year_df[clean_by_year_df["Unnamed: 0"] > 11].reset_index()
cleaner_by_year_df.columns = ["Year", "Top Genre", "Total"]
cleaner_by_year_df

In [None]:
# Graph the per-year totals from cleaner_by_year_df
plt.figure(figsize=[15,10])
x_axis = cleaner_by_year_df['Year']
y_axis = cleaner_by_year_df['Total']

plt.bar(x_axis, y_axis, color='g')
plt.xlabel('Year', fontsize=12)
plt.xticks(rotation='vertical')

plt.ylabel('Total', fontsize=12)
plt.title('Most Popular Genre: Dance Pop by Year')

# Save the plot as .png BEFORE showing it: plt.show() can release the current
# figure (it does under the notebook inline backend), so a savefig() issued
# afterwards writes an empty image.
# The file name is specific to this chart; the original reused
# "Most Popular Genre Overall.png" and overwrote the overall-genre plot.
os.makedirs("images", exist_ok=True)  # savefig does not create missing directories
plt.savefig("images/Most Popular Genre by Year.png")
plt.show()


In [None]:
# Most popular artist overall
# Count rows per artist, keep artists with more than 9 rows, and present the
# result as an (artist, total) table ordered from most to least frequent.
artist_counts = music_data_pd["artist"].value_counts()
frequent_artists = artist_counts[artist_counts > 9]
top_artist_df = (
    pd.DataFrame({"total": frequent_artists})
    .sort_values("total", ascending=False)
    .reset_index()  # move the artist names out of the index into a column
)
top_artist_df.columns = ["artist", "total"]
top_artist_df

In [None]:
# Graph most popular artist overall
# Bar chart of song count per artist, using the artist/total table built above.
plt.figure(figsize=[15,10])
x_values = top_artist_df['artist']
y_values = top_artist_df['total']

plt.bar(x_values, y_values, color='b')
plt.xlabel('Artist', fontsize=16)
# rotation must be a number (degrees) or 'vertical'/'horizontal'; the string
# '45' is rejected by current matplotlib releases.
plt.xticks(rotation=45, fontsize=14)

plt.ylabel('Total', fontsize=16)
plt.title('Most Popular Artist Overall')

# Save the plot as .png BEFORE showing it: plt.show() can release the current
# figure (it does under the notebook inline backend), so a savefig() issued
# afterwards writes an empty image.
os.makedirs("images", exist_ok=True)  # savefig does not create missing directories
plt.savefig("images/Most Popular Artist Overall.png")
plt.show()