In [35]:
import pandas as pd
import requests
import io

In [36]:
# Download the merged Spotify CSV from the team's GitHub repo.

url = "https://raw.githubusercontent.com/Jingxue-24/QM2-team9/main/Spotify%20API/merged.csv"
# A timeout keeps the cell from hanging forever if GitHub is unreachable.
response = requests.get(url, timeout=30)
# Fail loudly on HTTP errors (404, 5xx, ...) instead of silently parsing an
# error page as if it were the CSV.
response.raise_for_status()
download = response.content

# Decode the downloaded bytes as UTF-8 text and parse them into a DataFrame.

df = pd.read_csv(io.StringIO(download.decode('utf-8')))

# Preview the first rows and the column names.

print(df.head())
print(df.columns)

                                 Name  ... Time_signature
0          Bridge Over Troubled Water  ...              4
1      (They Long To Be) Close To You  ...              4
2  American Woman - 7" Single Version  ...              4
3                War - Single Version  ...              4
4       Ain't No Mountain High Enough  ...              4

[5 rows x 17 columns]
Index(['Name', 'Album', 'Artist', 'Popular_date', 'Release_date', 'Length',
       'Popularity', 'Danceability', 'Acousticness', 'Danceability.1',
       'Energy', 'Instrumentalness', 'Liveness', 'Loudness', 'Speechiness',
       'Tempo', 'Time_signature'],
      dtype='object')


In [37]:
# Derive the decade each song was popular in by rounding 'Popular_date' down
# to a multiple of 10 (e.g. 1974 -> 1970), and store it as a 'Decade' column.
# Assumes 'Popular_date' holds a numeric year -- TODO confirm against the CSV.
year = df['Popular_date']
decade = year - (year % 10)
# A single assignment is enough; the original repeated it through df.loc.
# `decade` is kept as a notebook-level variable because the model formulas in
# later cells reference it by that (lowercase) name.
df['Decade'] = decade

In [38]:
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [39]:
# Tempo vs. decade: fit an OLS model and print its ANOVA table.
# The formula names `decade` and `tempo` are lowercase, so they are not
# columns of df; they are presumably resolved from the notebook namespace.
# NOTE(review): the model regresses decade on tempo, not tempo on decade.
tempo = df.loc[:, 'Tempo']
lm = ols(formula='decade ~ tempo', data=df).fit()
table = sm.stats.anova_lm(lm)
print(table)  # not significant (PR(>F) = 0.905 in the recorded output)

             df         sum_sq     mean_sq         F    PR(>F)
tempo       1.0       2.852502    2.852502  0.014279  0.904909
Residual  987.0  197177.329500  199.774397       NaN       NaN


In [40]:
# Energy vs. decade: fit an OLS model and print its ANOVA table.
# The formula names `decade` and `energy` are lowercase, so they are not
# columns of df; they are presumably resolved from the notebook namespace.
# NOTE(review): the model regresses decade on energy, not energy on decade.
energy = df.loc[:, 'Energy']
lm = ols(formula='decade ~ energy', data=df).fit()
table = sm.stats.anova_lm(lm)
print(table)  # significant (PR(>F) = 4.8e-16 in the recorded output)

             df         sum_sq       mean_sq          F        PR(>F)
energy      1.0   12736.470477  12736.470477  68.155733  4.810912e-16
Residual  987.0  184443.711525    186.873061        NaN           NaN


In [41]:
# Song length vs. decade: fit an OLS model and print its ANOVA table.
# The formula names `decade` and `length` are lowercase, so they are not
# columns of df; they are presumably resolved from the notebook namespace.
# NOTE(review): the model regresses decade on length, not length on decade.
length = df.loc[:, 'Length']
lm = ols(formula='decade ~ length', data=df).fit()
table = sm.stats.anova_lm(lm)
print(table)  # significant (PR(>F) = 3.6e-05 in the recorded output)

             df         sum_sq      mean_sq         F    PR(>F)
length      1.0    3380.149509  3380.149509  17.21469  0.000036
Residual  987.0  193800.032493   196.352617       NaN       NaN


In [42]:
# Danceability vs. decade: fit an OLS model and print its ANOVA table.
# The formula names `decade` and `danceability` are lowercase, so they are
# not columns of df; they are presumably resolved from the notebook namespace.
# NOTE(review): df also has a 'Danceability.1' column, which suggests the CSV
# header repeats 'Danceability' -- confirm this cell reads the intended one.
danceability = df.loc[:, 'Danceability']
lm = ols(formula='decade ~ danceability', data=df).fit()
table = sm.stats.anova_lm(lm)
print(table)  # significant (PR(>F) = 7.4e-09 in the recorded output)

                 df         sum_sq      mean_sq          F        PR(>F)
danceability    1.0    6567.058882  6567.058882  34.004412  7.445221e-09
Residual      987.0  190613.123120   193.123732        NaN           NaN


In [45]:
# Instrumentalness vs. decade: fit an OLS model and print its ANOVA table.
# The formula names `decade` and `instrumentalness` are lowercase, so they are
# not columns of df; they are presumably resolved from the notebook namespace.
# NOTE(review): the model regresses decade on instrumentalness, not the
# other way round.
instrumentalness = df.loc[:, 'Instrumentalness']
lm = ols(formula='decade ~ instrumentalness', data=df).fit()
table = sm.stats.anova_lm(lm)
print(table)  # significant (PR(>F) = 0.0004 in the recorded output)

                     df         sum_sq      mean_sq          F    PR(>F)
instrumentalness    1.0    2477.856946  2477.856946  12.560943  0.000412
Residual          987.0  194702.325056   197.266793        NaN       NaN


In [44]:
# Loudness vs. decade: fit an OLS model and print its ANOVA table.
# The formula names `decade` and `loudness` are lowercase, so they are not
# columns of df; they are presumably resolved from the notebook namespace.
# NOTE(review): the model regresses decade on loudness, not loudness on decade.
loudness = df.loc[:, 'Loudness']
lm = ols(formula='decade ~ loudness', data=df).fit()
table = sm.stats.anova_lm(lm)
print(table)  # significant (PR(>F) = 8.0e-99 in the recorded output)

             df         sum_sq       mean_sq           F        PR(>F)
loudness    1.0   71613.772796  71613.772796  562.911643  7.996093e-99
Residual  987.0  125566.409206    127.220273         NaN           NaN


In [46]:
# Speechiness vs. decade: fit an OLS model and print its ANOVA table.
# The formula names `decade` and `speechiness` are lowercase, so they are not
# columns of df; they are presumably resolved from the notebook namespace.
# NOTE(review): the model regresses decade on speechiness, not the other way
# round.
speechiness = df.loc[:, 'Speechiness']
lm = ols(formula='decade ~ speechiness', data=df).fit()
table = sm.stats.anova_lm(lm)
print(table)  # significant (PR(>F) = 1.2e-21 in the recorded output)

                df         sum_sq       mean_sq          F        PR(>F)
speechiness    1.0   17445.480752  17445.480752  95.800585  1.182193e-21
Residual     987.0  179734.701250    182.102028        NaN           NaN


Tutorial(s) followed: 


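*   [1-way ANOVA from scratch: dissecting the ANOVA table with a worked example (Towards Data Science)](https://towardsdatascience.com/1-way-anova-from-scratch-dissecting-the-anova-table-with-a-worked-example-170f4f2e58ad)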

