In [None]:
import os
import numpy as np
import pandas as pd

import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt

from prophet import Prophet

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the raw CSV into a DataFrame; the location is kept in one named
# constant so it is easy to spot and change.
DATA_PATH = '/content/data.csv'
year_data = pd.read_csv(DATA_PATH)

In [None]:
# Columns kept for the analysis: the year plus six sound features
selected_columns = [
    'year',
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'liveness',
    'valence',
]

# Show the first rows of the data restricted to those columns
preview = year_data[selected_columns].head()
print("Selected columns of the data:")
print(preview)

Selected columns of the data:
   year  acousticness  danceability  energy  instrumentalness  liveness  \
0  1921         0.982         0.279   0.211          0.878000     0.665   
1  1921         0.732         0.819   0.341          0.000000     0.160   
2  1921         0.961         0.328   0.166          0.913000     0.101   
3  1921         0.967         0.275   0.309          0.000028     0.381   
4  1921         0.957         0.418   0.193          0.000002     0.229   

   valence  
0   0.0594  
1   0.9630  
2   0.0394  
3   0.1650  
4   0.2530  


In [None]:
# DataFrame.info() writes its summary directly to stdout and returns None,
# so wrapping it in print() only appends a stray "None" line to the output.
year_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 170653 entries, 0 to 170652
Data columns (total 19 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   valence           170653 non-null  float64
 1   year              170653 non-null  int64  
 2   acousticness      170653 non-null  float64
 3   artists           170653 non-null  object 
 4   danceability      170653 non-null  float64
 5   duration_ms       170653 non-null  int64  
 6   energy            170653 non-null  float64
 7   explicit          170653 non-null  int64  
 8   id                170653 non-null  object 
 9   instrumentalness  170653 non-null  float64
 10  key               170653 non-null  int64  
 11  liveness          170653 non-null  float64
 12  loudness          170653 non-null  float64
 13  mode              170653 non-null  int64  
 14  name              170653 non-null  object 
 15  popularity        170653 non-null  int64  
 16  release_date      17

In [None]:
# Keep only the selected columns, then discard every row that has a
# missing value in any of them.
year_data_cleaned = year_data.loc[:, selected_columns].dropna(axis=0, how='any')

In [None]:
# Sound features to average (every selected column except the year)
sound_features = [
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'liveness',
    'valence',
]

# Group the cleaned rows by year, average each feature within the group,
# and turn the year index back into an ordinary column.
rows_by_year = year_data_cleaned.groupby('year')
yearly_means = rows_by_year[sound_features].mean()
mean_data = yearly_means.reset_index()

In [None]:
# Show the per-year feature means as plain text (one row per year).
print(mean_data)

    year  acousticness  danceability    energy  instrumentalness  liveness  \
0   1921      0.886896      0.418597  0.231815          0.344878  0.205710   
1   1922      0.938592      0.482042  0.237815          0.434195  0.240720   
2   1923      0.957247      0.577341  0.262406          0.371733  0.227462   
3   1924      0.940200      0.549894  0.344347          0.581701  0.235219   
4   1925      0.962607      0.573863  0.278594          0.418297  0.237668   
..   ...           ...           ...       ...               ...       ...   
95  2016      0.284171      0.600202  0.592855          0.093984  0.181170   
96  2017      0.286099      0.612217  0.590421          0.097091  0.191713   
97  2018      0.267633      0.663500  0.602435          0.054217  0.176326   
98  2019      0.278299      0.644814  0.593224          0.077640  0.172616   
99  2020      0.219931      0.692904  0.631232          0.016376  0.178535   

     valence  
0   0.379327  
1   0.535549  
2   0.625492  
3  

In [None]:
# Plot the yearly mean of every sound feature, one line per feature.
# Passing a list as `y` puts Plotly Express in wide-form mode, where the
# generated columns are named 'variable' and 'value'; relabel them and add a
# title so the figure is readable on its own and matches the forecast plot.
fig = px.line(
    mean_data,
    x='year',
    y=sound_features,
    labels={'year': 'Year', 'value': 'Value', 'variable': 'Feature'},
    title='Mean Sound Features by Year',
)
fig.show()

In [None]:
# Function to prepare the data for Prophet
def prepare_prophet_data(df, feature):
    """Build the two-column frame Prophet expects for one feature.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'year' column and a column named ``feature``.
    feature : str
        Name of the column to forecast.

    Returns
    -------
    pandas.DataFrame
        A new frame with columns 'ds' (timestamp at the start of each year)
        and 'y' (the feature values). ``df`` itself is not modified.
    """
    df_prophet = df[['year', feature]].rename(columns={'year': 'ds', feature: 'y'})

    # Convert bare year numbers (e.g. 1921) into real timestamps explicitly.
    # Passing raw integers on would leave the interpretation to the consumer;
    # a generic integer -> datetime conversion reads them as nanoseconds since
    # 1970, not as calendar years.
    if not pd.api.types.is_datetime64_any_dtype(df_prophet['ds']):
        year_text = df_prophet['ds'].astype(int).astype(str)
        df_prophet = df_prophet.assign(ds=pd.to_datetime(year_text, format='%Y'))

    return df_prophet

# Forecasting each sound feature using Prophet.
#
# Forecast dates are year STARTS (freq='YS'): 2021-01-01 through 2075-01-01.
# The previous year-end frequency ('Y') stopped at 2074-12-31, so 2075 itself
# was never predicted, and it placed every forecast on 31 December. The
# training series has a single observation per year (a bare year resolves to
# 1 January), so the fitted yearly seasonality is only constrained on that
# date; forecasting on the same date keeps predictions on the grid the model
# was trained on.
future_years = pd.DataFrame(
    {'ds': pd.date_range(start='2021-01-01', end='2075-01-01', freq='YS')}
)

forecast_dfs = []
for feature in sound_features:
    df_prophet = prepare_prophet_data(mean_data, feature)  # Use mean_data here
    # A fresh model per feature: each series is fitted independently.
    model = Prophet()
    model.fit(df_prophet)
    forecast = model.predict(future_years)
    # Tag the rows so the combined frame can be coloured by feature.
    forecast['feature'] = feature
    forecast_dfs.append(forecast[['ds', 'yhat', 'feature']])

# Combine all forecasts into one long frame with a clean, unique index
all_forecasts = pd.concat(forecast_dfs, ignore_index=True)

# Plot the forecasts
fig = px.line(all_forecasts, x='ds', y='yhat', color='feature',
              labels={'ds': 'Year', 'yhat': 'Value', 'feature': 'Feature'},
              title='Sound Feature Predictions for 2075')
fig.show()

INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpv9y3f2js/a49t1jsb.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpv9y3f2js/8hr3z3wl.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=31633', 'data', 'file=/tmp/tmpv9y3f2js/a49t1jsb.json', 'init=/tmp/tmpv9y3f2js/8hr3z3wl.json', 'output', 'file=/tmp/tmpv9y3f2js/prophet_modelaqnjp4_p/prophet_model-20240713060735.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
06:07:35 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
06:07:35 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling weekly seasonalit