In [7]:
# CODING
import pandas as pd
import statsmodels.api as sm

# Print the installed statsmodels version.
print(sm.__version__)

# 1. Load Data
# Read the Spotify feature table from disk and preview its first five rows.
csv_path = 'SpotifyFeatures.csv'
df = pd.read_csv(csv_path)
preview = df.head(5)
print(preview)

# 2. Define Variables
# Dependent variable (DV): popularity
# Independent variable of interest (IV): danceability
# Control variables: energy, valence, tempo, acousticness, loudness
# (Note: 'year' was not available in this specific dataset version, so
# acoustic attributes are used to control for song characteristics, as
# suggested in your PDF.)

y_col = 'popularity'

# The IV comes first, followed by the controls; this order becomes the
# column order of the design matrix.
focal_predictor = ['danceability']
control_vars = ['energy', 'valence', 'tempo', 'acousticness', 'loudness']
X_cols = focal_predictor + control_vars

# 3. Clean and Prepare
# Drop every row that is missing a value in any predictor or in the outcome.
required_cols = X_cols + [y_col]
data_clean = df.dropna(subset=required_cols)

# Outcome vector and design matrix are both taken from the cleaned rows, so
# they share the same row labels.
y = data_clean[y_col]
X = sm.add_constant(data_clean[X_cols])  # Add intercept

# 4. Fit Model
# Build the OLS specification (outcome first, design matrix second), fit it,
# and print the regression summary table.
ols_spec = sm.OLS(y, X)
model = ols_spec.fit()
print(model.summary())

# 5. Prediction for a single observation (row label 12345)
# NOTE: `.loc` selects by index *label*, not by position. After `dropna` the
# labels are those of the original CSV rows, so a label whose row was dropped
# no longer exists in `data_clean` / `X`.
obs_idx = 12345
if obs_idx not in data_clean.index:
    raise KeyError(
        f"Row label {obs_idx} is not present in the cleaned data "
        "(it may have been dropped because of missing values)."
    )
obs_data = data_clean.loc[obs_idx]

# Select with a list (`[[obs_idx]]`) to get a one-row DataFrame rather than a
# 1-D Series. A Series' index is the column names, which would be picked up
# as the row labels of the prediction; a one-row frame keeps exog shaped
# (1, n_params) and labels the result with the observation's own row label.
obs_exog = X.loc[[obs_idx]]
prediction = model.get_prediction(obs_exog)

# alpha=0.05 -> 95% intervals. `obs_ci_*` are the bounds for a new
# observation (prediction interval), as opposed to `mean_ci_*`.
pred_summary = prediction.summary_frame(alpha=0.05)
pred_row = pred_summary.iloc[0]

print(f"\nTrack: {obs_data['track_name']} by {obs_data['artist_name']}")
print(f"Actual Popularity: {obs_data['popularity']}")
print(f"Predicted Popularity: {pred_row['mean']:.2f}")
print(f"95% Prediction Interval: [{pred_row['obs_ci_lower']:.2f}, {pred_row['obs_ci_upper']:.2f}]")


0.14.6
   genre        artist_name                        track_name  \
0  Movie     Henri Salvador       C'est beau de faire un Show   
1  Movie  Martin & les fÃ©es  Perdu d'avance (par Gad Elmaleh)   
2  Movie    Joseph Williams    Don't Let Me Be Lonely Tonight   
3  Movie     Henri Salvador    Dis-moi Monsieur Gordon Cooper   
4  Movie       Fabien Nataf                         Ouverture   

                 track_id  popularity  acousticness  danceability  \
0  0BRjO6ga9RKCKjfDqeFgWV           0         0.611         0.389   
1  0BjC1NfoEOOusryehmNudP           1         0.246         0.590   
2  0CoSDzoNIKCRs124s9uTVy           3         0.952         0.663   
3  0Gc6TVm52BwZD07Ki6tIvf           0         0.703         0.240   
4  0IuslXpMROHdEPvSl1fTQK           4         0.950         0.331   

   duration_ms  energy  instrumentalness key  liveness  loudness   mode  \
0        99373   0.910             0.000  C#    0.3460    -1.828  Major   
1       137373   0.737             0