Merging with economic data


In [None]:
import pandas as pd
import numpy as np


# Weekly aggregated music data (schema is whatever weekly_aggregated.csv holds).
weekly_csv = r'C:\Users\joshu\OneDrive\Desktop\CS74\Final_Project\weekly_aggregated.csv'
music_df = pd.read_csv(weekly_csv)

# Parse the week stamps so they can drive a time-based resample.
music_df['week_date'] = pd.to_datetime(music_df['week_date'])

# The economic series are merged on calendar month further down, so collapse
# the weekly rows to one row per month: the mean of every numeric column,
# labelled with the month-end date ('ME').
music_monthly = (
    music_df
    .set_index('week_date')
    .resample('ME')
    .mean(numeric_only=True)
    .reset_index()
)

# Load the four economic series (recession flag, unemployment, GDP, CPI).
usrec = pd.read_csv(r'C:\Users\joshu\OneDrive\Desktop\CS74\Final_Project\USREC.csv')
unrate = pd.read_csv(r'C:\Users\joshu\OneDrive\Desktop\CS74\Final_Project\UNRATE.csv')
gdp = pd.read_csv(r'C:\Users\joshu\OneDrive\Desktop\CS74\Final_Project\GDP.csv')
cpi = pd.read_csv(r'C:\Users\joshu\OneDrive\Desktop\CS74\Final_Project\CPI.csv')

# USREC names its date column 'observation_date'; parse it, then rename it so
# all four frames share a datetime 'DATE' key.
usrec = usrec.assign(
    observation_date=pd.to_datetime(usrec['observation_date'])
).rename(columns={'observation_date': 'DATE'})

# The remaining frames already use 'DATE'; only the dtype needs converting.
for frame in (unrate, gdp, cpi):
    frame['DATE'] = pd.to_datetime(frame['DATE'])

# 4. Merge economic datasets
# USREC is the base table; each other series is left-joined on DATE, so dates
# missing from a series show up as NaN in that series' column.
econ_df = usrec
for frame, value_col in ((unrate, 'UNRATE'), (gdp, 'GDP'), (cpi, 'CPIAUCSL')):
    econ_df = econ_df.merge(frame[['DATE', value_col]], on='DATE', how='left')

# -----------------------------
# 5. Filter music to overlapping date range
# -----------------------------
# Compare at calendar-month granularity rather than on raw timestamps.
# music_monthly['week_date'] carries month-END labels (from the 'ME' resample),
# while the economic 'DATE' stamps may fall anywhere in the month (FRED-style
# files normally use the first of the month -- TODO confirm). Comparing the raw
# timestamps would wrongly exclude the last overlapping month, because a
# month-end date is never <= an earlier date in that same month.
music_months = music_monthly['week_date'].dt.to_period('M')
gdp_months = gdp['DATE'].dt.to_period('M')
cpi_months = cpi['DATE'].dt.to_period('M')

start_month = max(music_months.min(), gdp_months.min(), cpi_months.min())
end_month = min(music_months.max(), gdp_months.max(), cpi_months.max())

# Timestamp bounds kept under their original names: first day of the first
# month and last day (at midnight) of the last month of the overlap.
start_date = start_month.start_time
end_date = end_month.end_time.normalize()

music_monthly_filtered = music_monthly[
    (music_months >= start_month) &
    (music_months <= end_month)
].copy()

# -----------------------------
# 6. Align on year-month
# -----------------------------
music_monthly_filtered['year_month'] = music_monthly_filtered['week_date'].dt.to_period('M')
econ_df['year_month'] = econ_df['DATE'].dt.to_period('M')

# GDP appears to be reported quarterly, so after the left-merge onto the
# monthly base it is NaN for the other months of each quarter. Forward-fill it
# on the economic side, in chronological order and BEFORE joining to the
# music window: if the window opens mid-quarter, the quarter's observation
# lives on an earlier row that the window excludes, and a fill applied only
# after the merge would leave the leading GDP values as NaN.
econ_monthly = econ_df.sort_values('DATE').copy()
econ_monthly['GDP'] = econ_monthly['GDP'].ffill()

merged_df = pd.merge(music_monthly_filtered, econ_monthly, on='year_month', how='left')

# Drop temporary key
merged_df.drop(columns=['year_month'], inplace=True)

# -----------------------------
# 7. Forward-fill GDP for monthly alignment
# -----------------------------
# Still needed for music months that have no economic row at all: those rows
# come out of the left merge with NaN GDP and inherit the previous month's value.
merged_df['GDP'] = merged_df['GDP'].ffill()  # or interpolate if preferred

# -----------------------------
# 8. Save merged data to CSV
# -----------------------------
output_path = r'C:\Users\joshu\OneDrive\Desktop\CS74\Final_Project\music_econ_merged.csv'

# index=False: the row index is just positional, so keep it out of the file.
merged_df.to_csv(output_path, index=False)

# Quick sanity report: where the file went, its dimensions, and the first ten
# rows of the date column alongside the four economic indicators.
preview_columns = ['week_date','GDP','CPIAUCSL','UNRATE','USREC']
preview = merged_df[preview_columns].head(10)

print(f"✅ Merged dataset saved successfully to: {output_path}")
print(f"Shape: {merged_df.shape}")
print(preview)


✅ Merged dataset saved successfully to: C:\Users\joshu\OneDrive\Desktop\CS74\Final_Project\music_econ_merged.csv
Shape: (569, 31)
   week_date       GDP  CPIAUCSL  UNRATE  USREC
0 1974-01-31  1491.209      46.8     5.1      1
1 1974-02-28  1491.209      47.3     5.2      1
2 1974-03-31  1491.209      47.8     5.1      1
3 1974-04-30  1530.056      48.1     5.1      1
4 1974-05-31  1530.056      48.6     5.1      1
5 1974-06-30  1530.056      49.0     5.4      1
6 1974-07-31  1560.026      49.3     5.5      1
7 1974-08-31  1560.026      49.9     5.5      1
8 1974-09-30  1560.026      50.6     5.9      1
9 1974-10-31  1599.679      51.0     6.0      1
