In [2]:
import pandas as pd
import os
import pickle

In [None]:
ref_pickle = "datasets/popularity_reference.pkl"

def process_and_store_user_data(csv_filepath, user_id):
    # Load user data
    df = pd.read_csv(csv_filepath)

    # Convert datetime and extract week
    df['datetime'] = pd.to_datetime(df['datetime'])
    df['year_week'] = df['datetime'].dt.to_period('W').apply(lambda r: r.start_time)

    # Calculate weekly popularity
    weekly_artist_pop = df.groupby('year_week')['artist_popularity'].mean().reset_index(name='artist_popularity')
    weekly_track_pop = df.groupby('year_week')['track_popularity'].mean().reset_index(name='track_popularity')

    # Merge
    weekly_df = pd.merge(weekly_artist_pop, weekly_track_pop, on='year_week')
    weekly_df['user_id'] = user_id

    # Load or initialize reference file
    if os.path.exists(ref_pickle):
        with open(ref_pickle, "rb") as f:
            reference_df = pickle.load(f)
    else:
        reference_df = pd.DataFrame()

    # Append new data and save
    reference_df = pd.concat([reference_df, weekly_df], ignore_index=True)

    with open(ref_pickle, "wb") as f:
        pickle.dump(reference_df, f)

    return weekly_df  # Return current user's processed data

In [None]:
import plotly.graph_objects as go
import streamlit as st

def display_popularity_comparison(user_id, user_df):
    # Load reference data
    with open(ref_pickle, "rb") as f:
        reference_df = pickle.load(f)

    # Filter out the current user
    other_users_df = reference_df[reference_df['user_id'] != user_id]

    # Average weekly popularity across users
    avg_ref_df = other_users_df.groupby('year_week')[['artist_popularity', 'track_popularity']].mean().reset_index()

    # Sort both
    user_df = user_df.sort_values("year_week")
    avg_ref_df = avg_ref_df.sort_values("year_week")

    # Plot
    fig = go.Figure()

    # User lines
    fig.add_trace(go.Scatter(
        x=user_df['year_week'], y=user_df['artist_popularity'],
        mode='lines+markers', name=f'{user_id} Artist Popularity',
        line=dict(color='dodgerblue'), hovertemplate='Week: %{x|%d %b %Y}<br>Artist: %{y:.1f}<extra></extra>'
    ))
    fig.add_trace(go.Scatter(
        x=user_df['year_week'], y=user_df['track_popularity'],
        mode='lines+markers', name=f'{user_id} Track Popularity',
        line=dict(color='royalblue'), hovertemplate='Week: %{x|%d %b %Y}<br>Track: %{y:.1f}<extra></extra>'
    ))

    # Reference lines
    fig.add_trace(go.Scatter(
        x=avg_ref_df['year_week'], y=avg_ref_df['artist_popularity'],
        mode='lines', name='Avg Artist Popularity',
        line=dict(dash='dash', color='darkgreen')
    ))
    fig.add_trace(go.Scatter(
        x=avg_ref_df['year_week'], y=avg_ref_df['track_popularity'],
        mode='lines', name='Avg Track Popularity',
        line=dict(dash='dash', color='green')
    ))

    fig.update_layout(
        title=f'Popularity Comparison for {user_id}',
        xaxis_title='Week',
        yaxis_title='Average Popularity',
        hovermode="x unified"
    )

    st.plotly_chart(fig, use_container_width=True)


In [None]:
uploaded_file = st.file_uploader("Upload your listening history CSV", type=["csv"])
user_id = st.text_input("Enter your user ID")

if uploaded_file and user_id:
    timestamp_suffix = pd.Timestamp.now().strftime("%Y%m%d%H%M%S")
    csv_filepath = f"datasets/{user_id}_{timestamp_suffix}.csv"

    with open(csv_filepath, "wb") as f:
        f.write(uploaded_file.read())

    user_df = process_and_store_user_data(csv_filepath, user_id)
    display_popularity_comparison(user_id, user_df)


In [1]:
popularity_ref_pickle = "datasets/chart_scores/popularity_reference.pkl"

df = pd.read_csv('datasets/user_clean/Benjie_20250603_144404.csv')
df_info = pd.read_csv('datasets/info_clean/trk_alb_art.csv')
user_id = "Benjie"

# join info to current user
df = pd.merge(df,df_info,left_on=["track_name","album_name","artist_name"],right_on=["track_name","album_name","artist_name"],how="left",suffixes=["","_remove"])


# Ensure datetime is parsed
df['datetime'] = pd.to_datetime(df['datetime'])
df['year_week'] = df['datetime'].dt.to_period('W').apply(lambda r: r.start_time)

# Weekly mean popularity
weekly_artist_pop = df.groupby('year_week')['artist_popularity'].mean().reset_index(name='artist_popularity')
weekly_track_pop = df.groupby('year_week')['track_popularity'].mean().reset_index(name='track_popularity')

weekly_df = pd.merge(weekly_artist_pop, weekly_track_pop, on='year_week')
weekly_df['user_id'] = user_id

# Append to or create reference pickle
if os.path.exists(popularity_ref_pickle):
    with open(popularity_ref_pickle, "rb") as f:
        reference_df = pickle.load(f)
else:
    reference_df = pd.DataFrame()

reference_df = pd.concat([reference_df, weekly_df], ignore_index=True)

with open(popularity_ref_pickle, "wb") as f:
    pickle.dump(reference_df, f)

NameError: name 'pd' is not defined

In [12]:
with open("datasets/chart_scores/popularity_reference.pkl", "rb") as f:
            pickle_ref = pickle.load(f)

In [7]:
df = pd.read_csv('datasets/user_clean/Benjie_20250603_144404.csv')
len(df.shape[0])

TypeError: object of type 'int' has no len()