In [1]:
import pandas as pd
import numpy as np
import altair as alt

In [2]:
# Loading and cleaning data
cereal_ts = pd.read_csv("cereal-time-data.csv")
cereal_ts['grams'] = cereal_ts['grams'].fillna(value = 0).replace(' ', 0)
cereal_ts['date'] = pd.to_datetime(cereal_ts['date'], format = '%m/%d/%y')
cereal_ts = cereal_ts.astype({'name':'string', 'grams':'int'})
cereal_ts['percentage'] = cereal_ts['grams'] / cereal_ts.groupby('name')['grams'].transform('max')
cereal_ts = cereal_ts.sort_values(by = ['name', 'date']).reset_index(drop = True)
cereal_ts['gram_lag_1'] = cereal_ts.groupby('name')['grams'].shift(1).fillna(0)
cereal_ts['gram_change'] = cereal_ts['gram_lag_1'] - cereal_ts['grams']
cereal_ts['date_lag_1'] = pd.to_datetime(cereal_ts.groupby('name')['date'].shift(1), format = '%m/%d/%y')
cereal_ts['day_change'] = (cereal_ts['date_lag_1'] - cereal_ts['date']).dt.days
cereal_ts['change_per_day'] = cereal_ts['gram_change'] / cereal_ts['day_change']
cereal_ts.head()


Unnamed: 0,date,name,grams,percentage,gram_lag_1,gram_change,date_lag_1,day_change,change_per_day
0,2025-03-21,Cheerios,617,1.0,0.0,-617.0,NaT,,
1,2025-03-24,Cheerios,582,0.943274,617.0,35.0,2025-03-21,-3.0,-11.666667
2,2025-03-28,Cheerios,582,0.943274,582.0,0.0,2025-03-24,-4.0,-0.0
3,2025-03-31,Cheerios,582,0.943274,582.0,0.0,2025-03-28,-3.0,-0.0
4,2025-04-04,Cheerios,582,0.943274,582.0,0.0,2025-03-31,-4.0,-0.0


In [22]:
alt.Chart(cereal_ts).mark_line().encode(
    x='date:T',
    y='grams:Q',
    color='name:N'
).properties(
    width = 1000
)

In [23]:
alt.Chart(cereal_ts).mark_line().encode(
    x='date:T',
    y='gram_change:Q',
    color='name:N'
).properties(
    width = 1000
)

In [12]:
alt.Chart(cereal_ts).mark_line().encode(
    x='date:T',
    y='change_per_day:Q',
    color='name:N'
).properties(
    width = 1000
)

In [39]:
# Correlation

# Set up table 
cereal_pivot = cereal_ts.pivot(index = 'date', columns = 'name', values = 'change_per_day')
corr = cereal_pivot.corr('pearson')
corr_reset = corr.reset_index()
corr_long = pd.melt(corr_reset, id_vars='name', var_name = 'name2', value_name='corr')
corr_lower_triangle = corr_long[corr_long['name'] <= corr_long['name2']]

chart = alt.Chart(corr_lower_triangle).mark_rect().encode(
    x=alt.X('name:O', axis=alt.Axis(title='')),
    y=alt.Y('name2:O', axis=alt.Axis(title='')),
    color=alt.Color('corr:Q',
         scale=alt.Scale(domain=[-1, -0.5, 0, 0.5, 1], range=['blue', 'lightblue', 'white', 'pink', 'red']),
         title='Correlation'
     ),
    tooltip=[
        alt.Tooltip('name', title='Var 1'),
        alt.Tooltip('name2', title='Var 2'),
        alt.Tooltip('corr', title='Corr', format='.2f')
    ]
).properties(
    title='Correlation Matrix Heatmap'
)

chart

In [29]:
cereal_ts

Unnamed: 0,date,name,grams,percentage,gram_lag_1,gram_change,date_lag_1,day_change,change_per_day
0,2025-03-21,Cheerios,617,1.000000,0.0,617.0,NaT,,
1,2025-03-24,Cheerios,582,0.943274,617.0,-35.0,2025-03-21,-3.0,11.666667
2,2025-03-28,Cheerios,582,0.943274,582.0,0.0,2025-03-24,-4.0,-0.000000
3,2025-03-31,Cheerios,582,0.943274,582.0,0.0,2025-03-28,-3.0,-0.000000
4,2025-04-04,Cheerios,582,0.943274,582.0,0.0,2025-03-31,-4.0,-0.000000
...,...,...,...,...,...,...,...,...,...
184,2025-06-02,Raisin Bran,612,0.709154,67.0,545.0,2025-05-30,-3.0,-181.666667
185,2025-06-06,Raisin Bran,547,0.633835,612.0,-65.0,2025-06-02,-4.0,16.250000
186,2025-06-09,Raisin Bran,448,0.519119,547.0,-99.0,2025-06-06,-3.0,33.000000
187,2025-06-16,Raisin Bran,263,0.304751,448.0,-185.0,2025-06-09,-7.0,26.428571


In [44]:
positive_changes_df = cereal_ts[cereal_ts['gram_change'] < -10]
positive_counts = positive_changes_df[positive_changes_df['gram_change'] < 0].groupby('name').size().to_frame(name='positive_count').reset_index()

positive_counts

Unnamed: 0,name,positive_count
0,Cheerios,1
1,Chex,2
2,Corn Flakes,2
3,Frosted Mini Wheats,2
4,Honey Nut Cheerios,4
5,Honey Nut Chex,1
6,Life,2
7,Raisin Bran,4
