In [None]:
import polars as pl
from utils import db_config
import seaborn as sns
import matplotlib.pyplot as plt
from typing import Iterable

In [None]:
# Connection string obtained from the project's db_config helper.
# NOTE(review): 'postgres_fb_05' is presumably the name of a configured
# database entry -- confirm against utils.db_config.
conn_config = db_config.get_postgres_connection_string('postgres_fb_05')
# Name of the count column produced by aggregate_data (its default alias)
# and used as the y-axis column in the yearly plots.
activity_col = 'activity'

In [None]:
def aggregate_data(
    df: pl.DataFrame,
    groupby_cols: Iterable[str],
    agg: str,
    alias: str = activity_col
) -> pl.DataFrame:
    """Count the non-null values of one column within each group.

    Args:
        df: Frame to aggregate.
        groupby_cols: Names of the columns that define the groups.
        agg: Name of the column whose non-null values are counted.
        alias: Name given to the resulting count column.

    Returns:
        One row per group holding the group keys and the count column,
        with every row that contains a null removed.
    """
    grouped = df.group_by(*groupby_cols)
    count_expr = pl.count(agg).alias(alias)
    counted = grouped.agg(count_expr)
    # Rows with a null in any column (e.g. a null group key) are discarded.
    return counted.drop_nulls()

In [None]:
# Load the full activity_datapoints table into a polars frame via connectorx.
data = pl.read_database_uri(
    'SELECT * FROM activity_datapoints',
    conn_config,
    engine='connectorx',
)

In [None]:
# Break each timestamp into year, weekday and hour, keeping the 'src' label
# next to them. The weekday and hour parts are cast to Int16.
split_timestamp = data.select([
    pl.col('timestamp').dt.year().alias('years'),
    pl.col('timestamp').dt.weekday().cast(pl.Int16).alias('days'),
    pl.col('timestamp').dt.hour().cast(pl.Int16).alias('hours'),
    pl.col('src'),
])

# Two-column projections, one per time granularity.
years = split_timestamp.select(['years', 'src'])
weekly = split_timestamp.select(['days', 'src'])
hourly = split_timestamp.select(['hours', 'src'])

In [None]:
# Row counts per (time bucket, src) at each granularity, ordered by the bucket.
years_agg = aggregate_data(years, ['years', 'src'], 'years').sort('years')
weekly_agg = aggregate_data(weekly, ['days', 'src'], 'days').sort('days')
hourly_agg = aggregate_data(hourly, ['hours', 'src'], 'hours').sort('hours')

# Row counts per (weekday, hour) bucket, consumed by the per-day facet grid.
# Built with the shared aggregate_data helper instead of repeating its
# group/count/drop-nulls pipeline inline. The 'src' column is not needed for
# this aggregate, so no projection is made beforehand.
hours_per_day = aggregate_data(
    df=split_timestamp,
    groupby_cols=['days', 'hours'],
    agg='hours',
    alias='hourly_activity'
).sort('hours')

In [None]:
# Yearly activity without splitting by source. years_agg has one row per
# (years, src), so several rows share the same x value; errorbar=None turns
# off the error band seaborn would otherwise draw around the line.
# NOTE(review): with its default estimator seaborn plots the mean across
# sources for each year, not the total -- confirm this is the intended view.
sns.lineplot(data=years_agg, x='years', y=activity_col, errorbar=None)

In [None]:
# Yearly counts for every source except reactions, ext_activity and
# account_activity.
# NOTE(review): the "_less_300" suffix presumably refers to an activity
# threshold of about 300 -- confirm.
years_agg_less_300 = years_agg.filter(
    ~pl.col('src').is_in(['reactions', 'ext_activity', 'account_activity'])
)

# One line per source; the legend is anchored outside the axes on the right.
sns.lineplot(data=years_agg_less_300, x='years', y=activity_col, hue='src', legend='brief')
plt.legend(loc='upper left', bbox_to_anchor=(1.02, 1), borderaxespad=0)

In [None]:
# Yearly counts restricted to reactions, ext_activity and account_activity,
# the complement of the sources kept in years_agg_less_300.
# NOTE(review): the "_grtr_300" suffix presumably refers to an activity
# threshold of about 300 -- confirm.
years_agg_grtr_300 = years_agg.filter(
    pl.col('src').is_in(['reactions', 'ext_activity', 'account_activity'])
)

# The y column comes from activity_col, matching the other yearly plots, so
# changing that constant cannot leave this cell pointing at a missing column.
sns.lineplot(
    data=years_agg_grtr_300,
    x='years',
    y=activity_col,
    hue='src',
    legend='brief'
)
# Anchor the legend outside the axes on the right.
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)

In [None]:
# One bar chart per 'days' value (up to seven panels per row), each showing
# hourly_activity for hours 0 through 23 in fixed order.
g = sns.FacetGrid(hours_per_day, col='days', col_wrap=7, height=4)
g.map(sns.barplot, 'hours', 'hourly_activity', order=list(range(24)))