In [1]:
import panel as pn
import pandas as pd
import plotly.express as px
# Load Panel's Plotly extension so the Plotly figures returned further down can be
# rendered inside Panel layouts.
pn.extension('plotly')

In [2]:
# Add the parent directory to the import search path so the project-local `src`
# package can be imported. The path is relative, so it is resolved against the
# current working directory — presumably the notebook's own folder; confirm when
# launching Jupyter from elsewhere.
import sys
sys.path.append("../")
from src import puhti_files

In [3]:
#import importlib
#importlib.reload(puhti_files)

In [4]:
# Load the training split through the project helper. Later cells rely on the
# columns 'publication_year', 'main_category_label' and 'sub_category_label'.
# NOTE(review): the flags presumably add label columns, merge ECCO metadata and
# switch to readable sub-category names — confirm in src/puhti_files.
df = puhti_files.genre_data_to_pandas(
    data="train",
    add_labels=True,
    merge_ecco=True,
    better_subcat_names=True,
)

Read in dataset train.csv. Set param 'data' to 'dev', 'test' or 'train' if you want another dataset.


In [5]:
#TODO:
#Visualize years
#Publication place
#Authors by year
#Better sub_cat labels

In [6]:
### Data preparation

# Parse publication years as numbers; values that cannot be parsed become NaN.
df['publication_year'] = pd.to_numeric(df['publication_year'], errors='coerce')

# Floor every year to the start of its decade (e.g. 1754 -> 1750).
df['decade'] = df['publication_year'].floordiv(10).mul(10)

In [7]:
def create_plot(main_category, count_mode):
    """Build a per-decade line chart of sub-categories for one main category.

    Reads the module-level DataFrame ``df`` and uses its columns
    'main_category_label', 'sub_category_label' and 'decade'.

    Parameters
    ----------
    main_category
        Value of 'main_category_label' whose rows are plotted.
    count_mode
        'Proportions' plots each sub-category's share of the decade's rows
        (within the selected main category); any other value plots raw counts.

    Returns
    -------
    The Plotly figure produced by ``px.line``, one line per sub-category.
    """
    in_category = df['main_category_label'] == main_category
    counts = (
        df[in_category]
        .groupby(['decade', 'sub_category_label'])
        .size()
        .reset_index(name='count')
    )

    if count_mode != 'Proportions':
        # Raw counts: plot the group sizes unchanged.
        y_label = "Count"
        values = counts
        values['value'] = values['count']
    else:
        # Shares: divide each count by the total of its decade.
        y_label = "Proportion"
        per_decade = counts.groupby('decade')['count'].sum().reset_index(name='total_count')
        values = counts.merge(per_decade, on='decade')
        values['value'] = values['count'] / values['total_count']

    # Wide layout: one row per decade, one column per sub-category; a
    # sub-category absent from a decade is plotted as 0.
    wide = values.pivot(
        index='decade',
        columns='sub_category_label',
        values='value',
    ).fillna(0)

    # Horizontal legend, centred and placed below the plotting area.
    legend_below = dict(
        orientation='h',
        yanchor='bottom',
        y=-0.5,
        xanchor='center',
        x=0.5,
    )

    fig = px.line(wide, x=wide.index, y=wide.columns, markers=True)
    fig.update_layout(
        legend=legend_below,
        width=1000,
        height=600,
        xaxis_title="Decade",
        yaxis_title=y_label,
        legend_title="Sub Categories",
    )
    return fig

In [8]:
# Drop-down offering every distinct main category present in the data.
main_category_selector = pn.widgets.Select(
    name='Main Category',
    options=list(df['main_category_label'].unique()),
)

# Toggle between per-decade shares and raw counts.
count_mode_selector = pn.widgets.RadioBoxGroup(
    name='Count Mode',
    options=['Proportions', 'Absolute Counts'],
    inline=True,
)

# Bind create_plot to both widgets so it is called with their current values.
dynamic_plot = pn.bind(
    create_plot,
    main_category=main_category_selector,
    count_mode=count_mode_selector,
)

# Stack the controls above the plot.
layout = pn.Column(
    main_category_selector,
    count_mode_selector,
    dynamic_plot,
)

In [9]:
# !!! NOTE: this cell may have to be run twice before the widgets become
# interactive (cause not yet identified).
# Proportions are computed per decade within the selected main category, so the
# sub-category values of one decade sum to 1.
layout