In [None]:
import logging
import math
import os
import sys
from pathlib import Path

import tomli
import numpy as np
import structlog

%load_ext autoreload
%autoreload 2

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import seaborn as sns
# Configure seaborn in ONE call. `sns.set(...)` resets every theme argument
# that is not passed explicitly (context defaults to 'notebook'), so calling
# it after `sns.set_context('poster')` silently discarded the poster context.
sns.set(context='poster', style='whitegrid', rc={'figure.figsize': (16, 9.)})

import pandas as pd
# Raise pandas' display limits to 120 rows and 120 columns
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 120)

In [None]:
# Configure the root logger: INFO level, records written to stdout
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
)

In [None]:
import pytanis
from pytanis import GSheetsClient, PretalxClient
from pytanis.pretalx import subs_as_df, reviews_as_df, speakers_as_df

In [None]:
# Display the installed pytanis version. Be aware that this notebook might
# only run with the version shown here.
pytanis.__version__

In [None]:
# Event-specific settings live in config.toml so that they do not have to be
# hard-coded in this notebook
config_path = Path('config.toml')
with config_path.open('rb') as config_file:
    cfg = tomli.load(config_file)

In [None]:
# Fetch submissions, speakers and reviews of the configured event from Pretalx.
# Submissions and speakers are requested with `questions=all`
# (presumably so that the question answers are included - confirm against the Pretalx API).
pretalx_client = PretalxClient(blocking=True)
event_name = cfg['event_name']
subs_count, subs = pretalx_client.submissions(event_name, params={'questions': 'all'})
spkrs_count, spkrs = pretalx_client.speakers(event_name, params={'questions': 'all'})
revs_count, revs = pretalx_client.reviews(event_name)

# Turn the returned results into plain lists
subs = list(subs)
revs = list(revs)
spkrs = list(spkrs)

In [None]:
# Convert the fetched Pretalx records into DataFrames; submissions and
# speakers are converted with `with_questions=True`
revs_df = reviews_as_df(revs)
spkrs_df = speakers_as_df(spkrs, with_questions=True)
subs_df = subs_as_df(subs, with_questions=True)

In [None]:
# Keep only the submissions whose state is 'submitted' and preview two of them
is_submitted = subs_df['State'] == 'submitted'
talks_df = subs_df.loc[is_submitted]
talks_df.head(2)


In [None]:
# All available tracks
all_tracks = [
    'PyCon: MLOps & DevOps',
    'PyCon: Programming & Software Engineering',
    'PyCon: Python Language & Ecosystem',
    'PyCon: Security',
    'PyCon: Testing',
    'PyCon: Django & Web',
    'PyData: Data Handling & Data Engineering',
    'PyData: Machine Learning & Deep Learning & Statistics',
    'PyData: Natural Language Processing & Audio (incl. Generative AI NLP)',
    'PyData: Computer Vision (incl. Generative AI CV)',
    'PyData: Generative AI',
    'PyData: Embedded Systems & Robotics',
    'PyData: PyData & Scientific Libraries Stack',
    'PyData: Visualisation & Jupyter',
    'PyData: Research Software Engineering',
    'General: Community & Diversity',
    'General: Education, Career & Life',
    'General: Ethics & Privacy',
    'General: Infrastructure - Hardware & Cloud',
    'General: Others',
]

# All submission types that occur among the submitted talks
submission_types = talks_df['Submission type'].unique()

# Columns holding the expected audience expertise.
# (The 'Domain' entry used to carry trailing whitespace inside the string and
# therefore did not match the actual column name.)
expertise_categories = [
    'Q: Expected audience expertise: Python',
    'Q: Expected audience expertise: Domain',
]

# All expertise levels that occur in either expertise column, de-duplicated
# in first-seen order so the result is the same on every run
expertise_levels = pd.concat(
    [talks_df[category] for category in expertise_categories],
    ignore_index=True,
).unique().tolist()

In [None]:
# One-column DataFrame with a row per entry of `all_tracks`; it is the left
# side of the per-track joins below
tracks_df = pd.DataFrame({'Track': all_tracks})

In [None]:
# Show the distinct submission types found among the submitted talks
submission_types

### Stats for Talks

In [None]:
# Answer options of the "expected audience expertise" questions, in display order
EXPERTISE_LEVEL_ORDER = ['None', 'Novice', 'Intermediate', 'Advanced']


def _quantify_by_expertise(submissions, tracks, submission_type, expertise_col, levels=EXPERTISE_LEVEL_ORDER):
    """Count the submissions of one type per track and expected expertise level.

    Args:
        submissions: DataFrame with the columns 'Track', 'Submission type' and `expertise_col`.
        tracks: DataFrame whose 'Track' column lists every track to report.
        submission_type: Value of 'Submission type' to keep, e.g. 'Talk'.
        expertise_col: Name of the column holding the expected expertise level.
        levels: Expertise levels to report, in column order.

    Returns:
        DataFrame with the columns 'Track', one integer count column per entry
        of `levels`, and 'Total' (the row sum of the level columns). Tracks
        without matching submissions and levels that were never selected are
        reported as 0.
    """
    levels = list(levels)
    of_type = submissions.loc[submissions['Submission type'] == submission_type]
    counts = (
        of_type.groupby(['Track', expertise_col])
        .size()
        .unstack(fill_value=0)
        # reindex instead of plain column selection: a level nobody selected
        # becomes a zero column rather than raising a KeyError
        .reindex(columns=levels, fill_value=0)
    )
    table = tracks.join(counts, on='Track')
    # tracks without any matching submission come out of the join as NaN
    table[levels] = table[levels].fillna(0).astype(int)
    table['Total'] = table[levels].sum(axis=1)
    return table[['Track', *levels, 'Total']]


def _combine_expertise_tables(by_domain, by_python):
    """Merge the domain and Python expertise tables under grouped column headers.

    Args:
        by_domain: Per-track counts by expected domain expertise (needs a 'Track' column).
        by_python: Per-track counts by expected Python expertise (same columns as `by_domain`).

    Returns:
        DataFrame with two-level columns: ('', 'Track') followed by the count
        columns of each input, grouped under 'Expected Domain Expertise by
        Audience' and 'Expected Python Expertise by Audience'.
    """
    group_by_suffix = {
        '_domain': 'Expected Domain Expertise by Audience',
        '_python': 'Expected Python Expertise by Audience',
    }
    # NOTE: according to the pandas documentation an outer merge sorts the
    # join keys lexicographically, so the rows may not follow `all_tracks` order.
    merged = pd.merge(by_domain, by_python, on='Track', how='outer', suffixes=tuple(group_by_suffix))

    # make sure the counts are integers before the columns are relabelled
    count_cols = [col for col in merged.columns if col != 'Track']
    merged[count_cols] = merged[count_cols].fillna(0).astype(int)

    def _grouped_label(col):
        # Remove exactly the merge suffix; str.rstrip would strip a character
        # set and could eat trailing letters of the level name as well.
        for suffix, group in group_by_suffix.items():
            if col.endswith(suffix):
                return (group, col[: -len(suffix)])
        return ('', col)

    merged.columns = pd.MultiIndex.from_tuples([_grouped_label(col) for col in merged.columns])
    return merged


# count the submitted talks per track and expected domain / Python expertise
talks_quantification_by_domain_expertise = _quantify_by_expertise(
    talks_df, tracks_df, 'Talk', 'Q: Expected audience expertise: Domain'
)
talks_quantification_by_python_expertise = _quantify_by_expertise(
    talks_df, tracks_df, 'Talk', 'Q: Expected audience expertise: Python'
)

# put both tables side by side, with a header group per expertise kind
talks_quantification = _combine_expertise_tables(
    talks_quantification_by_domain_expertise,
    talks_quantification_by_python_expertise,
)

talks_quantification


In [None]:
# Talk submissions per track, broken down by expected domain expertise
talks_quantification_by_domain_expertise

In [None]:
# Talk submissions per track, broken down by expected Python expertise
talks_quantification_by_python_expertise

### Stats for Tutorials

### Stats for Talks (long)