# Sample Notebook

## Setup

### Imports
Put the import statements at the top so it is easy for your readers to know what they must install to use your notebook.

In [12]:
import pandas as pd
import plotnine as gg

In [13]:
from tqdm.notebook import tqdm

### Configurations

For instance, how to show cell output. 

In [14]:
# Example with default 'last'
sum(range(10)) #no

print(sum(range(10))) #yes

sum(range(100)) #yes

45

45


4950

In [15]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"  

#Options: 'all', 'last', 'last_expr', 'none', 'last_expr_or_assign'

In [16]:
# Example v2 with 'all'
sum(range(10))        #yes
print(sum(range(10))) #yes
sum(range(100))       #yes

45

45


4950

## Working with data

### Loading Data

In [17]:
df = pd.read_csv("../data/registration_times.csv")

In [18]:
df.head()
df.dtypes  #not the types we want!

Unnamed: 0,Registration Time,org
0,2022-10-19 13:43:15,cu
1,2022-10-19 18:53:05,cu
2,2022-10-27 09:56:39,cu
3,2022-10-25 13:45:37,cu
4,2022-10-20 09:37:27,cu


Registration Time    object
org                  object
dtype: object

### Aside: Editor Features

In [19]:
#demonstrate completion
pd.to_   #(tab)

AttributeError: module 'pandas' has no attribute 'to_'

In [None]:
#demonstate help
pd.to_datetime #(shift-tab)

In [None]:
#contextual help in Help > Show Contextual Help

### Prep Data

In [None]:

df['Registration_Time'] = pd.to_datetime(df['Registration Time'])
df['org'] = pd.Categorical(df.org, categories=['wcm', 'cu', 'other'])
df.dtypes

In [None]:
df.describe(include='all', datetime_is_numeric=True)

### Plot data

In [None]:
start_t =  pd.to_datetime('2022-10-18 13:00')
stop_t = pd.to_datetime('2022-10-20 13:00')

#pandas to manipulate data
plot_df = (
    df
    .sort_values("Registration_Time")
    .query("(Registration_Time >= @start_t) & (Registration_Time <= @stop_t)")
)

# plotnine to plot (using grammar of graphics like ggplot2)
(
    gg.ggplot(plot_df, gg.aes(x='Registration_Time', fill='org'))
    + gg.geom_histogram(binwidth=4/24, position='stack', color='black', show_legend=False)
    + gg.facet_wrap('org')
    + gg.theme_bw()
    + gg.theme(figure_size=(8,4))
    + gg.theme(axis_text_x=gg.element_text(angle=30, hjust=1))
    + gg.xlab("Time")
    + gg.ylab("Registrations") 
    + gg.ggtitle("Registrations for R workshop in first 48 hours")
)

In [None]:
## Show save, download, export