# 1. Sample Notebook

## 1.1 Setup

### 1.1.1. Imports
Put the import statements at the top so it is easy for your readers to know what they must install to use your notebook.

In [1]:
import pandas as pd
import plotnine as gg

In [2]:
from tqdm.notebook import tqdm

### 1.1.2. Configurations

For instance, how to show cell output. 

In [3]:
%%time
# Example with default 'last'
sum(range(10)) #no

print(sum(range(10))) #yes

sum(range(100)) #yes

45
CPU times: user 49 µs, sys: 1 µs, total: 50 µs
Wall time: 49.8 µs


4950

In [4]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"  

#Options: 'all', 'last', 'last_expr', 'none', 'last_expr_or_assign'

In [5]:
# Example v2 with 'all'
sum(range(10))        #yes
print(sum(range(10))) #yes
sum(range(100))       #yes

45

45


4950

## 1.2. Working with data

### 1.2.1. Loading Data

In [6]:
df = pd.read_csv("../data/registration_times.csv")

In [7]:
df.head()
df.dtypes  #not the types we want!

Unnamed: 0,Registration Time,org
0,2022-10-19 13:43:15,cu
1,2022-10-19 18:53:05,cu
2,2022-10-27 09:56:39,cu
3,2022-10-25 13:45:37,cu
4,2022-10-20 09:37:27,cu


Registration Time    object
org                  object
dtype: object

### 1.2.2. Aside: Editor Features

In [8]:
#demonstrate completion
pd.to_datetime  #(tab)

<function pandas.core.tools.datetimes.to_datetime(arg: 'DatetimeScalarOrArrayConvertible | DictConvertible', errors: 'DateTimeErrorChoices' = 'raise', dayfirst: 'bool' = False, yearfirst: 'bool' = False, utc: 'bool' = False, format: 'str | None' = None, exact: 'bool | lib.NoDefault' = <no_default>, unit: 'str | None' = None, infer_datetime_format: 'lib.NoDefault | bool' = <no_default>, origin: 'str' = 'unix', cache: 'bool' = True) -> 'DatetimeIndex | Series | DatetimeScalar | NaTType | None'>

In [9]:
#demonstate help
pd.to_datetime #(shift-tab)

<function pandas.core.tools.datetimes.to_datetime(arg: 'DatetimeScalarOrArrayConvertible | DictConvertible', errors: 'DateTimeErrorChoices' = 'raise', dayfirst: 'bool' = False, yearfirst: 'bool' = False, utc: 'bool' = False, format: 'str | None' = None, exact: 'bool | lib.NoDefault' = <no_default>, unit: 'str | None' = None, infer_datetime_format: 'lib.NoDefault | bool' = <no_default>, origin: 'str' = 'unix', cache: 'bool' = True) -> 'DatetimeIndex | Series | DatetimeScalar | NaTType | None'>

In [None]:
#contextual help in Help > Show Contextual Help

### 1.2.3. Prep Data

In [10]:

df['Registration_Time'] = pd.to_datetime(df['Registration Time'])
df['org'] = pd.Categorical(df.org, categories=['wcm', 'cu', 'other'])
df.dtypes

Registration Time            object
org                        category
Registration_Time    datetime64[ns]
dtype: object

In [13]:
df.describe(include='all', datetime_is_numeric=True)

TypeError: NDFrame.describe() got an unexpected keyword argument 'datetime_is_numeric'

### Plot data

In [15]:
start_t =  pd.to_datetime('2022-10-18 13:00')
stop_t = pd.to_datetime('2022-10-20 13:00')

#pandas to manipulate data
plot_df = (
    df
    .sort_values("Registration_Time")
    .query("(Registration_Time >= @start_t) & (Registration_Time <= @stop_t)")
)

In [16]:
# plotnine to plot (using grammar of graphics like ggplot2)
(
    gg.ggplot(plot_df, gg.aes(x='Registration_Time', fill='org'))
    + gg.geom_histogram(binwidth=4/24, position='stack', color='black', show_legend=False)
    + gg.facet_wrap('org')
    + gg.theme_bw()
    + gg.theme(figure_size=(8,4))
    + gg.theme(axis_text_x=gg.element_text(angle=30, hjust=1))
    + gg.xlab("Time")
    + gg.ylab("Registrations") 
    + gg.ggtitle("Registrations for R workshop in first 48 hours")
)

TypeError: can't compare offset-naive and offset-aware datetimes



TypeError: can't compare offset-naive and offset-aware datetimes

TypeError: can't compare offset-naive and offset-aware datetimes



TypeError: can't compare offset-naive and offset-aware datetimes

In [None]:
## Show save, download, export