# Momentum 23 Attendee Analyis

In [525]:
import pandas as pd
import numpy as np

df23 = pd.read_csv('data/momentum23attendees.csv')
df23.drop(columns=['Ticket Email', 'Ticket Full Name', 'Discount Status', 'Ticket Company Name', 'Ticket Job Title', 'Event', 'Unique Ticket URL', 'Unique Order URL', 'Ticket Phone Number', 'Order Name', 'Order Email', 
                   'Order Phone Number', 'Unique Order URL', 'Ticket First Name', 'Ticket Last Name', 'Ticket Reference', 'Source Type', 'Check-ins: Test', 'Order Amount', 'Order Amount Refunded', 'Order Paid With', 
                   'Order Completed Date (+00:00 UTC)', 'I will be at least 21 years old on October 19th','Momentum 2023', 'Payment Reference', 'Ticket Created Date (+00:00 UTC)', 'Ticket Last Updated Date (+00:00 UTC)', 'Order IP'], inplace=True)
df23.head()

Unnamed: 0,Number,Ticket,Void Status,Price,Tags,Order Reference,Order Discount Code,Order Created Date (+00:00 UTC),Source,What is your favorite Cincinnati pizza place?,Job title,Do you have any dietary restrictions?,T-shirt size,What school are you attending?,Check-ins: Registration,Check-ins: T-shirts
0,3,Early Bird,,149.0,,GPDT,,2023-06-28 20:49:38 UTC,,LaRosa's,Senior Developer,,Unisex - 2XL,,1.0,
1,2,Content Reviewer,,0.0,,MEHE,,2023-07-02 13:03:48 UTC,,Goodfellas,Software Engineer,,Unisex - L,,1.0,
2,3,Content Reviewer,,0.0,,9HDJ,,2023-07-02 13:04:18 UTC,,,Engineering Thaumaturgist,,Unisex - M,,1.0,
3,4,Content Reviewer,,0.0,,EQAC,,2023-07-02 13:04:51 UTC,,,,,,,,
4,5,Content Reviewer,,0.0,,LB9M,,2023-07-02 13:05:17 UTC,,,,,,,,


In [526]:
df23.replace(np.nan, None, inplace=True)
df23.head()

Unnamed: 0,Number,Ticket,Void Status,Price,Tags,Order Reference,Order Discount Code,Order Created Date (+00:00 UTC),Source,What is your favorite Cincinnati pizza place?,Job title,Do you have any dietary restrictions?,T-shirt size,What school are you attending?,Check-ins: Registration,Check-ins: T-shirts
0,3,Early Bird,,149.0,,GPDT,,2023-06-28 20:49:38 UTC,,LaRosa's,Senior Developer,,Unisex - 2XL,,1.0,
1,2,Content Reviewer,,0.0,,MEHE,,2023-07-02 13:03:48 UTC,,Goodfellas,Software Engineer,,Unisex - L,,1.0,
2,3,Content Reviewer,,0.0,,9HDJ,,2023-07-02 13:04:18 UTC,,,Engineering Thaumaturgist,,Unisex - M,,1.0,
3,4,Content Reviewer,,0.0,,EQAC,,2023-07-02 13:04:51 UTC,,,,,,,,
4,5,Content Reviewer,,0.0,,LB9M,,2023-07-02 13:05:17 UTC,,,,,,,,


In [527]:
df23.columns

Index(['Number', 'Ticket', 'Void Status', 'Price', 'Tags', 'Order Reference',
       'Order Discount Code', 'Order Created Date (+00:00 UTC)', 'Source',
       'What is your favorite Cincinnati pizza place?', 'Job title',
       'Do you have any dietary restrictions?', 'T-shirt size',
       'What school are you attending?', 'Check-ins: Registration',
       'Check-ins: T-shirts'],
      dtype='object')

In [528]:
# Ensure the numeric columns are treated as such
df23['Price'] = pd.to_numeric(df23['Price'], errors='coerce')
df23['Check-ins: Registration'] = pd.to_numeric(df23['Price'], errors='coerce')

In [529]:
df23.rename(columns={'Order Created Date (+00:00 UTC)': 'Order Date', 
                     'What school are you attending?': 'School',
                     'Do you have any dietary restrictions?' : 'Dietary Restrictions',
                     'What is your favorite Cincinnati pizza place?': 'Favorite Pizza',
                     }, inplace=True)

# Ensure the Order Date column is in datetime format
df23['Order Date'] = pd.to_datetime(df23['Order Date'])

df23.columns

Index(['Number', 'Ticket', 'Void Status', 'Price', 'Tags', 'Order Reference',
       'Order Discount Code', 'Order Date', 'Source', 'Favorite Pizza',
       'Job title', 'Dietary Restrictions', 'T-shirt size', 'School',
       'Check-ins: Registration', 'Check-ins: T-shirts'],
      dtype='object')

In [530]:
df23.describe()

Unnamed: 0,Number,Price,Check-ins: Registration
count,502.0,501.0,501.0
mean,253.386454,113.666667,113.666667
std,145.404117,78.161082,78.161082
min,2.0,0.0,0.0
25%,128.25,0.0,0.0
50%,253.5,149.0,149.0
75%,379.75,149.0,149.0
max,505.0,239.0,239.0


In [531]:
df23['Check-ins: Registration'].fillna(0, inplace=True)
df23['Check-ins: Registration'].describe()

count    502.000000
mean     113.440239
std       78.247671
min        0.000000
25%        0.000000
50%      149.000000
75%      149.000000
max      239.000000
Name: Check-ins: Registration, dtype: float64

In [532]:
df23['Ticket'].value_counts()

Ticket
Early Bird                225
Sponsor                    66
Regular Price              50
Speaker                    37
Late Registration          34
Procrastinator Pricing     24
Content Reviewer           18
Student                    17
Volunteer                  13
Organizer                   8
VIP                         7
Early Student               3
Name: count, dtype: int64

In [533]:
import plotly.express as px

# Specify defaults for Plotly settings
px.defaults.color_continuous_scale = px.colors.sequential.Agsunset
px.defaults.color_discrete_sequence = px.colors.qualitative.Prism
px.defaults.template = 'plotly_dark'

## Attendee Sources

In [534]:
df23['Source'].replace('cta-widget', 'Call to Action', inplace=True)
df23['Source'].replace('tickets-page-widget', 'Tickets Page', inplace=True)
df23['Source'].fillna('Unknown', inplace=True)

In [535]:
px.histogram(df23[df23['Price'] > 0], color='Source', x='Source', title='Momentum 23 Paid Attendees by Source').update_xaxes(categoryorder='total descending')

## Ticket Types and Order Dates

In [536]:
px.histogram(df23[df23['Price'] > 0], color='Ticket', x='Ticket', title='Momentum 23 Paid Attendees by Ticket Type').update_xaxes(categoryorder='total descending')

In [537]:
px.scatter(df23[df23['Price'] > 0].sort_values('Price'), x='Order Date', y='Price', color='Ticket', title='Momentum 23 Order Price by Date & Ticket Type', color_discrete_sequence=px.colors.sequential.Agsunset_r)


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [538]:
px.density_heatmap(df23[df23['Price'] > 0], x='Order Date', y='Ticket', title='Momentum 23 Ticket Type by Date')


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [539]:
px.density_heatmap(df23[df23['Price'] > 0], x='Order Date', y='Price', title='Momentum 23 Ticket Price by Date')


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



## Check-In Count

In [540]:
px.density_heatmap(df23[df23['Price'] > 0], x='Order Date', y='Check-ins: Registration', title='Momentum 23 Check-ins by order date')


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [541]:
px.density_heatmap(df23, y='Ticket', x='Check-ins: Registration', title='Momentum 23 Check-ins by Ticket Type')

In [542]:
px.scatter(df23[df23['Check-ins: Registration'] == 0].sort_values('Order Date'), y='Ticket', x='Order Date', title='No Check-ins by Ticket and Order Date', color='Ticket')


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



## T-Shirt Sizes

In [543]:
df23['T-shirt size'].fillna('None', inplace=True)
px.histogram(df23, x='T-shirt size', title='T-Shirt Sizes', color='T-shirt size').update_xaxes(categoryorder='total descending')

In [544]:
px.density_heatmap(df23[df23['T-shirt size'] == 'None'], x='Order Date', y='Ticket', title='No T-Shirt Size by Order Date and Ticket Type')


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



## Womens T-Shirts

In [545]:
df23['Is Womens Shirt'] = df23['T-shirt size'].str.contains('Women', case=False)

In [546]:
px.histogram(df23, x='Is Womens Shirt', title='Attendees by Womens Shirt Order', color='Is Womens Shirt')

In [547]:
px.scatter(df23[df23['Is Womens Shirt'] == True], y='Ticket', x='Order Date', title='Momentum 23 Women Order Date')


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [548]:
px.density_heatmap(df23[df23['Is Womens Shirt'] == True], y='Ticket', x='Order Date', title='Momentum 23 Women Order Date')


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [549]:
px.scatter(df23[df23['Is Womens Shirt'] == True], y='Check-ins: Registration', x='Order Date', title='Momentum 23 Women Check-ins by Order Date')


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



## Schools

In [550]:
px.histogram(df23, x='School', title='Momentum 23 Attendees by School', color='School').update_xaxes(categoryorder='total descending')

In [551]:
df23['School'].replace('TechElevator', 'Tech Elevator', inplace=True)
df23['School'].replace('tech elevator', 'Tech Elevator', inplace=True)
df23['School'].replace('Tech Elevator, UC', 'University of Cincinnati', inplace=True)
df23['School'].replace('Cincinnati State Technical and Community College', 'Cincinnati State', inplace=True)

In [552]:
px.histogram(df23, x='School', title='Momentum 23 Student Attendees by School', color='School').update_xaxes(categoryorder='total descending')

In [553]:
px.density_heatmap(df23[df23['Ticket'].isin(['Student', 'Early Student'])], x='School', y='Ticket', title='Momentum 23 Student Attendees by School')

In [554]:
px.scatter(df23[df23['Ticket'].isin(['Student', 'Early Student'])], x='Order Date', y='School', color='School', title='Momentum 23 Student Attendees by School')


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



## Pizza Preferences

In [555]:
px.histogram(df23, x='Favorite Pizza', title='Momentum 23 Favorite Pizzas')

In [556]:
df23['Favorite Pizza'].unique()

array(["LaRosa's", 'Goodfellas', None, "Adriatico's", "Dewey's",
       'Camporosso', 'Kosher', 'Goodfellas, Fratellis', 'Taglio',
       'Bourbon House',
       "Dewey's, Crash at Michael's and just wait at the dining table until he brings you the perfect pizza from the local eateries.",
       "Dewey's, Tazikis",
       "I'm not from there and have no clue what's good for pizza in Cincinnati.",
       'Surprise me 🍕🎲', 'Two Cities',
       "I don't know any Cinci pizza places! I feel like an outsider now ;)",
       "Adriatico's, Don't make me pick between Adriatico's and Dewey's :)",
       "LaRosa's, In Cincinnati, the bells ring ecstatic, For Momentum, the tech con fanatic. Softwares’ charm on display, Ideas bloom like the day, It's a meeting both fun and pragmatic.",
       'Whoever makes the best Gluten Free pizza. ;-)', 'Papa John’s',
       "Mio's", 'Saint Francis Apizza', 'Dominos', 'Goodfellas, Skyline',
       "Dewey's, Mac's Pizza Pub", "LaRosa's, Papa Johns | or Goodfella

Note this entry was mine:
> "Dewey's, Crash at Michael's and just wait at the dining table until he brings you the perfect pizza from the local eateries.",

My own impishness is now coming back to bite me at the data cleaning stage

In [557]:
df23['Favorite Pizza'].replace("Dewey's, Crash at Michael's and just wait at the dining table until he brings you the perfect pizza from the local eateries.", "Dewey's", inplace=True)
df23['Favorite Pizza'].replace("Deweys", "Dewey's", inplace=True)
df23['Favorite Pizza'].replace("LaRosa's, LaRosa's", "LaRosa's", inplace=True)

# We could do this all day, but we really just care about the top contenders

In [558]:
df23['Favorite Pizza'].value_counts()

Favorite Pizza
Dewey's                                 107
LaRosa's                                 58
Goodfellas                               45
Adriatico's                              42
Camporosso                               11
                                       ... 
Kosher                                    1
Dewey's, LaRosas                          1
Camporosso, Agilo's Pizza Mt Healthy      1
Trottas                                   1
The Wheel                                 1
Name: count, Length: 63, dtype: int64

In [559]:
# Get the top 5 favorite pizzas
dfPizzas = df23['Favorite Pizza'].value_counts().head(7)
dfPizzas

Favorite Pizza
Dewey's          107
LaRosa's          58
Goodfellas        45
Adriatico's       42
Camporosso        11
Bourbon House     11
Taglio             4
Name: count, dtype: int64

In [560]:
px.histogram(df23[df23['Favorite Pizza'].isin(dfPizzas.index)], x='Favorite Pizza', title='Momentum 23 Pizza Preferences', color='Favorite Pizza').update_xaxes(categoryorder='total descending')

In [561]:
px.density_heatmap(df23[df23['Favorite Pizza'].isin(dfPizzas.index)], x='Favorite Pizza', y='Ticket', title='Momentum 23 Pizza Preferences by Ticket Type')

In [562]:
px.density_heatmap(df23[df23['Favorite Pizza'].isin(dfPizzas.index)], x='Favorite Pizza', y='Dietary Restrictions', title='Momentum 23 Pizza Preferences by Dietary Restrictions')

In [563]:
df23['Dietary Restrictions'].fillna('None', inplace=True)

In [564]:
px.density_heatmap(df23[df23['Favorite Pizza'].isin(dfPizzas.index)], x='Favorite Pizza', y='Dietary Restrictions', title='Momentum 23 Pizza Preferences by Dietary Restrictions')

In [565]:
px.scatter(df23[df23['Favorite Pizza'].isin(dfPizzas.index)], y='Favorite Pizza', x='Order Date', color='Favorite Pizza', title='Momentum 23 Pizza Preferences by Order Date (because why not?)')


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



## Job Titles

In [566]:
df23['Job title'].value_counts()

Job title
Software Engineer                   52
Senior Software Engineer            24
Software Developer                  21
Principal Software Engineer         14
Advanced Software Engineer          14
                                    ..
Talent Acquisition Consultant        1
Consulting Data Engineer             1
Vice President                       1
Director - CX Cloud Platform         1
Collaborative Software Developer     1
Name: count, Length: 231, dtype: int64

In [567]:
df23['Job title'].value_counts().head(25)

Job title
Software Engineer                        52
Senior Software Engineer                 24
Software Developer                       21
Principal Software Engineer              14
Advanced Software Engineer               14
Student                                  11
Engineering Manager                       9
Lead Software Engineer                    8
Cognitive Computing Platform Engineer     7
Developer                                 5
Data Engineer                             5
Principal Consultant                      4
Senior Developer                          4
Product Manager                           4
Developer Advocate                        4
Business Development Manager              3
Senior UX Consultant                      3
Sr Software Engineer                      3
IT Professional Applications              3
Engineer                                  3
Instructor                                3
Software Engineering Manager              3
Lead Data Engineer    

In [568]:
# Add a software engineer column for if the job title contains 'software engineer' or 'developer'
df23['Job.Software Engineer'] = df23['Job title'].str.contains('software engineer|developer|Software Development Engineer|consultant', case=False)

# Now let's look at our jobs who AREN'T software engineers
df23[df23['Job.Software Engineer'] == False]['Job title'].value_counts().head(25)

Job title
Student                                   11
Engineering Manager                        9
Cognitive Computing Platform Engineer      7
Data Engineer                              5
Product Manager                            4
Business Development Manager               3
Lead Data Engineer                         3
Engineer                                   3
Instructor                                 3
CEO                                        3
IT Professional Applications               3
Automation Engineer                        2
Principal Platform Engineer                2
Owner                                      2
Technical Recruiter                        2
Senior Programmer Analyst                  2
student                                    2
Security Engineer                          2
Cognitive Computing Platform Architect     2
Information Security Engineer              2
Engineering Management                     2
Architect                                  2


In [569]:
df23['Job.Student'] = df23['Job title'] == 'Student'
df23['Job.Engineering Manager'] = df23['Job title'].str.contains('Engineering Manager|Lead|Engineering Management|Instructor', case=False)
df23['Job.Product Manager'] = df23['Job title'].str.contains('Product Manager|Product Owner|Product Management', case=False)
df23['Job.Data'] = df23['Job title'].str.contains('Automation|Data|Cognitive|Database', case=False)
df23['Job.Analyst'] = df23['Job title'].str.contains('Analyst|Business Intelligence|BI|Architect', case=False)
df23['Job.Designer'] = df23['Job title'].str.contains('Designer|UX|UI|User Experience', case=False)
df23['Job.Executive'] = df23['Job title'].str.contains('CEO|CTO|VP', case=False)

In [571]:
# Set a Job column based on the first of the Job.* columns that is true
df23['Job'] = df23[['Job.Software Engineer', 'Job.Student', 'Job.Engineering Manager', 'Job.Product Manager', 'Job.Data', 'Job.Analyst', 'Job.Designer', 'Job.Executive']].idxmax(axis=1).str.replace('Job.', '')
df23['Job'].fillna('Other', inplace=True)
df23['Job'].value_counts()

Job
Software Engineer      324
Student                 56
Executive               33
Engineering Manager     30
Data                    26
Analyst                 18
Designer                10
Product Manager          5
Name: count, dtype: int64

In [573]:
px.histogram(df23, x='Job', title='Momentum 23 Attendees by Job Area', color='Job').update_xaxes(categoryorder='total descending')