### Download the current check-in CSV from the [check-in website](https://msc-registration-checkin.onrender.com/)

In [1]:
import pandas as pd

# Load the check-in export that sits next to this notebook.
checkin_csv_path = './MSC-Checkin.csv'
df = pd.read_csv(checkin_csv_path)

In [2]:
# Export headers -> dotted field names used by the rest of the notebook.
column_aliases = {
    'Contact First Name': 'ContactName.first',
    'Contact Last Name': 'ContactName.last',
    'First Name': 'Participants.name.first',
    'Last Name': 'Participants.name.last',
    'Gender': 'Participants.gender',
}

# Columns to keep, in the order they should appear in the saved file.
output_columns = [
    'Family ID', 'ContactName.first', 'ContactName.last', 'Email', 'Center', 'City',
    'Participants.name.first', 'Participants.name.last', 'Participants.gender',
    'Category', 'PAID', 'Decided', 'Check-in Status',
]

# Rename first, then select; a missing column raises KeyError at the selection.
df = df.rename(columns=column_aliases)[output_columns]

# NOTE: this writes back to the same path the notebook reads from, replacing the download.
df.to_csv('./MSC-Checkin.csv', index=False)


### Total # of People + Families Checked In

In [3]:
# Participants whose status is exactly 'checked-in'.
is_checked_in = df['Check-in Status'] == 'checked-in'
checked_in = df[is_checked_in]
num_checked_in = len(checked_in)

# Distinct family IDs among those participants, in order of first appearance.
checked_in_family_ids = checked_in['Family ID'].unique()

print(f"Number of people checked in: {num_checked_in}")
print("\nFamily IDs of checked in people:")
print(checked_in_family_ids)
print("\nNumber of unique families checked in:")
print(len(checked_in_family_ids))

Number of people checked in: 762

Family IDs of checked in people:
[ 32  37   1   2   3  10  11  16  17  18  20  22  26  29  30  31  33  35
  38  39  40  41  42  43  45  48  54  55  59  64  66  67  69  71  72  73
  74  75  76  77  78  79  80  81  83  84  85  86  87  89  91  92  95  97
  98  99 100 103 104 106 107 114 115 116 117 118 123 125 126 127 128 129
 131 132 135 139 140 141 142 143 144 145 147 149 151 152 153 154 155 156
 159 161 163 164 169 171 175 179 181 182 183 184 185 186 189 191 195 196
 199 200 205 207 210 211 214 215 216 217 218 219 221 222 225 227 230 232
 234 236 239 240 241 244 245 248 251 255 258 260 261 262 263 265 267 268
 269 271 275 276 277 278 281 282 286 289 293 294 295 296 297 298 299 301
 303 306 308 309 312 313 316 317 318 320 321 323 327 329 330 331 332 334
 335 337 339 344 345 350 359 360 365 366 368 369 370 372 373 374 375 376
 377 379 380 381 383 384 386 387 389 390 391 393 394 395 396 397 398 399
 401 402 404 411 418 423 425 430 433 434 435 440 441 442 

### Check-in Count by Gender, Adults, and Grade

In [4]:
# Gender split of everyone in `checked_in`.
gender_counts = checked_in['Participants.gender'].value_counts()
num_males = gender_counts.get('M', 0)
num_females = gender_counts.get('F', 0)

# Adults are the rows whose Category is the literal 'Adult'.
num_adults = len(checked_in[checked_in['Category'] == 'Adult'])

# Display order for the children's categories: named levels first, then grades 1-12.
grade_order = ['Infant/ShishuVihar', 'Pre-KG', 'KG'] + [str(i) for i in range(1, 13)]

# Select children by membership in grade_order. The previous str.isnumeric() filter
# discarded 'Infant/ShishuVihar', 'Pre-KG' and 'KG' rows, so those categories were
# always reported as 0 even when such participants were present.
child_categories = checked_in['Category'].astype(str)
child_categories = child_categories[child_categories.isin(grade_order)]

# An ordered categorical keeps every grade in the table (zero counts included)
# and makes sort_index() follow grade_order instead of string order.
grade_counts = (
    child_categories.astype('category')
    .cat.set_categories(grade_order, ordered=True)
    .value_counts()
    .sort_index()
)

print("\nChecked-in Statistics:")
print(f"Total checked in: {num_checked_in}")
print(f"Males checked in: {num_males}")
print(f"Females checked in: {num_females}")
print(f"Adults checked in: {num_adults}")

print("\nChildren checked in by grade/age:")
print(grade_counts.to_string())



Checked-in Statistics:
Total checked in: 762
Males checked in: 330
Females checked in: 432
Adults checked in: 640

Children checked in by grade/age:
Infant/ShishuVihar     0
Pre-KG                 0
KG                     0
1                      5
2                      6
3                      3
4                     11
5                      7
6                      8
7                     10
8                     17
9                     17
10                    12
11                    14
12                     6


In [5]:
# Everyone whose status is anything other than exactly 'checked-in' (missing values included).
not_checked_in = df[df['Check-in Status'] != 'checked-in']

# Adults are the rows whose Category is the literal 'Adult'.
adults_not_checked = len(not_checked_in[not_checked_in['Category'] == 'Adult'])

print("\nNot Checked In Statistics:")
print(f"Total not checked in: {len(not_checked_in)}")
print(f"Adults not checked in: {adults_not_checked}")

# Display order for the children's categories: named levels first, then grades 1-12.
grade_order = ['Infant/ShishuVihar', 'Pre-KG', 'KG'] + [str(i) for i in range(1, 13)]

# Select children by membership in grade_order. The previous str.isnumeric() filter
# discarded 'Infant/ShishuVihar', 'Pre-KG' and 'KG' rows, so those categories were
# always reported as 0 even when such participants were present.
not_checked_grades = not_checked_in['Category'].astype(str)
not_checked_grades = not_checked_grades[not_checked_grades.isin(grade_order)]

# An ordered categorical keeps every grade in the table (zero counts included)
# and makes sort_index() follow grade_order instead of string order.
not_checked_grades = not_checked_grades.astype('category').cat.set_categories(grade_order, ordered=True)
not_checked_grade_counts = not_checked_grades.value_counts().sort_index()

print("\nChildren not checked in by grade/age:")
print(not_checked_grade_counts.to_string())



Not Checked In Statistics:
Total not checked in: 152
Adults not checked in: 138

Children not checked in by grade/age:
Infant/ShishuVihar    0
Pre-KG                0
KG                    0
1                     1
2                     1
3                     2
4                     1
5                     1
6                     1
7                     1
8                     1
9                     1
10                    1
11                    1
12                    1


### Delete old `checkin_statistics.xlsx` file

In [6]:
import os

# Remove any existing statistics workbook at this path; do nothing when it is absent.
stats_workbook_path = 'checkin_statistics.xlsx'
if os.path.exists(stats_workbook_path):
    os.remove(stats_workbook_path)


### Update with new `checkin_statistics.xlsx` file

In [7]:
import pandas as pd

# === Shared helper: one summary table per group of participants ===
def _summarize_participants(participants, label, grade_order):
    """Build the Metric / Male / Female / Count table for one group of participants.

    Parameters
    ----------
    participants : pandas.DataFrame
        Rows to summarise; reads the 'Participants.gender' and 'Category' columns.
    label : str
        Suffix for the four headline metric names, e.g. 'Checked In'.
    grade_order : list of str
        Category values reported as 'Grade <value>' rows, in this order.
        A value with no matching rows is still listed, with zero counts.

    Returns
    -------
    pandas.DataFrame
        One row per metric with columns 'Metric', 'Male', 'Female', 'Count'.
        'Count' on adult/grade rows counts every row in that category,
        whatever its gender value.
    """
    is_male = participants['Participants.gender'] == 'M'
    is_female = participants['Participants.gender'] == 'F'
    category = participants['Category']

    num_males = int(is_male.sum())
    num_females = int(is_female.sum())
    is_adult = category == 'Adult'

    rows = [
        (f'Total {label}', num_males, num_females, len(participants)),
        # The gender rows leave the opposite-gender column at 0.
        (f'Males {label}', num_males, 0, num_males),
        (f'Females {label}', 0, num_females, num_females),
        (f'Adults {label}', int((is_adult & is_male).sum()),
         int((is_adult & is_female).sum()), int(is_adult.sum())),
    ]
    for grade in grade_order:
        in_grade = category == grade
        rows.append((f'Grade {grade}', int((in_grade & is_male).sum()),
                     int((in_grade & is_female).sum()), int(in_grade.sum())))

    return pd.DataFrame(rows, columns=['Metric', 'Male', 'Female', 'Count'])


# Display order for the children's categories: named levels first, then grades 1-12.
grade_order = ['Infant/ShishuVihar', 'Pre-KG', 'KG'] + [str(i) for i in range(1, 13)]

# NOTE(review): here any non-empty status counts as checked in, whereas other cells in
# this notebook compare against the literal 'checked-in'. The two rules agree only while
# 'checked-in' is the sole status value that occurs - confirm against the export.
has_status = df['Check-in Status'].notna() & (df['Check-in Status'] != '')

# === Sheet 1: Checked In Participants ===
checked_in = df[has_status]
num_checked_in = len(checked_in)
summary_stats_checked_in = _summarize_participants(checked_in, 'Checked In', grade_order)

# === Sheet 2: Not Checked In Participants ===
# Exact complement of the rows above: status missing or empty.
not_checked_in = df[~has_status]
num_not_checked_in = len(not_checked_in)
summary_stats_not_checked = _summarize_participants(not_checked_in, 'Not Checked In', grade_order)

# === Export both to the same Excel file ===
with pd.ExcelWriter('checkin_statistics.xlsx') as writer:
    summary_stats_checked_in.to_excel(writer, sheet_name='Checked In', index=False)
    summary_stats_not_checked.to_excel(writer, sheet_name='Not Checked In', index=False)

print("Both sheets exported to checkin_statistics.xlsx")


Both sheets exported to checkin_statistics.xlsx


### How many people have checked in so far?

In [8]:
# Share of registered participants (rows of df) that are in `checked_in`.
total_registered = len(df)  # Total number of registered participants
total_checked_in = len(checked_in)  # Total number of checked-in participants

# An empty registration list would otherwise raise ZeroDivisionError; report 0% instead.
if total_registered:
    checkin_percentage = (total_checked_in / total_registered) * 100
else:
    checkin_percentage = 0.0

print("\nCheck-in Statistics:")
print(f"Total Registered: {total_registered}")
print(f"Total Checked In: {total_checked_in}")
print(f"Check-in Percentage: {checkin_percentage:.1f}%")

# One-row frame with Metric / Count / Male / Female columns.
# This cell only builds it; it is not appended to another table or exported here.
checkin_stats = pd.DataFrame({
    'Metric': ['Check-in Percentage'],
    'Count': [f"{checkin_percentage:.1f}%"],
    'Male': ['--'],
    'Female': ['--']
})




Check-in Statistics:
Total Registered: 914
Total Checked In: 762
Check-in Percentage: 83.4%


In [9]:
# Rows whose status does not contain 'checked-in'; missing statuses count as not checked in.
not_checked_in = df[~df['Check-in Status'].str.contains('checked-in', na=False)]

# "\P" is not a valid escape sequence (Python warns and prints a literal backslash);
# a leading newline was intended, matching the other report headers.
print(f"\nParticipants Not Checked In: {len(not_checked_in)}")


\Participants Not Checked In: 152


  print(f"\Participants Not Checked In: {len(not_checked_in)}")


### How many people from the San Jose center (SJ) haven't checked in yet?

In [10]:
# Registrations whose Center is 'cm_San_Jose', and the subset whose status does not
# contain 'checked-in' (missing statuses count as not checked in).
san_jose_df = df[df['Center'] == 'cm_San_Jose']
san_jose_not_checked = san_jose_df[~san_jose_df['Check-in Status'].str.contains('checked-in', na=False)]

num_san_jose_total = len(san_jose_df)
num_san_jose_not_checked = len(san_jose_not_checked)

# With no San Jose registrations the rate would raise ZeroDivisionError; report 0% instead.
if num_san_jose_total:
    san_jose_checkin_rate = (num_san_jose_total - num_san_jose_not_checked) / num_san_jose_total * 100
else:
    san_jose_checkin_rate = 0.0

print("\nSan Jose Center Statistics:")
print(f"Total Registered: {num_san_jose_total}")
print(f"Not Checked In: {num_san_jose_not_checked}")
print(f"Check-in Rate: {san_jose_checkin_rate:.1f}%")

# One-row frame with Metric / Count / Male / Female columns.
# This cell only builds it; it is not appended to another table or exported here.
san_jose_stats = pd.DataFrame({
    'Metric': ['San Jose - Not Checked In'],
    'Count': [num_san_jose_not_checked],
    'Male': ['--'],
    'Female': ['--']
})




San Jose Center Statistics:
Total Registered: 598
Not Checked In: 22
Check-in Rate: 96.3%
