In [1]:
import numpy as np
import pandas as pd

attendance_raw = []
for i in range(1,25):
    student_id = f"S{i:03d}"

    cohorts = ["alpha", "beta", "gamma"]
    cohort = np.random.choice(cohorts)
    attended_sessions = np.random.randint(0, 7)
    expected_sessions = 6

    record = {
        "student_id": student_id,
        "cohort": cohort,
        "attended_sessions": attended_sessions,
        "expected_sessions": expected_sessions
    }

    attendance_raw.append(record)

attendance = pd.DataFrame(attendance_raw)
attendance.head()


Unnamed: 0,student_id,cohort,attended_sessions,expected_sessions
0,S001,beta,4,6
1,S002,gamma,6,6
2,S003,beta,1,6
3,S004,beta,1,6
4,S005,alpha,2,6


In [2]:
attendance.info()

<class 'pandas.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype
---  ------             --------------  -----
 0   student_id         24 non-null     str  
 1   cohort             24 non-null     str  
 2   attended_sessions  24 non-null     int64
 3   expected_sessions  24 non-null     int64
dtypes: int64(2), str(2)
memory usage: 900.0 bytes


In [3]:
attendance_indexed = attendance.set_index('student_id')

excused_absences = pd.Series(data=[1, 2, 6, 1, 3, 4, 1, 5, 2, 4], index=['S001','S003','S012','S017','S020','S022','S044','S008','S030','S027'])

attendance_indexed['adjusted_attendance'] = (attendance_indexed["attended_sessions"] + excused_absences)
attendance_indexed

Unnamed: 0_level_0,cohort,attended_sessions,expected_sessions,adjusted_attendance
student_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
S001,beta,4,6,5.0
S002,gamma,6,6,
S003,beta,1,6,3.0
S004,beta,1,6,
S005,alpha,2,6,
S006,gamma,4,6,
S007,gamma,0,6,
S008,gamma,5,6,10.0
S009,beta,0,6,
S010,beta,5,6,


In [5]:
attendance_indexed["adjusted_attendance"] = (
    attendance_indexed["adjusted_attendance"]
    .fillna(attendance_indexed["attended_sessions"])
)
print(attendance_indexed['adjusted_attendance'])

student_id
S001     5.0
S002     6.0
S003     3.0
S004     1.0
S005     2.0
S006     4.0
S007     0.0
S008    10.0
S009     0.0
S010     5.0
S011     1.0
S012    11.0
S013     5.0
S014     6.0
S015     1.0
S016     3.0
S017     4.0
S018     2.0
S019     3.0
S020     6.0
S021     3.0
S022     9.0
S023     2.0
S024     3.0
Name: adjusted_attendance, dtype: float64


In [6]:
attendance_indexed.loc['S001', 'cohort'] = ' Beta '
attendance_indexed.loc['S003', 'cohort'] = 'BETA'
attendance_indexed.loc['S005', 'cohort'] = 'alpha '
attendance_indexed.loc['S010', 'cohort'] = 'BeTa '
attendance_indexed.loc['S017', 'cohort'] = ' BEta '
attendance_indexed.loc['S024', 'cohort'] = ' GaMmA'

In [8]:
attendance_indexed['cohort'] = attendance_indexed['cohort'].str.strip().str.lower()
attendance_indexed['cohort']

<StringArray>
['beta', 'gamma', 'alpha']
Length: 3, dtype: str


student_id
S001     beta
S002    gamma
S003     beta
S004     beta
S005    alpha
S006    gamma
S007    gamma
S008    gamma
S009     beta
S010     beta
S011    gamma
S012    alpha
S013    alpha
S014    gamma
S015    gamma
S016    alpha
S017     beta
S018     beta
S019    alpha
S020    alpha
S021     beta
S022     beta
S023     beta
S024    gamma
Name: cohort, dtype: str

In [9]:
print(attendance_indexed['cohort'].unique())

<StringArray>
['beta', 'gamma', 'alpha']
Length: 3, dtype: str


In [16]:
low_attendance = attendance_indexed[attendance_indexed['attended_sessions'] < attendance_indexed['expected_sessions']]
summary = low_attendance.groupby('cohort')['attended_sessions'].mean()
print(summary)

cohort
alpha    3.500000
beta     2.600000
gamma    2.333333
Name: attended_sessions, dtype: float64


In [18]:
attendance_indexed['attendance_ok'] = attendance_indexed['attended_sessions'] >= attendance_indexed['expected_sessions']
attendance_indexed

Unnamed: 0_level_0,cohort,attended_sessions,expected_sessions,adjusted_attendance,attendance_ok
student_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
S001,beta,4,6,5.0,False
S002,gamma,6,6,6.0,True
S003,beta,1,6,3.0,False
S004,beta,1,6,1.0,False
S005,alpha,2,6,2.0,False
S006,gamma,4,6,4.0,False
S007,gamma,0,6,0.0,False
S008,gamma,5,6,10.0,False
S009,beta,0,6,0.0,False
S010,beta,5,6,5.0,False
