# Coronavirus School Report Cards
Mock-up a demo case that could be scaled to LAUSD.
Pare back columns to bare bones for what's needed to do the school report cards.
Add rules for how NYC is quarantining / investigating cases as tests come back positive.

In [1]:
import pandas as pd

from IPython.display import Markdown

In [2]:
df = pd.read_parquet('../scratch/master_student_df.parquet')
test_results = pd.read_parquet('../scratch/tested_positive.parquet')

DATES = list(test_results.date.unique())
DATES

['9/1/20', '9/2/20']

In [3]:
display(Markdown(
    f"In this simple example, there are {df.school.nunique()} schools, "
    f"{df.pod.nunique()} unique classrooms / pods within these schools, "
    f"and {df.student_id.nunique()} students total."
    f"We will demo what getting results for {DATES[0]} and {DATES[1]}, "
    "and produce coronavirus school report cards for each day."
))

In this simple example, there are 5 schools, 24 unique classrooms / pods within these schools, and 100 students total.We will demo what getting results for 9/1/20 and 9/2/20, and produce coronavirus school report cards for each day.

## Apply quarantine rules / school protocols
* When on student is infected in a pod, whole pod QT 14 days
* If there is second case in pod, whole school is investigated for possible QT and closure
* If there is second case outside pod, pod QT 14 days and school closes for 1 day for investigation
* If link btwn cases cannot be determined, close entire school, everyone QT 14 days
* If link is outside the school, then affected pods QT, but school can reopen
* If link can be determined and the 2 cases are not linked, then affected pods QT, but school can reopen

In [4]:
def unique_infections(df, aggregation_level):
    new_col = f"num_{aggregation_level}_infections"
    summary = (df.groupby(aggregation_level)["infection_id"].nunique()
                    .to_frame().reset_index()
                    .rename(columns = {"infection_id": new_col})
                )
    
    return summary

def assign_pod_QT(row):    
    if row.num_pod_infections >= 1:
        return 1
    else:
        return 0
    
def apply_positives(student_df, tested_positive):
    # 1:m merge beacuse there could be multiple sources of infections that affect same student
    df = pd.merge(student_df, tested_positive, on = "student_id", how = "left", validate = "1:m")
    
    # If you were not infected yesterday but infected today, we want to count you as infected.
    # If you were infected yesterday, and not today, we still want to count you as infected.
    # Look across these 2 rows and take the max, so when we aggregate later, it's a cumulative today.
    df2 = (df.assign(
        student_infected = (df[["student_infected", "infected"]].max(axis=1)
                          .fillna(0).astype(int)
                         )
        )
    )
    
    # See how many unique infections occur within a pod
    pod_infections = unique_infections(df2, "pod")

    # See how many unique infections occur within a school
    school_infections = unique_infections(df2, "school")
    
    # Merge onto student-level df
    # (Eventually, think about a student-virus level df, if students are assumed to be infected multiple times in school year)
    m1 = pd.merge(df2, pod_infections, on = "pod", how = "left", validate = "m:1")
    m2 = pd.merge(m1, school_infections, on = "school", how = "left", validate = "m:1")
    
    # Assign pod QT
    m3 = m2.assign(
       pod_QT_today = m2.apply(assign_pod_QT, axis=1)
    )
    
    # Similarly, flag the pod as already being in QT
    # This needs to be revised to include a 14-day window....otherwise it's on forever
    m3 = (m3.assign(
        pod_QT = m3[["pod_QT", "pod_QT_today"]].max(axis=1).fillna(0).astype(int)
        ).drop(columns = "pod_QT_today")
    )
    
    return m3

In [5]:
def assign_school_QT(df):
    df2 = df[["school", "infection_id", "pod"]].drop_duplicates().reset_index()
    
    # If same infection_id affects multiple pods, close school
    pods_affected = (df2[df2.infection_id.notna()]
                     .groupby(["school", "infection_id"])
                     .agg({"pod": "nunique"})
                     .rename(columns = {"pod": "num_pods_affected"})
                     .reset_index()
                    )
    
    # Schools can be forced into QT when multiple pods affected and no link established 
    # If link is established and it's outside school, only affected pods QT
    pods_affected = (pods_affected.assign(
           school_QT = pods_affected.apply(lambda x: 1 if x.num_pods_affected > 1 else 0, axis=1)
        )[["school", "school_QT"]]
    )
    
    # Schools can already be in QT mode when more positive tests come in, so take max
    pods_affected = pods_affected.assign(
        school_QT = pods_affected.groupby("school")["school_QT"].transform("max")
    ).drop_duplicates()
    
    return pods_affected

In [6]:
# Use this as high-level function to call other functions defined above
def apply_quarantine_rules(df, tested_positive):
    df2 = apply_positives(df, tested_positive)

    school_QT_df = assign_school_QT(df2)
    df2 = pd.merge(df2.drop(columns = "school_QT"), 
                   school_QT_df, 
                   on = "school", how = "left", validate = "m:1")

    keep = ["student_id", "student", 
            "classroom", "school", "pod", 
            "student_infected", 
            "pod_QT", "school_QT"
           ]
    
    df3 = (df2[keep]
           .assign(
               student_infected = (df2.groupby("student_id")["student_infected"].transform("max")
                                   .fillna(0).astype(int)
                                  ),
               pod_QT = df2.pod_QT.fillna(0).astype(int),
               school_QT = df2.school_QT.fillna(0).astype(int),
               date = tested_positive.date.iloc[0]
           )
          )
    
    return df3 

In [7]:
def summary_report_card(df, aggregation_level):
    students_confirmed_positive = (df.groupby(aggregation_level)
                                   .agg({
                                       "date": "min",
                                       "student_infected": "sum", 
                                       "student_id": "nunique"
                                   })
                                   .reset_index()
                                   .rename(columns = {
                                       "student_infected": f"students_confirmed_positive", 
                                       "student_id": f"students_QT"
                                   })
                                  )
    
    if aggregation_level == "school":
        pods_affected = (df.groupby(aggregation_level)
                         .agg({"pod": "nunique"})
                         .reset_index()
                         .rename(columns = {"pod": f"{aggregation_level}_pods_QT"})
                        )
        # Merge on this extra info about number of pods affected for schools
        students_confirmed_positive = pd.merge(students_confirmed_positive, pods_affected, 
                                               on = "school", how = "left", validate = "1:1"
                                              )
    
    # Can define column order
    return students_confirmed_positive                         

In [8]:
test_results1 = test_results[test_results.date == DATES[0]]
test_results2 = test_results[test_results.date == DATES[1]]

In [9]:
start_cols = ['student', 'classroom', 'school', 'student_id', 'pod', 
              'student_infected', 'pod_QT', 'school_QT']

day1 = apply_quarantine_rules(df[start_cols], test_results1)
pod_summary1 = summary_report_card(day1, "pod")
school_summary1 = summary_report_card(day1, "school")

display(pod_summary1)
display(school_summary1)

Unnamed: 0,pod,date,students_confirmed_positive,students_QT
0,0-0,9/1/20,0,1
1,0-1,9/1/20,0,2
2,0-2,9/1/20,0,9
3,0-3,9/1/20,0,3
4,0-4,9/1/20,2,7
5,1-0,9/1/20,2,4
6,1-1,9/1/20,0,8
7,1-2,9/1/20,0,3
8,1-4,9/1/20,0,2
9,2-0,9/1/20,0,4


Unnamed: 0,school,date,students_confirmed_positive,students_QT,school_pods_QT
0,0,9/1/20,2,22,5
1,1,9/1/20,2,17,4
2,2,9/1/20,0,18,5
3,3,9/1/20,2,22,5
4,4,9/1/20,0,21,5


In [10]:
day2 = apply_quarantine_rules(day1[start_cols], test_results2)
pod_summary2 = summary_report_card(day2, "pod")
school_summary2 = summary_report_card(day2, "school")

display(pod_summary2)
display(school_summary2)

Unnamed: 0,pod,date,students_confirmed_positive,students_QT
0,0-0,9/2/20,0,1
1,0-1,9/2/20,0,2
2,0-2,9/2/20,0,9
3,0-3,9/2/20,0,3
4,0-4,9/2/20,2,7
5,1-0,9/2/20,2,4
6,1-1,9/2/20,2,8
7,1-2,9/2/20,0,3
8,1-4,9/2/20,0,2
9,2-0,9/2/20,0,4


Unnamed: 0,school,date,students_confirmed_positive,students_QT,school_pods_QT
0,0,9/2/20,2,22,5
1,1,9/2/20,4,17,4
2,2,9/2/20,0,18,5
3,3,9/2/20,6,22,5
4,4,9/2/20,0,21,5
