In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

In [None]:
"""
Iterate through the schools to determine whether the gap between YouScience results and Skyward rosters
is as drastic as at Blackman (1006 YS vs. 612 Skyward).
"""

In [2]:
# Column names used by the Skyward roster export and the YouScience results export.
SKYWARD_EMAIL_COL = "Student's School Email"
SKYWARD_FIRST_COL = 'Student First Name'
SKYWARD_LAST_COL = 'Student Last Name'
YS_EMAIL_COL = 'email'


def compare_rosters(skyward_path = None, youscience_path = None, print_len=0, school=None):
    """Compare one school's Skyward roster with its YouScience results.

    Reads both files, reports how many rostered students have no YouScience
    results, and returns the YouScience rows restricted to rostered students.

    Parameters
    ----------
    skyward_path : str or path-like
        Excel file holding the Skyward roster. Must contain the columns
        "Student's School Email", "Student First Name" and "Student Last Name".
    youscience_path : str or path-like
        CSV file holding the YouScience results. Must contain an "email" column.
    print_len : int, default 0
        Any value other than 0 prints the size of both inputs before comparing.
    school : str, optional
        Label used in the printed messages. When omitted it is taken from the
        Skyward file name (a leading "Skyward_" is stripped), so the function
        no longer relies on a global ``school`` variable being defined.

    Returns
    -------
    not_in_ys : list of [first, last, email]
        One entry per de-duplicated Skyward student whose email has no match in
        the YouScience file, in roster order. Full name values are kept
        (multi-word names are not truncated).
    pruned_df : pandas.DataFrame
        The YouScience rows whose email appears in the Skyward roster. Rows
        without an email cannot be matched to the roster and are dropped.

    Raises
    ------
    ValueError
        If either path is missing.
    """
    if skyward_path is None or youscience_path is None:
        raise ValueError('compare_rosters requires both skyward_path and youscience_path.')

    if school is None:
        # Derive the label from the roster file name, e.g. "Skyward_<school>.xlsx".
        school = Path(skyward_path).stem
        if school.startswith('Skyward_'):
            school = school[len('Skyward_'):]

    sky_df = pd.read_excel(skyward_path)
    ys_df = pd.read_csv(youscience_path)

    if print_len != 0:
        # NOTE(review): the division by 3 presumably reflects three rows per
        # student in the YouScience export - confirm against the data.
        print(f'Comparing {school} rosters.\nYS length: {len(ys_df) // 3}\nSkyward length: {len(sky_df)}')

    # EDIT: we know there are duplicated students based on prior teacher changes
    sky_df = sky_df.drop_duplicates(subset=SKYWARD_EMAIL_COL)

    # Rostered students with no YouScience results. Null emails are excluded
    # from the lookup so a missing email never counts as a match.
    ys_emails = ys_df[YS_EMAIL_COL].dropna().unique()
    has_results = sky_df[SKYWARD_EMAIL_COL].isin(ys_emails)
    missing_rows = sky_df.loc[
        ~has_results, [SKYWARD_FIRST_COL, SKYWARD_LAST_COL, SKYWARD_EMAIL_COL]
    ]
    not_in_ys = [
        [str(first), str(last), email]
        for first, last, email in missing_rows.itertuples(index=False, name=None)
    ]

    # Keep only YouScience rows that belong to a student on the Skyward roster.
    sky_emails = sky_df[SKYWARD_EMAIL_COL].dropna().unique()
    pruned_df = ys_df[ys_df[YS_EMAIL_COL].isin(sky_emails)]

    print(f'{len(not_in_ys)} of {len(sky_df)} {school} 8th graders missing YouScience Results.')
    return not_in_ys, pruned_df

In [3]:
# Middle schools whose Skyward and YouScience exports are compared below
schools = [
    'Oakland Middle School',
    'Siegel Middle School',
    'Whitworth-Buchanan Middle School',
    'Christiana Middle School',
    'Smyrna Middle School',
    'Stewarts Creek Middle School',
    'Rockvale Middle School',
    'Rocky Fork Middle School',
    'Blackman Middle School',
    'Thurman Francis Arts Academy',
    'Rock Springs Middle School',
    'LaVergne Middle School'
]

# both input exports live under the same data directory
base = '../YouScienceData/'

missing_ys_results = {}
for school in schools:
    # locate this school's Skyward roster and YouScience results
    sky_path = f'{base}Skyward/Skyward_{school}.xlsx'
    ys_path = f'{base}YouScience/YouScience_cluster_advising_{school}.csv'
    # record the students without results and save the pruned YouScience rows
    missing_students, df = compare_rosters(sky_path, ys_path)
    missing_ys_results[school] = missing_students
    df.to_csv(f'../YouScienceData/Updated_YouScience/{school}_YouScience.csv')

# total number of students without results over every school
n = sum(len(missing_ys_results[name]) for name in schools)
print(f'\n- - -\n\n{n} 8th graders missing YouScience results across RCS.')

97 of 431 Oakland Middle School 8th graders missing YouScience Results.
52 of 425 Siegel Middle School 8th graders missing YouScience Results.
73 of 317 Whitworth-Buchanan Middle School 8th graders missing YouScience Results.
80 of 371 Christiana Middle School 8th graders missing YouScience Results.
109 of 316 Smyrna Middle School 8th graders missing YouScience Results.
52 of 381 Stewarts Creek Middle School 8th graders missing YouScience Results.
91 of 543 Rockvale Middle School 8th graders missing YouScience Results.
85 of 469 Rocky Fork Middle School 8th graders missing YouScience Results.
124 of 612 Blackman Middle School 8th graders missing YouScience Results.
1 of 81 Thurman Francis Arts Academy 8th graders missing YouScience Results.
71 of 323 Rock Springs Middle School 8th graders missing YouScience Results.
117 of 421 LaVergne Middle School 8th graders missing YouScience Results.

- - -

952 8th graders missing YouScience results across RCS.


In [39]:
# Write each school's roster of students without YouScience results to CSV
for school in schools:
    missing_roster = pd.DataFrame(
        missing_ys_results[school],
        columns=['First', 'Last', 'Email'],
    )
    # drop exact duplicate rows before exporting
    missing_roster = missing_roster.drop_duplicates()
    missing_roster.to_csv(f'../YouScienceData/Missing_YS/{school}_missingYS.csv')
