In [1]:
import pandas as pd
import numpy as np

In [None]:
"""
Objectives:
1) Determine the number of students per school that have 0-4 'Yes's (matches based on value system as is)
Procedure:
<> Compile df that produces the 6 ordered choices per student by the value system
<> filter/iterate by school

v2 Edits:
- Refactored to create match df per school from updated YouScience Rosters
- add in blank preferences for missing students? 
"""

In [12]:
# in v2, just read in school YS df as X
def get_choices_by_school(school=None, data=None):
    """Build the six ordered YouScience choices for every student in a roster.

    The choices follow the value system used in this notebook:
    First = rank-1 aptitude, Second = rank-1 interest, Third = rank-2 aptitude,
    Fourth = rank-3 aptitude, Fifth = rank-2 interest, Sixth = rank-3 interest.

    Parameters
    ----------
    school : str, optional
        When given, only rows whose ``organization`` equals this value are used.
    data : pandas.DataFrame, optional
        Roster with columns ``id``, ``first_name``, ``last_name``, ``email``,
        ``organization`` (only needed when ``school`` is given), ``fit_rank``,
        ``aptitude_fit`` and ``interest_fit``. Defaults to the notebook-level
        ``X`` so existing calls keep working.

    Returns
    -------
    dict of str -> list
        Column-oriented data (one entry per student, in order of first
        appearance in the roster) with keys ``id``, ``name``, ``email``,
        ``First`` ... ``Sixth`` and ``Enough Choices``. ``Enough Choices`` is 1
        when the six choices contain at least 4 distinct clusters, else 0.
        A choice whose rank row is absent is stored as ``None``.
    """
    # Fall back to the global roster only when the caller did not supply one.
    roster = X if data is None else data
    if school:
        roster = roster[roster.organization == school]

    # (output column, fit_rank, source column) in the order of the value system.
    choice_plan = (
        ('First', 1, 'aptitude_fit'),
        ('Second', 1, 'interest_fit'),
        ('Third', 2, 'aptitude_fit'),
        ('Fourth', 3, 'aptitude_fit'),
        ('Fifth', 2, 'interest_fit'),
        ('Sixth', 3, 'interest_fit'),
    )

    criteria = {key: [] for key in ('id', 'name', 'email')}
    criteria.update({label: [] for label, _, _ in choice_plan})
    criteria['Enough Choices'] = []

    # One pass over the roster instead of a full boolean scan per student.
    # groupby skips rows whose id is missing (its default dropna behaviour).
    for student_id, rows in roster.groupby('id', sort=False):
        criteria['id'].append(int(student_id))
        full_name = str(rows.first_name.values[0]) + ' ' + str(rows.last_name.values[0])
        criteria['name'].append(full_name)

        email = rows.email.values[0]
        # Keep only the part before the first '-'; leave non-string (missing) emails untouched.
        criteria['email'].append(email.split('-')[0] if isinstance(email, str) else email)

        picks = []
        for label, rank, column in choice_plan:
            ranked = rows.loc[rows.fit_rank == rank, column]
            # A student without a row for this rank gets an empty choice instead of an IndexError.
            pick = ranked.values[0] if len(ranked) else None
            criteria[label].append(pick)
            picks.append(pick)

        # Flag students who do not have 4 unique YS clusters from the ranking system.
        # Missing values are not clusters, so they must not count toward the total.
        unique_clusters = {pick for pick in picks if pd.notna(pick)}
        criteria['Enough Choices'].append(1 if len(unique_clusters) >= 4 else 0)

    return criteria

In [19]:
# continues on get_choices_by_school()
# requires the criteria dictionary to be structured from the above function, get_choices_by_school()
# also adds n = 5 empty students per school for last minute arrivals
def _append_blank_student(criteria, student_id, name, email):
    """Append one row with no YouScience results: all six choices and the flag are 0."""
    criteria['id'].append(student_id)
    criteria['name'].append(name)
    criteria['email'].append(email)
    for key in ('First', 'Second', 'Third', 'Fourth', 'Fifth', 'Sixth', 'Enough Choices'):
        criteria[key].append(0)


def fill_in_missing_ys(criteria, school, n=5):
    """Extend ``criteria`` with students lacking YS results plus ``n`` open slots.

    Parameters
    ----------
    criteria : dict of str -> list
        Column-oriented data with the keys produced by get_choices_by_school()
        (``id``, ``name``, ``email``, ``First`` ... ``Sixth``, ``Enough Choices``).
        It is modified in place.
    school : str
        School name used to locate ``../YouScienceData/Missing_YS/{school}_missingYS.csv``.
        Only the ``First``, ``Last`` and ``Email`` columns of that file are read.
    n : int, default 5
        Number of empty placeholder rows added for last minute arrivals.

    Returns
    -------
    dict
        The same ``criteria`` object, for convenient reassignment.
    """
    # read in roster of students missing YS results
    missing = pd.read_csv(f'../YouScienceData/Missing_YS/{school}_missingYS.csv')

    # placeholder ids ('Missing <row position>') since Skyward doesn't provide one
    roster_rows = zip(missing['First'], missing['Last'], missing['Email'])
    for position, (first, last, email) in enumerate(roster_rows):
        _append_blank_student(
            criteria,
            'Missing ' + str(position),
            str(first) + ' ' + str(last),
            email,
        )

    # adding open positions for last minute arrivals
    for slot in range(n):
        # the key is 'name' (lowercase); 'Name' does not exist in criteria and raised KeyError
        _append_blank_student(criteria, f'Empty {slot}', f'Open {slot+1}', f'Open {slot+1}')

    return criteria

In [13]:
# School names to process; each name is interpolated into the roster and
# output CSV file names below, so it must match those files exactly.
schools = [
    'Oakland Middle School',
    'Siegel Middle School',
    'Whitworth-Buchanan Middle School',
    'Christiana Middle School',
    'Smyrna Middle School',
    'Stewarts Creek Middle School',
    'Rockvale Middle School',
    'Rocky Fork Middle School',
    'Blackman Middle School',
    'Thurman Francis Arts Academy',
    'Rock Springs Middle School',
    'LaVergne Middle School',
]

In [20]:
# updated for v2
for school in schools:
    print(f'Working on: {school}')
    # read in Updated YS roster df as X; get_choices_by_school() picks it up through this global
    p = f'../YouScienceData/Updated_YouScience/{school}_YouScience.csv'
    # drop the saved index column when present; errors='ignore' keeps the cell
    # runnable for rosters that were written without one
    X = pd.read_csv(p).drop(columns='Unnamed: 0', errors='ignore')
    # v2: do not specify school as an argument for get_choices_by_school()
    criteria = get_choices_by_school()
    # v2: run fill_in_missing_ys(criteria, school). For later versions specify n=# for # blank rosters to be added
    criteria = fill_in_missing_ys(criteria=criteria, school=school)
    df = pd.DataFrame(criteria)
    path = 'YS_Criteria_by_School/' + school + ' YSCriteria.csv'
    df.to_csv(path)
    print('Done.')

Working on: Oakland Middle School
Done.
Working on: Siegel Middle School
Done.
Working on: Whitworth-Buchanan Middle School
Done.
Working on: Christiana Middle School
Done.
Working on: Smyrna Middle School
Done.
Working on: Stewarts Creek Middle School
Done.
Working on: Rockvale Middle School
Done.
Working on: Rocky Fork Middle School
Done.
Working on: Blackman Middle School
Done.
Working on: Thurman Francis Arts Academy
Done.
Working on: Rock Springs Middle School
Done.
Working on: LaVergne Middle School
Done.


In [None]:
# In v2 the loop above leaves X holding only the LAST school's roster (and X is
# undefined on a fresh kernel), so rebuild the district-wide roster from every
# school's file before computing the combined criteria.
# NOTE(review): assumes student ids are unique across schools - confirm.
X = pd.concat(
    [
        pd.read_csv(f'../YouScienceData/Updated_YouScience/{school}_YouScience.csv')
        for school in schools
    ],
    ignore_index=True,
).drop(columns='Unnamed: 0', errors='ignore')

RCS_all = get_choices_by_school()
df = pd.DataFrame(RCS_all)
df.to_csv('YS_Criteria_by_School/All RCS YS Criteria.csv')