In [1]:
import pandas as pd
import numpy as np

In [None]:
"""
Goal: 
- Create a rudimentary matching algorithm 
- Note the process and decisions made here: 
Decisions: 
- Scope of Rough Draft: work for one school, then focus on generalizing functions/script
- Want to track the number of clusters that were dropped due to no matching POS per school
"""

In [None]:
# (1) Need to read in by_school match csv and tally the demand of YS clusters per school
# (2) Need to convert YS clusters into POSs per school
# (3) Need to determine capacity vector for school

In [2]:
# Middle schools processed by this notebook. Each name is used verbatim to
# build the per-school CSV path and to look up the school's planning sheet.
schools = [
    'Oakland Middle School',
    'Siegel Middle School',
    'Whitworth-Buchanan Middle School',
    'Christiana Middle School',
    'Smyrna Middle School',
    'Stewarts Creek Middle School',
    'Rockvale Middle School',
    'Rocky Fork Middle School',
    'Blackman Middle School',
    'Thurman Francis Arts Academy',
    'Rock Springs Middle School',
    'LaVergne Middle School',
]

In [6]:
# checking volume vs. capacity per school
#
# For every school: estimate seating capacity from the rooms listed on its
# planning sheet, count the rows in its YouScience criteria CSV, and record
# whether the capacity covers that count.

# Seating assumptions used for the capacity estimate.
LARGE_ROOMS = ('Library', 'Auditorium')  # room labels counted as large spaces
LARGE_ROOM_SEATS = 50                    # seats counted per large room
STANDARD_ROOM_SEATS = 35                 # seats counted per any other room

# Excel caps worksheet names at 31 characters, so a school whose name is
# longer can only appear in the workbook truncated (e.g. the sheet
# 'Whitworth-Buchanan Middle Schoo'). Truncating the lookup key handles that
# school and any future long name without a per-school special case.
MAX_SHEET_NAME_LEN = 31

capacity_report = {
    'School': [],
    'Status': [],
    'Assigned Capacity': [],
    '8th Graders': [],
    'Number of Large Rooms': [],
}

# Open the planning workbook once rather than re-opening it for every school.
with pd.ExcelFile('planning.xlsx') as planning_book:
    for school in schools:
        # read in planning document to get list of rooms
        plan_df = pd.read_excel(planning_book, sheet_name=school[:MAX_SHEET_NAME_LEN])

        # get the rooms per that school
        # NOTE(review): every entry in 'MS Room #' is counted as a room,
        # including blank cells if the sheet has any -- confirm the sheets
        # contain no empty rows.
        rooms = list(plan_df['MS Room #'])
        lg_rooms = sum(1 for room in rooms if room in LARGE_ROOMS)
        capacity = (
            lg_rooms * LARGE_ROOM_SEATS
            + (len(rooms) - lg_rooms) * STANDARD_ROOM_SEATS
        )

        # determine volume from YS match roster (one CSV row per entry)
        path = f'YS_Criteria_by_School/{school} YSCriteria.csv'
        ys_df = pd.read_csv(path)
        volume = len(ys_df)

        # compile dictionary for report
        status = 'Ready' if capacity >= volume else 'Insufficient Space'
        capacity_report['School'].append(school)
        capacity_report['Status'].append(status)
        capacity_report['Assigned Capacity'].append(capacity)
        capacity_report['8th Graders'].append(volume)
        capacity_report['Number of Large Rooms'].append(lg_rooms)

pd.DataFrame(capacity_report)

Unnamed: 0,School,Status,Assigned Capacity,8th Graders,Number of Large Rooms
0,Oakland Middle School,Ready,665,420,0
1,Siegel Middle School,Ready,555,430,2
2,Whitworth-Buchanan Middle School,Ready,625,322,2
3,Christiana Middle School,Ready,595,376,0
4,Smyrna Middle School,Ready,540,321,1
5,Stewarts Creek Middle School,Ready,555,386,2
6,Rockvale Middle School,Ready,575,537,1
7,Rocky Fork Middle School,Ready,540,276,1
8,Blackman Middle School,Insufficient Space,525,617,0
9,Thurman Francis Arts Academy,Ready,525,86,0


In [None]:
""" 
Need to convert YS matches to available POS offerings per school
Do we want to track the number of students per school that do not have 1st or 2nd matches? 
    - Would you ignore the students without YS results as well as the empty placeholders? 
Brainstorm the how... 
    - Starting from the most demanded POS? 
    - How to track students across block and across POS
"""

In [None]:
# (1)
# Load the YouScience cluster matches for one school and the direct-join
# prepared table, then keep only the columns needed for that school.
school = 'Blackman Middle School'
path = f'YS_Criteria_by_School/{school} YSCriteria.csv'

# read in csv and tidy: drop the 'Unnamed: 0' column (presumably an index
# column written when the CSV was saved -- confirm)
school_clusters_df = pd.read_csv(path).drop(columns='Unnamed: 0')

# read in direct join prepared table and tidy it the same way
djp_df = pd.read_excel('../direct_join_prepared.xlsx').drop(columns='Unnamed: 0')

# filter to school specific: cluster names plus this school's column
cluster_df = djp_df.loc[:, ['YouScience Clusters', school]]

In [None]:
# (2)
# creating replacement dictionary: each YouScience cluster name maps to the
# value in this school's column of cluster_df (missing where the school has
# no entry for that cluster)
to_replace = dict(zip(cluster_df['YouScience Clusters'], cluster_df[school]))

# replacing clusters with POSs
school_POS_df = school_clusters_df.replace(to_replace=to_replace)

# unsupported matches are replaced with POS = 0
# np.nan is the canonical spelling; the capitalised aliases are legacy
# (np.NaN was removed in NumPy 2.0). Note this turns every missing value in
# the frame into 0, not only the clusters that mapped to a missing POS.
school_POS_df = school_POS_df.replace({np.nan: 0})

In [None]:
# determining demand of available POS, using 1st and 2nd matches
# Series.add(..., fill_value=0) keeps a POS that appears in only one of the
# two columns; plain `+` aligns on the index and would turn that POS's total
# into NaN. After filling, every total is a whole number, so cast back to int.
first_counts = school_POS_df['First'].value_counts()
second_counts = school_POS_df['Second'].value_counts()
l = first_counts.add(second_counts, fill_value=0).astype(int)

# POS -> combined demand, highest demand first. The 0 placeholder is counted
# like any other value.
# NOTE(review): `ord` shadows the Python builtin and `l` is easy to misread;
# both names are kept in case other cells use them -- consider renaming.
ord = dict(l.sort_values(ascending=False))

In [None]:
# rows whose First value is the 0 placeholder; the cell displays how many
first_is_zero = school_POS_df['First'].eq(0)
X = school_POS_df[first_is_zero]
len(X)

In [None]:
# of the rows in X, those whose Second value is also 0; the cell displays how many
second_is_zero = X['Second'].eq(0)
Y = X[second_is_zero]
len(Y)