In [None]:
import pandas as pd
import numpy as np
"""
Goal: 
- Create a rudimentary matching algorithm 
- Note the process and decisions made here: 
Decisions: 
- Scope of Rough Draft: work for one school, then focus on generalizing functions/script
- Want to track the number of clusters that were dropped due to no matching POS per school
"""

In [None]:
# (1) Need to read in by_school match csv and tally the demand of YS clusters per school
# (2) Need to convert YS clusters into POSs per school
# (3) Need to determine capacity vector for school

In [None]:
# Middle schools covered by the matching run. The planning workbook read
# further down is expected to have one sheet per name in this list.
schools = [
    'Oakland Middle School',
    'Siegel Middle School',
    'Whitworth-Buchanan Middle School',
    'Christiana Middle School',
    'Smyrna Middle School',
    'Stewarts Creek Middle School',
    'Rockvale Middle School',
    'Rocky Fork Middle School',
    'Blackman Middle School',
    'Thurman Francis Arts Academy',
    'Rock Springs Middle School',
    'LaVergne Middle School',
]

In [None]:
# (1) Load the per-school YouScience (YS) cluster matches and the
#     cluster -> POS conversion table, then narrow the table to this school.
school = 'Blackman Middle School'
path = 'YS_Criteria_by_School/' + school + ' YSCriteria.csv'

# 'Unnamed: 0' is presumably a stray index column written when the file was
# saved (TODO confirm). errors='ignore' keeps this cell from raising KeyError
# on an export that does not contain it, and chaining .drop() avoids mutating
# the frame in place.
school_clusters_df = pd.read_csv(path).drop(columns='Unnamed: 0', errors='ignore')

# Conversion table: one 'YouScience Clusters' column plus one column per school.
djp_df = pd.read_excel('../direct_join_prepared.xlsx').drop(
    columns='Unnamed: 0', errors='ignore'
)

# Keep only the cluster names and the selected school's column.
cluster_df = djp_df[['YouScience Clusters', school]]

In [None]:
# (2) Convert YS clusters into this school's POSs.
# Build the cluster -> POS lookup straight from the two columns of cluster_df
# (same mapping the row-by-row loop produced; on duplicate clusters the last
# row wins, as before).
to_replace = dict(zip(cluster_df['YouScience Clusters'], cluster_df[school]))

# Swap every cluster name for its POS, then turn missing values into 0 so that
# unsupported matches are represented as POS = 0.
# np.nan is used instead of np.NAN: the upper-case alias was removed in
# NumPy 2.0 and raises AttributeError there.
school_POS_df = (
    school_clusters_df
    .replace(to_replace=to_replace)
    .replace({np.nan: 0})
)

In [None]:
# Demand per POS = (times it appears as a 1st match) + (times it appears as a
# 2nd match).
# Series addition aligns on the index, so a plain `+` yields NaN for any POS
# that shows up in only one of the two columns. fill_value=0 treats the missing
# side as zero, and the cast restores integer counts.
# Note: the placeholder 0 (unsupported match) is tallied here as well.
l = (
    school_POS_df.First.value_counts()
    .add(school_POS_df.Second.value_counts(), fill_value=0)
    .astype(int)
)
# POS -> demand, highest demand first.
# NOTE(review): `ord` shadows the builtin ord(); the name is kept because later
# cells may reference it.
ord = dict(l.sort_values(ascending=False))

In [None]:
# Capacity vector for the selected school.
# Rule: the library and the auditorium seat 50; every other room seats 35.
# planning.xlsx holds one sheet per school, named exactly as in the schools list.
planning_df = pd.read_excel('planning.xlsx', sheet_name=school)
rooms = list(planning_df['MS Room #'])

# capacity maps room # -> number of seats
capacity = {}
for room in rooms:
    capacity[room] = 50 if room in ['Library', 'Auditorium'] else 35

# Sanity check: total seats must cover every row of school_POS_df.
check = sum(capacity.values())
n_rows = len(school_POS_df)
if n_rows > check:
    print(f'Capacity ({check}) does not fit school size ({n_rows})')

In [None]:
# Rows whose first match is unsupported at this school (First was mapped to 0).
first_unsupported = school_POS_df['First'].eq(0)
X = school_POS_df.loc[first_unsupported]
len(X)

In [None]:
# Of the rows in X, those whose second match is unsupported too (Second == 0).
second_unsupported = X['Second'].eq(0)
Y = X.loc[second_unsupported]
len(Y)