# Metric RAG computation

This notebook computes a weighted Euclidean distance between each establishment and every other establishment of a consistent type (Academies, Maintained schools, SEN). Each establishment is therefore given a distance score (weight) against every other establishment, and the 60 closest establishments form the comparator set for that establishment. This is repeated for every establishment within each establishment type, and then finally across all establishments of all establishment types.

In [1]:
import numpy as np
import pandas as pd
import calculations as calcs
import glob 
import os

# Create and clean directory
from pathlib import Path
# Ensure the output folder exists (missing parents are created; it is not an
# error if the folder is already there).
Path("output/metric-rag").mkdir(parents=True, exist_ok=True)

# Delete every entry matched by the glob pattern so results from an earlier
# run are not left behind. Note that `*` does not match names starting with
# a dot, so hidden files are left in place.
for stale_path in glob.glob("output/metric-rag/*"):
    os.remove(stale_path)

## Parameter lists

The following sets up the parameter map for each establishment-type calculation. Vectors with the `_parameter_list` suffix name the columns read from one of the pre-processed datasets. Vectors with the `_parameter_weightings` suffix hold the corresponding weight to be applied to each component of the Euclidean distance.

In [2]:
# Column names of the EHC primary-need breakdown (presumably one column per
# need type - confirm against the pre-processed datasets).
sen_breakdown_parameter_list = [
    'EHC_Primary_need_spld', 'EHC_Primary_need_mld', 'EHC_Primary_need_sld',
    'EHC_Primary_need_pmld', 'EHC_Primary_need_semh', 'EHC_Primary_need_slcn',
    'EHC_Primary_need_hi', 'EHC_Primary_need_vi', 'EHC_Primary_need_msi',
    'EHC_Primary_need_pd', 'EHC_Primary_need_asd', 'EHC_Primary_need_oth',
]

# Pupil-mix comparison for non-special schools: columns and their weights
# (same order, one weight per column).
school_parameter_list = ['NumberOfPupils', 'Percentage Free school meals', 'Percentage SEN']
school_parameter_weightings = [0.5, 0.4, 0.1]

# Pupil-mix comparison for special schools: the two headline measures followed
# by every primary-need column. Both lists are derived from
# `sen_breakdown_parameter_list` so that there is exactly one weight per
# column (2 + 12 = 14 entries each).
# NOTE(review): this includes 'EHC_Primary_need_spld'; confirm that column is
# present in the special-school dataset.
# NOTE(review): 'Number of pupils' is spelled differently from
# 'NumberOfPupils' in `school_parameter_list` - confirm which spelling the
# dataset used for special schools actually has.
special_school_parameter_list = [
    'Number of pupils',
    'Percentage Free school meals',
    *sen_breakdown_parameter_list,
]
special_school_parameter_weightings = [0.6, 0.4] + [1] * len(sen_breakdown_parameter_list)

# Building/area comparison: columns and their weights.
school_area_parameter_list = ['Total Internal Floor Area', 'Age Average Score']
school_area_parameter_weightings = [0.8, 0.2]

## Academy pupil mix

In [3]:
def _normalise_boarding(status):
    """Return 'Not Boarding' for an 'Unknown' status; pass anything else through."""
    return 'Not Boarding' if status == 'Unknown' else status


# Load the pre-processed academies dataset.
academy_data = pd.read_csv("output/pre-processing/academies.csv")

# Fold 'Unknown' boarding status into 'Not Boarding' before grouping on it.
academy_data['Boarders (name)'] = academy_data['Boarders (name)'].map(_normalise_boarding)

# Fill missing values in each comparison measure with that column's median
# (taken over the whole academies table, not per group).
for measure in ('NumberOfPupils', 'Percentage Free school meals', 'Percentage SEN'):
    academy_data[measure] = academy_data[measure].fillna(academy_data[measure].median())

# One row per (phase, boarding status); every other column is collapsed into
# a list of the values belonging to that group.
grouped_academy_data = academy_data.groupby(['SchoolPhaseType', 'Boarders (name)']).agg(list)

# Add one *_Range column per measure by applying calcs.compute_range to the
# group's list of values for that measure.
range_sources = {
    'Pupil_Range': 'NumberOfPupils',
    'FSM_Range': 'Percentage Free school meals',
    'SEN_Range': 'Percentage SEN',
}
grouped_academy_data = grouped_academy_data.assign(**{
    range_name: grouped_academy_data[measure].map(calcs.compute_range)
    for range_name, measure in range_sources.items()
})

# Drop the groups whose phase is 'Special'; all other phases are kept.
is_special_phase = grouped_academy_data.index.get_level_values('SchoolPhaseType') == 'Special'
special_excluded = grouped_academy_data[~is_special_phase]
special_excluded

Unnamed: 0_level_0,Unnamed: 1_level_0,URN,Company Registration Number,Incorporation Date,Academy Trust UPIN,Academy UKPRN,Academy Trust Name,Academy Name,Academy UPIN,Trust Type,Date Opened,...,Trust Balance,Central Services Balance,PFI School,Central Services Financial Position,Academy Financial Position,Trust Financial Position,Status,Pupil_Range,FSM_Range,SEN_Range
SchoolPhaseType,Boarders (name),Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Alternative Provision,Not Boarding,"[148144, 145704, 141109, 142569, 144031, 13966...","[11788031, 11788031, 8641815, 6207590, 8299181...","[2019-01-25 00:00:00.0000000, 2019-01-25 00:00...","[151922, 151922, 137840, 135065, 137417, 13688...","[10082817.0, 10082817.0, 10060566.0, 10058211....","[Alternative Learning Trust, Alternative Learn...",[North West Kent Alternative Provision Service...,"[141642, 134280, 131050, 132795, 138990, 12340...","[Multi Academy Trust (MAT), Multi Academy Trus...","[2020-09-01 00:00:00.0000000, 2019-06-01 00:00...",...,"[702.0, 702.0, 0.0, 9560.0, 499.0, nan, 1012.0...","[1817.0, 1817.0, 1128.0, 9560.0, 1421.0, 329.0...","[Non-PFI school, Non-PFI school, Non-PFI schoo...","[Surplus, Surplus, Surplus, Surplus, Surplus, ...","[Surplus, Surplus, Surplus, Deficit, Surplus, ...","[Surplus, Surplus, Deficit, Surplus, Surplus, ...","[Open, Open, Open, Open, Open, Open, Open, Ope...","[315.0, 315.0, 315.0, 315.0, 315.0, 315.0, 315...","[87.5, 87.5, 87.5, 87.5, 87.5, 87.5, 87.5, 87....","[100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100..."
Post-16,Boarding,"[140971, 148791]","[8515877, 9320523]","[2013-05-03 00:00:00.0000000, 2014-11-20 00:00...","[137977, 135984]","[10060436.0, 10060902.0]","[Exeter Mathematics School, Richard Huish Trust]","[Exeter Mathematics School, Richard Huish Coll...","[130500, 163528]","[Single Academy Trust (SAT), Multi Academy Tru...","[2014-09-01 00:00:00.0000000, 2021-10-01 00:00...",...,"[nan, -68.0]","[618.0, 915.0]","[Non-PFI school, Non-PFI school]","[Surplus, Surplus]","[Surplus, Surplus]","[Unknown, Deficit]","[Open, (Re)opened in period]","[127.0, 127.0]","[23.4, 23.4]","[0.15956155260328098, 0.15956155260328098]"
Post-16,Not Boarding,"[144463, 144886, 144740, 143704, 145749, 13989...","[10578239, 8506178, 9802491, 2303464, 11200244...","[2017-01-23 00:00:00.0000000, 2013-04-26 00:00...","[139638, 137855, 140016, 134876, 141412, 13725...","[10064151.0, 10060425.0, 10066090.0, 10058150....","[Altus Education Partnership, Brigantia Learni...","[Rochdale Sixth Form College, Longley Park Six...","[138453, 139349, 139580, 152005, 140514, 12333...","[Multi Academy Trust (MAT), Multi Academy Trus...","[2017-04-01 00:00:00.0000000, 2017-08-01 00:00...",...,"[-1178.0, 3075.0, nan, 7052.0, 0.0, 5123.0, -1...","[8780.0, 3075.0, 910.0, 7052.0, 110.0, 5123.0,...","[Non-PFI school, Non-PFI school, Non-PFI schoo...","[Surplus, Surplus, Surplus, Surplus, Surplus, ...","[Surplus, Deficit, Surplus, Deficit, Surplus, ...","[Deficit, Surplus, Unknown, Surplus, Deficit, ...","[Open, Open, Open, Open, Open, Open, Open, Ope...","[2117.0, 2117.0, 2117.0, 2117.0, 2117.0, 2117....","[23.4, 23.4, 23.4, 23.4, 23.4, 23.4, 23.4, 23....","[5.723905723905724, 5.723905723905724, 5.72390..."
Primary,Boarding,"[147676, 147857]","[10749662, 7466353]","[2017-05-02 00:00:00.0000000, 2010-12-10 00:00...","[139827, 135711]","[10064620.0, 10058443.0]","[Hamwic Education Trust, Sapientia Education T...","[Weston Secondary School, Wymondham College Pr...","[163109, 162841]","[Multi Academy Trust (MAT), Multi Academy Trus...","[2021-09-01 00:00:00.0000000, 2020-09-01 00:00...",...,"[838.0, 2546.0]","[6194.0, 2546.0]","[Non-PFI school, Non-PFI school]","[Surplus, Surplus]","[Surplus, Deficit]","[Surplus, Surplus]","[Open, Open]","[513.0, 513.0]","[47.400000000000006, 47.400000000000006]","[1.1533312738132016, 1.1533312738132016]"
Primary,Not Boarding,"[148853, 144542, 144551, 148854, 136354, 14021...","[10817580, 10817580, 10817580, 10817580, 73187...","[2017-06-14 00:00:00.0000000, 2017-06-14 00:00...","[139821, 139821, 139821, 139821, 135399, 13539...","[10064612.0, 10064612.0, 10064612.0, 10064612....","[1Excellence Multi Academy Trust, 1Excellence ...","[Evenwood Church of England Primary School, Pe...","[163480, 138448, 138465, 163504, 119379, 12924...","[Multi Academy Trust (MAT), Multi Academy Trus...","[2021-12-01 00:00:00.0000000, 2017-07-01 00:00...",...,"[55.0, 55.0, 55.0, 55.0, 0.0, 0.0, 0.0, 1832.0...","[1154.0, 1154.0, 1154.0, 1154.0, 652.0, 652.0,...","[Non-PFI school, Non-PFI school, Non-PFI schoo...","[Surplus, Surplus, Surplus, Surplus, Surplus, ...","[Surplus, Surplus, Surplus, Surplus, Surplus, ...","[Surplus, Surplus, Surplus, Surplus, Deficit, ...","[(Re)opened in period, Open, Open, (Re)opened ...","[1319.0, 1319.0, 1319.0, 1319.0, 1319.0, 1319....","[87.9, 87.9, 87.9, 87.9, 87.9, 87.9, 87.9, 87....","[26.190476190476193, 26.190476190476193, 26.19..."
Secondary,Boarding,"[148221, 139655, 138289, 136177, 139971, 13915...","[7535642, 8098956, 8082185, 7209122, 7930340, ...","[2011-02-18 00:00:00.0000000, 2012-06-08 00:00...","[135944, 137050, 137026, 135369, 136804, 13490...","[10058531.0, 10059829.0, 10059803.0, 10058300....","[Bohunt Education Trust, Bridgwater & Taunton ...","[Steyning Grammar School, Brymore Academy, Bur...","[163027, 123954, 121840, 119233, 123312, 12067...","[Multi Academy Trust (MAT), Multi Academy Trus...","[2020-12-01 00:00:00.0000000, 2013-09-01 00:00...",...,"[559.0, 1991.0, nan, nan, nan, nan, 34.0, nan,...","[7237.0, 2132.0, 1148.0, 1009.0, 368.0, 182.0,...","[Non-PFI school, Non-PFI school, Non-PFI schoo...","[Surplus, Surplus, Surplus, Surplus, Surplus, ...","[Surplus, Surplus, Surplus, Surplus, Surplus, ...","[Surplus, Surplus, Unknown, Unknown, Unknown, ...","[Open, Open, Open, Open, Open, Open, Open, Ope...","[1943.0, 1943.0, 1943.0, 1943.0, 1943.0, 1943....","[22.599999999999998, 22.599999999999998, 22.59...","[15.244446308490263, 15.244446308490263, 15.24..."
Secondary,Not Boarding,"[136730, 136844, 137377, 137083, 144809, 13703...","[7595434, 7595434, 7740516, 7705552, 7705552, ...","[2011-04-07 00:00:00.0000000, 2011-04-07 00:00...","[134890, 134890, 136150, 135666, 135666, 13566...","[10058682.0, 10058682.0, 10059272.0, 10059141....","[5 Dimensions Trust, 5 Dimensions Trust, Abbey...","[Shenley Brook End School, The Hazeley Academy...","[119734, 119844, 120322, 120083, 139214, 12003...","[Multi Academy Trust (MAT), Multi Academy Trus...","[2011-05-01 00:00:00.0000000, 2011-07-01 00:00...",...,"[177.0, 177.0, nan, 1832.0, 1832.0, 1832.0, 14...","[3603.0, 3603.0, 416.0, 4541.0, 4541.0, 4541.0...","[Non-PFI school, Non-PFI school, Non-PFI schoo...","[Surplus, Surplus, Surplus, Surplus, Surplus, ...","[Surplus, Surplus, Surplus, Surplus, Surplus, ...","[Surplus, Surplus, Unknown, Surplus, Surplus, ...","[Open, Open, Open, Open, Open, Open, Open, Ope...","[2990.0, 2990.0, 2990.0, 2990.0, 2990.0, 2990....","[75.4, 75.4, 75.4, 75.4, 75.4, 75.4, 75.4, 75....","[18.46846846846847, 18.46846846846847, 18.4684..."
University technical college,Not Boarding,"[146375, 141111, 139268, 145155, 146303, 14593...","[8707909, 8707909, 8707909, 8707909, 5670663, ...","[2013-09-26 00:00:00.0000000, 2013-09-26 00:00...","[137921, 137921, 137921, 137921, 135047, 13663...","[10060613.0, 10060613.0, 10060613.0, 10060613....","[Activate Learning Education Trust, Activate L...","[UTC Heathrow, UTC Oxfordshire, UTC Reading, U...","[141385, 132690, 122057, 139894, 141375, 14077...","[Multi Academy Trust (MAT), Multi Academy Trus...","[2014-09-01 00:00:00.0000000, 2015-09-01 00:00...",...,"[2146.0, 2146.0, 2146.0, 2146.0, 233.0, -7984....","[279.0, 279.0, 279.0, 279.0, 3802.0, 3810.0, 5...","[Non-PFI school, Non-PFI school, Non-PFI schoo...","[Surplus, Surplus, Surplus, Surplus, Surplus, ...","[Deficit, Deficit, Surplus, Deficit, Deficit, ...","[Surplus, Surplus, Surplus, Surplus, Surplus, ...","[Open, Open, Open, Open, Open, Open, Open, Clo...","[684.0, 684.0, 684.0, 684.0, 684.0, 684.0, 684...","[38.099999999999994, 38.099999999999994, 38.09...","[8.14566527505122, 8.14566527505122, 8.1456652..."


In [15]:
# Maximum number of candidates kept in each establishment's comparator set.
# NOTE(review): in the rendered output each URN appears as its own first
# candidate (distance 0.0), so a set seems to hold at most 59 *other*
# establishments - confirm that is intended.
COMPARATOR_SET_SIZE = 60

# One frame per (phase, boarding status) group, concatenated once at the end
# rather than growing a DataFrame inside the loop.
group_sets = []
for (phase, boarders), row in special_excluded.iterrows():
    # Distances for the academies in this group; the result is laid out below
    # as (URN, candidate URN, distance) rows.
    distances = calcs.non_special_distance_calc(
        row['URN'],
        row['NumberOfPupils'],
        row['Percentage Free school meals'],
        row['Percentage SEN'],
        row['Pupil_Range'],
        row['FSM_Range'],
        row['SEN_Range'],
    )
    nearest = (
        pd.DataFrame(distances, columns=['URN', 'Candidates', 'Distance'])
        .sort_values(by=['URN', 'Distance'], ascending=[False, True])
        # Keep only the closest rows per URN *before* collapsing to lists, so
        # the lists never hold more than COMPARATOR_SET_SIZE entries.
        .groupby('URN')
        .head(COMPARATOR_SET_SIZE)
        # Collapse to one row per URN with parallel Candidates/Distance lists
        # (closest first, as ordered by the sort above).
        .groupby('URN')
        .agg(list)
        .reset_index()
        # Tag the rows with the group they came from and index by it.
        .assign(**{'SchoolPhaseType': phase, 'Boarders (name)': boarders})
        .set_index(['SchoolPhaseType', 'Boarders (name)', 'URN'])
    )
    group_sets.append(nearest)

# pd.concat raises on an empty list, so fall back to an empty frame when
# there were no groups to process.
sets = pd.concat(group_sets) if group_sets else pd.DataFrame()

sets

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Candidates,Distance
SchoolPhaseType,Boarders (name),URN,Unnamed: 3_level_1,Unnamed: 4_level_1
Alternative Provision,Not Boarding,138264.0,"[138264.0, 143761.0, 139925.0, 147855.0, 14747...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0082868920624..."
Alternative Provision,Not Boarding,138277.0,"[138277.0, 142259.0, 142793.0, 139509.0, 13950...","[0.0, 0.0, 0.006734350297014739, 0.16934953291..."
Alternative Provision,Not Boarding,138379.0,"[138379.0, 142835.0, 144023.0, 140627.0, 14789...","[0.0, 0.15735913879522825, 0.17037527409942446..."
Alternative Provision,Not Boarding,138775.0,"[138775.0, 141739.0, 139413.0, 140649.0, 13982...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
Alternative Provision,Not Boarding,139114.0,"[139114.0, 142702.0, 139665.0, 148588.0, 14322...","[0.0, 0.03237893551141869, 0.06203413424916427..."
...,...,...,...,...
University technical college,Not Boarding,146560.0,"[146560.0, 142900.0, 145900.0, 140160.0, 14260...","[0.0, 0.11567928444989008, 0.22854898114198458..."
University technical college,Not Boarding,146648.0,"[146648.0, 147685.0, 142899.0, 145900.0, 14289...","[0.0, 0.07482560347671036, 0.10047620295547079..."
University technical college,Not Boarding,147685.0,"[147685.0, 142890.0, 142899.0, 142605.0, 14026...","[0.0, 0.0902955379139546, 0.17281887853211256,..."
University technical college,Not Boarding,147838.0,"[147838.0, 144766.0, 140987.0, 139234.0, 14096...","[0.0, 0.10695042009140009, 0.1425305213068696,..."


## Maintained school pupil mix

## All pupil mix

## Academy area mix

## Maintained school area mix

## All area mix