In [2]:
import geopandas as gpd
from tqdm import tqdm
import libpysal as lp
import json
import pandas as pd

In [3]:
def retrieve_data(sy, data_dir="./LCPS_data"):
    """
    Load the student and school-site layers for one school year.

    Parameters
    ----------
    sy : str
        School-year tag embedded in the shapefile names, e.g. '2017_2018'.
    data_dir : str, optional
        Directory holding the shapefiles. Defaults to "./LCPS_data", the
        location that used to be hard-coded inside this function.

    Returns
    -------
    tuple
        ``(students, schools)``: the results of ``gpd.read_file`` for
        ``Students_<sy>.shp`` and ``LCPS_Sites_<sy>.shp`` respectively.
    """
    schools = gpd.read_file('{}/LCPS_Sites_{}.shp'.format(data_dir, sy))
    students = gpd.read_file('{}/Students_{}.shp'.format(data_dir, sy))
    # NOTE: a PlanningZones_<sy>.shp layer was once read here too; it is not loaded.
    return students, schools


In [4]:
# School-year tag; it selects which shapefiles retrieve_data() reads.
sy = "2017_2018"

In [5]:
# Load the student and school layers for the selected school year.
students, schools = retrieve_data(sy)

# Comparing student population to school capacity (within a district)

In [15]:
# List the attribute columns available on the student layer.
students.columns


Index(['ObjectID', 'ARC_Single', 'Loud_ID', 'Address', 'Address_Ex', 'ADDwEXT',
       'Prefix', 'Street_Nam', 'Street_Typ', 'Suffix', 'Town', 'Zip_1',
       'Subdivisio', 'GRID_CODE', 'GRADE', 'IEP_FLAG', 'GENDER', 'ETHNIC',
       'BIRTH', 'APT_PH', 'STATE_PH', 'LEP_FLAG', 'FSI', 'Current_S',
       'Previous_S', 'ELEM_CODE', 'INT_CODE', 'HIGH_CODE', 'geometry'],
      dtype='object')

## Elementary School

### Population from Students dataframe

In [12]:
# Count students per elementary attendance code (ELEM_CODE).
# Elementary pupils are taken to be everyone with GRADE <= 5.
elementary_students = students[students['GRADE'] <= 5]
students_elem_code = elementary_students[['ObjectID', 'ELEM_CODE']]

# 'count' tallies the non-null ObjectID values in each ELEM_CODE group.
grouped_df = (
    students_elem_code
    .groupby(['ELEM_CODE'])
    .agg('count')
    .rename(columns={"ObjectID": "Num_Students"})
)
grouped_df.head()



Unnamed: 0_level_0,Num_Students
ELEM_CODE,Unnamed: 1_level_1
ALD,162
ALG,395
ARC,829
ASH,517
BAL,534


### Capacity from Schools dataframe

In [13]:
# Capacity of every elementary school, keyed the same way as the student counts.
elementary_schools = schools[schools['CLASS'] == 'ELEMENTARY']

# rename() without inplace returns a new frame. Renaming the column selection
# in place is what raised SettingWithCopyWarning here.
output_df2 = (
    elementary_schools[['SCH_CODE', 'CAPACITY']]
    .rename(columns={"SCH_CODE": "ELEM_CODE"})
)
output_df2.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(**kwargs)


Unnamed: 0,ELEM_CODE,CAPACITY
1,MSE,1003
2,CTY,815
3,HUT,862
5,BUF,1039
6,LIN,138


### Merged dataframes

In [14]:
# Join enrolment counts to capacities on ELEM_CODE. The default inner join
# keeps only codes present in both tables.
merged_df = grouped_df.merge(output_df2, on='ELEM_CODE')
merged_df.head()

Unnamed: 0,ELEM_CODE,Num_Students,CAPACITY
0,ALD,162,138
1,ALG,395,658
2,ARC,829,924
3,ASH,517,730
4,BAL,534,730


### Comparison of capacities to find balance

In [15]:
# A school is flagged as imbalanced when enrolment is strictly above capacity.
imbalance = merged_df['Num_Students'].gt(merged_df['CAPACITY'])
num_imbalance = imbalance.sum()
print('The number of imbalanced/ overpopulated schools is', num_imbalance)

# Every remaining merged school is reported as balanced.
balance = len(merged_df) - num_imbalance
print('The number of balanced schools is', balance)

The number of imbalanced/ overpopulated schools is 2
The number of balanced schools is 53


In [51]:
# Keep only the overpopulated schools. Take an explicit copy so that columns
# added to imbalance_df later do not write into a slice of merged_df.
imbalance_df = merged_df[imbalance].copy()


### Quantifying the imbalance

In [53]:
# Number of students above capacity for each overpopulated school.
# assign() builds a new frame instead of writing into imbalance_df in place,
# so no SettingWithCopyWarning arises even when imbalance_df is a filtered slice.
imbalance_df = imbalance_df.assign(
    Amount_Imbalance=imbalance_df['Num_Students'] - imbalance_df['CAPACITY']
)

In [55]:
# Overcrowding relative to capacity, stored as a fraction (0.17 means 17%).
# assign() returns a new frame rather than mutating a possible slice in place.
imbalance_df = imbalance_df.assign(
    Percentage=imbalance_df['Amount_Imbalance'] / imbalance_df['CAPACITY']
)

In [56]:
# Show the overpopulated elementary schools with the computed imbalance columns.
imbalance_df

Unnamed: 0,ELEM_CODE,Num_Students,CAPACITY,Amount_Imbalance,Percentage
0,ALD,162,138,24,0.173913
7,BUF,1353,1039,314,0.302214


###### Percentage imbalances of 17% and 30% were recorded for ALD and BUF, respectively

## Middle School

### Population from Students dataframe

In [69]:
# Inspect the school records classified as MIDDLE.
schools.loc[schools['CLASS'] == 'MIDDLE']

Unnamed: 0,OBJECTID,SCH_CODE,CLASS,SCH_NUM,NAME,DATE_OPENE,BUILDING_D,CLASSROOMS,SPECIAL_SI,TRAILERS,...,STRT_GRD,END_GRD,ELEM_,INT_,MID_,HIGH_,CAPACITY,PERM_CLRM,PORT_CLRM,geometry
10,11,RBM,MIDDLE,208,RIVER BEND MS,2002,0,63,8,0,...,6,8,0,0,208,0,1216,63,0,POINT (11795742.945 7068417.710)
12,13,SRM,MIDDLE,209,SENECA RIDGE MS,1977,0,70,4,0,...,6,8,0,0,209,0,1264,70,0,POINT (11803055.580 7059496.639)
13,14,JLS,MIDDLE,210,J LUPTON SIMPSON MS,1976,0,70,2,0,...,6,8,0,0,210,0,1255,70,0,POINT (11746869.610 7081464.679)
14,15,BAM,MIDDLE,216,BRAMBLETON MS,2017,75,75,0,0,...,6,8,0,0,216,0,1269,75,0,POINT (11752419.416 7041375.768)
24,25,BRM,MIDDLE,202,BLUE RIDGE MS,1971,0,64,5,0,...,6,8,0,0,202,0,1187,64,0,POINT (11707787.221 7094850.020)
26,27,BEM,MIDDLE,201,BELMONT RIDGE MS,2003,0,60,14,0,...,6,8,0,0,201,0,1242,60,0,POINT (11769448.875 7081692.730)
28,29,MMS,MIDDLE,207,MERCER MS,2004,0,74,2,0,...,6,8,0,0,207,0,1388,74,0,POINT (11754067.400 7021436.401)
40,41,HPM,MIDDLE,206,HARPER PARK MS,1999,0,62,8,0,...,6,8,0,0,206,0,1197,62,0,POINT (11760302.724 7083213.683)
44,45,ERM,MIDDLE,203,EAGLE RIDGE MS,2001,0,60,14,0,...,6,8,0,0,203,0,1260,60,0,POINT (11762520.169 7054333.740)
57,58,FWS,MIDDLE,204,FARMWELL STATION MS,1995,0,59,14,2,...,6,8,0,0,204,0,1241,59,2,POINT (11774699.744 7062320.886)


In [70]:
# Count students per middle-school attendance code (INT_CODE).
# Middle-school pupils are those in grades 6 through 8, both ends included.
middle_students = students[students['GRADE'].between(6, 8)]
students_mid_code = middle_students[['ObjectID', 'INT_CODE']]

# 'count' tallies the non-null ObjectID values in each INT_CODE group.
grouped_df = (
    students_mid_code
    .groupby(['INT_CODE'])
    .agg('count')
    .rename(columns={"ObjectID": "Num_Students"})
)
grouped_df.head()



Unnamed: 0_level_0,Num_Students
INT_CODE,Unnamed: 1_level_1
BAM,615
BEM,375
BRM,621
ERM,622
FWS,509


### Capacity from Schools dataframe

In [71]:
# Capacity of every middle school, keyed the same way as the student counts.
middle_schools = schools[schools['CLASS'] == 'MIDDLE']

# A non-inplace rename returns a new frame. Renaming a column selection in
# place is what triggers SettingWithCopyWarning.
output_df2 = (
    middle_schools[['SCH_CODE', 'CAPACITY']]
    .rename(columns={"SCH_CODE": "INT_CODE"})
)
output_df2.head()

Unnamed: 0,INT_CODE,CAPACITY
10,RBM,1216
12,SRM,1264
13,JLS,1255
14,BAM,1269
24,BRM,1187


### Merged dataframes

In [72]:
# Join enrolment counts to capacities on INT_CODE (default inner join).
# NOTE(review): the recorded output reports 16 merged rows, but the MIDDLE
# listing shown earlier has 10 rows. Confirm SCH_CODE is unique among MIDDLE
# schools, or that the listing was truncated.
merged_df = grouped_df.merge(output_df2, on='INT_CODE')
merged_df.head()

Unnamed: 0,INT_CODE,Num_Students,CAPACITY
0,BAM,615,1269
1,BEM,375,1242
2,BRM,621,1187
3,ERM,622,1260
4,FWS,509,1241


### Comparison of capacities to find balance

In [73]:
# Flag middle schools whose enrolment is strictly above capacity.
imbalance = merged_df['Num_Students'].gt(merged_df['CAPACITY'])
num_imbalance = imbalance.sum()
print('The number of imbalanced/ overpopulated schools is', num_imbalance)

# Every remaining merged school is reported as balanced.
balance = len(merged_df) - num_imbalance
print('The number of balanced schools is', balance)

The number of imbalanced/ overpopulated schools is 0
The number of balanced schools is 16


In [74]:
# Keep only the overpopulated middle schools. Take an explicit copy so that
# columns added later do not write into a slice of merged_df.
imbalance_df = merged_df[imbalance].copy()


### Quantifying the imbalance

In [75]:
# Number of students above capacity for each overpopulated middle school.
# assign() returns a new frame instead of mutating a possible slice in place,
# and it also works when no school was flagged (empty frame).
imbalance_df = imbalance_df.assign(
    Amount_Imbalance=imbalance_df['Num_Students'] - imbalance_df['CAPACITY']
)

In [76]:
# Overcrowding relative to capacity, stored as a fraction rather than a percent.
# assign() returns a new frame rather than mutating a possible slice in place.
imbalance_df = imbalance_df.assign(
    Percentage=imbalance_df['Amount_Imbalance'] / imbalance_df['CAPACITY']
)

In [77]:
# Show the overpopulated middle schools (the frame is empty when none exceed capacity).
imbalance_df

Unnamed: 0,INT_CODE,Num_Students,CAPACITY,Amount_Imbalance,Percentage


###### No middle school exceeded its capacity, so no percentage imbalance was recorded (0 overpopulated schools)

## High School

### Population from Students dataframe

In [78]:
# Count students per high-school attendance code (HIGH_CODE).
# High-school pupils are those in grades 9 through 12, both ends included.
high_students = students[students['GRADE'].between(9, 12)]
students_high_code = high_students[['ObjectID', 'HIGH_CODE']]

# 'count' tallies the non-null ObjectID values in each HIGH_CODE group.
grouped_df = (
    students_high_code
    .groupby(['HIGH_CODE'])
    .agg('count')
    .rename(columns={"ObjectID": "Num_Students"})
)
grouped_df.head()

Unnamed: 0_level_0,Num_Students
HIGH_CODE,Unnamed: 1_level_1
BRH,1555
BWH,1840
DMH,1582
FHS,2029
HTH,1437


### Capacity from Schools dataframe

In [79]:
# Capacity of every high school, keyed the same way as the student counts.
# Some HIGH rows carry CAPACITY 0 (visible in the output below).
high_schools = schools[schools['CLASS'] == 'HIGH']

# A non-inplace rename returns a new frame. Renaming a column selection in
# place is what triggers SettingWithCopyWarning.
output_df2 = (
    high_schools[['SCH_CODE', 'CAPACITY']]
    .rename(columns={"SCH_CODE": "HIGH_CODE"})
)
output_df2.head()

Unnamed: 0,HIGH_CODE,CAPACITY
4,MTC,0
8,LCH,1361
11,DMH,1423
19,DCS,0
25,BWH,1855


### Merged dataframes

In [80]:
# Join enrolment counts to capacities on HIGH_CODE. The default inner join
# keeps only codes present in both tables.
merged_df = grouped_df.merge(output_df2, on='HIGH_CODE')
merged_df.head()

Unnamed: 0,HIGH_CODE,Num_Students,CAPACITY
0,BRH,1555,1787
1,BWH,1840,1855
2,DMH,1582,1423
3,FHS,2029,1858
4,HTH,1437,1687


### Comparison of capacities to find balance

In [81]:
# Flag high schools whose enrolment is strictly above capacity.
imbalance = merged_df['Num_Students'].gt(merged_df['CAPACITY'])
num_imbalance = imbalance.sum()
print('The number of imbalanced/ overpopulated schools is', num_imbalance)

# Every remaining merged school is reported as balanced.
balance = len(merged_df) - num_imbalance
print('The number of balanced schools is', balance)

The number of imbalanced/ overpopulated schools is 8
The number of balanced schools is 7


In [82]:
# Keep only the overpopulated high schools. Take an explicit copy so that
# columns added later do not write into a slice of merged_df.
imbalance_df = merged_df[imbalance].copy()


### Quantifying the imbalance

In [83]:
# Number of students above capacity for each overpopulated high school.
# assign() returns a new frame instead of mutating a possible slice in place.
imbalance_df = imbalance_df.assign(
    Amount_Imbalance=imbalance_df['Num_Students'] - imbalance_df['CAPACITY']
)

In [84]:
# Overcrowding relative to capacity, stored as a fraction (0.11 means 11%).
# NOTE(review): a flagged school with CAPACITY 0 would give an infinite value
# here; none appears in the recorded output, but confirm for other years.
imbalance_df = imbalance_df.assign(
    Percentage=imbalance_df['Amount_Imbalance'] / imbalance_df['CAPACITY']
)

In [85]:
# Show the overpopulated high schools with the computed imbalance columns.
imbalance_df

Unnamed: 0,HIGH_CODE,Num_Students,CAPACITY,Amount_Imbalance,Percentage
2,DMH,1582,1423,159,0.111736
3,FHS,2029,1858,171,0.092034
6,LCH,1484,1361,123,0.090375
7,LVH,1402,1346,56,0.041605
8,PFH,1694,1438,256,0.178025
9,PVH,1553,1498,55,0.036716
10,RRH,1975,1935,40,0.020672
12,SBH,1690,1651,39,0.023622


###### Percentage imbalances of 11.2%, 9.2%, 9.0%, 4.2%, 17.8%, 3.7%, 2.1% and 2.4% were recorded for DMH, FHS, LCH, LVH, PFH, PVH, RRH, and SBH, respectively
