#### Data sources:

**1) State test Math and ELA results (2022-2023)**

Report Card Database (251.35 megabytes): This Access database contains assessment results (elementary- and intermediate-level ELA, Math, and Science; Annual Regents; Total Cohort Regents; NYSESLAT; NYSAA), for the state, districts, public with charter schools, by county, and Need to Resource Capacity group.
https://data.nysed.gov/downloads.php

**2) Schools locations**

NYS GIS Clearinghouse: NYS Schools
https://data.gis.ny.gov/maps/b6c624c740e4476689aa60fdc4aacb8f/about

#### Definitions of Performance Levels for the 2023 Grades 3-8 English Language Arts and Mathematics Tests  

**NYS Level 1**: Students performing at this level are below proficient in standards for their grade. They may demonstrate limited knowledge, skills, and practices embodied by the Learning Standards that are considered insufficient for the expectations at this grade. 

**NYS Level 2**: Students performing at this level are partially proficient in standards for their grade. They demonstrate knowledge, skills, and practices embodied by the Learning Standards that are considered partial but insufficient for the expectations at this grade. Students performing at Level 2 are considered on track to meet current New York high school graduation requirements but are not yet proficient in Learning Standards at this grade. 

**NYS Level 3**: Students performing at this level are proficient in standards for their grade. They demonstrate knowledge, skills, and practices embodied by the Learning Standards that are considered sufficient for the expectations at this grade.  

**NYS Level 4**: Students performing at this level excel in standards for their grade. They demonstrate knowledge, skills, and practices embodied by the Learning Standards that are considered more than sufficient for the expectations at this grade.  

*Source: NYSED, 2023, https://www.p12.nysed.gov/irs/ela-math/2023/ela-math-score-ranges-performance-levels-2023.pdf*

### Imports

In [1]:
import os
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
from shapely.geometry import Point
from fuzzywuzzy import process
import fuzzywuzzy
import base64
from io import BytesIO
import math
from tqdm import tqdm
import importlib
import utils
importlib.reload(utils)
from utils import create_plot, match_name

pd.set_option('display.float_format', '{:.3f}'.format)



### Read data

In [2]:
basePath = r"G:\My Drive\Kids\NYC_schools_mapped"
dataFolder = r"raw_data"
outputFolder = r"processed_data"

In [3]:
# Read GeoJSON into data frame
SchoolsFile = 'NYS_Schools.geojson'
NYCSchoolsPath = os.path.join(basePath, dataFolder, SchoolsFile)
NYCSchoolsGeom = gpd.read_file(NYCSchoolsPath)

# DistrictsFile = 'School Districts.geojson'
# NYCDistrictsPath = os.path.join(basePath, dataFolder, DistrictsFile)
# NYCDistrictsGeom = gpd.read_file(NYCDistrictsPath)

In [4]:
## Read schools test results files

# read schools math results file
fileName_math = "NYS_MS_MATH_from_NYS.xlsx"
mathPath = os.path.join(basePath,dataFolder,fileName_math)
print(mathPath)
mathResultsDF = pd.read_excel(mathPath)

# read schools ELA results file
fileName_ELA = "NYS_MS_ELA_from_NYS.xlsx"
ELAPath = os.path.join(basePath, dataFolder, fileName_ELA)
print(ELAPath)
ELAResultsDF = pd.read_excel(ELAPath)

G:\My Drive\Kids\NYC_schools_mapped\raw_data\NYS_MS_MATH_from_NYS.xlsx
G:\My Drive\Kids\NYC_schools_mapped\raw_data\NYS_MS_ELA_from_NYS.xlsx


In [None]:
# ## Read district results files

# # Read file with district wide Math test results to add to the map
# DistrictMathFile = "DistrictsMSMAthNorm.xlsx"
# DistrictMathPath = os.path.join(basePath, outputFolder, DistrictMathFile)
# DistrictMSMathData = pd.read_excel(DistrictMathPath)
# print(DistrictMSMathData.head(5))

# # Read file with district wide ELA test results to add to the map
# DistrictELAFile = "DistrictsMSELANorm.xlsx"
# DistrictELAPath = os.path.join(basePath, outputFolder, DistrictELAFile)
# DistrictMSELAData = pd.read_excel(DistrictELAPath)
# print(DistrictMSELAData.head(5))

In [None]:
mathResultsDF.info()

In [None]:
ELAResultsDF.info()

In [5]:
mathResultsDF.rename(columns = {'YEAR':'Year'}, inplace = True)
mathResultsDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29424 entries, 0 to 29423
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   INSTITUTION_ID   29424 non-null  int64 
 1   ENTITY_NAME      29424 non-null  object
 2   Year             29424 non-null  int64 
 3   ASSESSMENT_NAME  29424 non-null  object
 4   SUBGROUP_NAME    29424 non-null  object
 5   NUM_TESTED       29424 non-null  int64 
 6   LEVEL1_COUNT     29424 non-null  object
 7   LEVEL2_COUNT     29424 non-null  object
 8   LEVEL3_COUNT     29424 non-null  object
 9   LEVEL4_COUNT     29424 non-null  object
 10  COUNTY_NAME      29424 non-null  object
dtypes: int64(3), object(8)
memory usage: 2.5+ MB


### Prepare school layer

In [6]:
# Get locations for public schools only 
#(select only public schools (public, charter, charter, SATELLITE SITE FOR CHARTER SCHOOLS) from geoJSON)

NYCSchoolsGeom = NYCSchoolsGeom[NYCSchoolsGeom['INST_TYPE_DESC'] == 'PUBLIC SCHOOLS']
NYCSchoolsGeom

Unnamed: 0,OBJECTID,LEGAL_NAME,PHYSADDRLINE1,PHYSADDRLINE2,PHYSCITY,PHYSICALSTATE,PHYSZIPCD5,COUNTY_DESC,Contact_Name,CEO_TITLE,...,CEO_EMAIL,INST_TYPE_DESC,INSTSUBTYPDESC,RECORD_TYPE_DESC,COMMUNITY_TYPE_DESC,DIST_TYPE_DESC,SDL_DESC,INSTIT_ID,SED_CODE,geometry
1,2,CANASERAGA SCHOOL,8 MAIN ST,,CANASERAGA,NY,14822,ALLEGANY,SHELBY DEMITRY,PRINCIPAL,...,sdemitry@ccsdny.org,PUBLIC SCHOOLS,PUBLIC SCHOOL CENTRAL,PUBLIC SCHOOL (IMF),SMALL CENTRAL DISTRICTS,,CANASERAGA CSD,800000055189,021102040001,POINT (272197.657 4704860.403)
3,4,SISULU-WALKER CHARTER SCHOOL OF HARLEM,71-111 CONVENT AVE,,NEW YORK,NY,10027,NEW YORK,MICHELLE HAYNES,CHARTER SCHOOL LEADER,...,mhaynes@sisuluwalker.org,PUBLIC SCHOOLS,CHARTER SCHOOL,CHARTER SCHOOLS (IMF),NEW YORK CITY,,NYC GEOG DIST 5,800000047050,310500860804,POINT (588323.009 4518829.525)
6,7,OUR WORLD NEIGHBORHOOD CHARTER MS,38-27 30TH ST,,LONG ISLAND CITY,NY,11101,QUEENS,BRIAN FERGUSON,CHARTER SCHOOL LEADER,...,bferguson@owncs.org,PUBLIC SCHOOLS,SATELLITE SITE FOR CHARTER SCHOOLS,OTHER- NON IMF,NEW YORK CITY,,NYC GEOG DIST 30,800000059919,800000059919,POINT (590052.790 4511940.920)
17,18,INNOVA GIRLS ACADEMY CHARTER SCHOOL,546 OXFORD ST,,ROCHESTER,NY,14607,MONROE,LINDSAY SWANSON,CHARTER SCHOOL LEADER,...,lswanson@bes.org,PUBLIC SCHOOLS,CHARTER SCHOOL,CHARTER SCHOOLS (IMF),LARGE CITIES,,ROCHESTER CITY SD,800000092482,261600861196,POINT (289477.193 4780096.186)
21,22,GROWING UP GREEN MIDDLE CHARTER SCHOOL,34-12 10TH ST,,LONG ISLAND CITY,NY,11106,QUEENS,STEVE VIOLA,DIRECTOR OF OPERATIONS,...,viola@gugcs.org,PUBLIC SCHOOLS,SATELLITE SITE FOR CHARTER SCHOOLS,OTHER- NON IMF,NEW YORK CITY,,NYC GEOG DIST 30,800000084147,800000084147,POINT (589550.154 4513155.611)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7967,7968,FLORENCE BRASSER SCHOOL,1000 COLDWATER RD,,ROCHESTER,NY,14624,MONROE,TIMOTHY YOUNG,PRINCIPAL,...,timothy_young@gateschili.org,PUBLIC SCHOOLS,PUBLIC SCHOOL INDEPENDENT CENTRAL,PUBLIC SCHOOL (IMF),LARGE CENTRAL DISTRICTS AND VILLAGE DISTRICTS,,GATES-CHILI CSD,800000034044,260401060002,POINT (276682.637 4777256.280)
7968,7969,YORK CENTRAL ELEMENTARY SCHOOL,2578 GENESEE ST,,RETSOF,NY,14539,LIVINGSTON,DANIELLE HOCH,PRINCIPAL,...,dhoch@yorkcsd.org,PUBLIC SCHOOLS,PUBLIC SCHOOL CENTRAL,PUBLIC SCHOOL (IMF),SMALL CENTRAL DISTRICTS,,YORK CSD,800000033935,241701040003,POINT (263492.050 4746015.460)
7969,7970,YORK MIDDLE/HIGH SCHOOL,2578 GENESEE ST,,RETSOF,NY,14539,LIVINGSTON,LINDSEY PEET,PRINCIPAL,...,lmpeet@yorkcsd.org,PUBLIC SCHOOLS,PUBLIC SCHOOL CENTRAL,PUBLIC SCHOOL (IMF),SMALL CENTRAL DISTRICTS,,YORK CSD,800000033936,241701040004,POINT (263492.050 4746015.460)
7970,7971,LIVONIA ELEMENTARY SCHOOL,6 PUPPY LN,,LIVONIA,NY,14487,LIVINGSTON,CHARLES WHITTEL,PRINCIPAL,...,cwhittel2@livoniacsd.org,PUBLIC SCHOOLS,PUBLIC SCHOOL INDEPENDENT CENTRAL,PUBLIC SCHOOL (IMF),MEDIUM CENTRAL DISTRICTS,,LIVONIA CSD,800000033933,240801060001,POINT (282156.200 4743776.422)


In [7]:
# Make a dataframe from geoJSON with minimum needed columns

NYCSchoolsGeom_short = NYCSchoolsGeom[['OBJECTID', 'LEGAL_NAME', 'INSTSUBTYPDESC', 'SDL_DESC', 'geometry']]
NYCSchoolsGeom_short

Unnamed: 0,OBJECTID,LEGAL_NAME,INSTSUBTYPDESC,SDL_DESC,geometry
1,2,CANASERAGA SCHOOL,PUBLIC SCHOOL CENTRAL,CANASERAGA CSD,POINT (272197.657 4704860.403)
3,4,SISULU-WALKER CHARTER SCHOOL OF HARLEM,CHARTER SCHOOL,NYC GEOG DIST 5,POINT (588323.009 4518829.525)
6,7,OUR WORLD NEIGHBORHOOD CHARTER MS,SATELLITE SITE FOR CHARTER SCHOOLS,NYC GEOG DIST 30,POINT (590052.790 4511940.920)
17,18,INNOVA GIRLS ACADEMY CHARTER SCHOOL,CHARTER SCHOOL,ROCHESTER CITY SD,POINT (289477.193 4780096.186)
21,22,GROWING UP GREEN MIDDLE CHARTER SCHOOL,SATELLITE SITE FOR CHARTER SCHOOLS,NYC GEOG DIST 30,POINT (589550.154 4513155.611)
...,...,...,...,...,...
7967,7968,FLORENCE BRASSER SCHOOL,PUBLIC SCHOOL INDEPENDENT CENTRAL,GATES-CHILI CSD,POINT (276682.637 4777256.280)
7968,7969,YORK CENTRAL ELEMENTARY SCHOOL,PUBLIC SCHOOL CENTRAL,YORK CSD,POINT (263492.050 4746015.460)
7969,7970,YORK MIDDLE/HIGH SCHOOL,PUBLIC SCHOOL CENTRAL,YORK CSD,POINT (263492.050 4746015.460)
7970,7971,LIVONIA ELEMENTARY SCHOOL,PUBLIC SCHOOL INDEPENDENT CENTRAL,LIVONIA CSD,POINT (282156.200 4743776.422)


In [11]:
name = 'NYSPubChSchools_temp.csv'
path = os.path.join(basePath, outputFolder, name)
NYCSchoolsGeom_short.to_csv(path)

del name, path

In [8]:
# Dictionnary for schools test results results
subjects = ['Math', 'ELA']
resultsDFs = {'Math': mathResultsDF, 'ELA': ELAResultsDF}

In [9]:
# resultsDF.info() showed that most of the columns are objects instead of numbers and needed to be converted
for subject in subjects:
    resultsDF = resultsDFs[subject]
    resultsDF_colToConvert = ['LEVEL1_COUNT',
     'LEVEL2_COUNT',                             
     'LEVEL3_COUNT',
     'LEVEL4_COUNT',
     ]
    resultsDF[resultsDF_colToConvert] = resultsDF[resultsDF_colToConvert].apply(pd.to_numeric, errors = 'coerce')
    resultsDF.info()
    print(len(resultsDF))
    
del resultsDF

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29424 entries, 0 to 29423
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   INSTITUTION_ID   29424 non-null  int64  
 1   ENTITY_NAME      29424 non-null  object 
 2   Year             29424 non-null  int64  
 3   ASSESSMENT_NAME  29424 non-null  object 
 4   SUBGROUP_NAME    29424 non-null  object 
 5   NUM_TESTED       29424 non-null  int64  
 6   LEVEL1_COUNT     28935 non-null  float64
 7   LEVEL2_COUNT     28935 non-null  float64
 8   LEVEL3_COUNT     28935 non-null  float64
 9   LEVEL4_COUNT     28935 non-null  float64
 10  COUNTY_NAME      29424 non-null  object 
dtypes: float64(4), int64(3), object(4)
memory usage: 2.5+ MB
29424
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9823 entries, 0 to 9822
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   INSTITUTION_ID   9823

In [10]:
for subject in subjects:
    resultsDF = resultsDFs[subject]
    resultsDF = resultsDF[['ENTITY_NAME', 'Year', 'ASSESSMENT_NAME', 'LEVEL1_COUNT', 'LEVEL2_COUNT', 'LEVEL3_COUNT', 'LEVEL4_COUNT']]
    resultsDF.info()
    resultsDFs[subject] = resultsDF
    print(len(resultsDF))
    
del resultsDF

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29424 entries, 0 to 29423
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   ENTITY_NAME      29424 non-null  object 
 1   Year             29424 non-null  int64  
 2   ASSESSMENT_NAME  29424 non-null  object 
 3   LEVEL1_COUNT     28935 non-null  float64
 4   LEVEL2_COUNT     28935 non-null  float64
 5   LEVEL3_COUNT     28935 non-null  float64
 6   LEVEL4_COUNT     28935 non-null  float64
dtypes: float64(4), int64(1), object(2)
memory usage: 1.6+ MB
29424
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9823 entries, 0 to 9822
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   ENTITY_NAME      9823 non-null   object 
 1   Year             9823 non-null   int64  
 2   ASSESSMENT_NAME  9823 non-null   object 
 3   LEVEL1_COUNT     9739 non-null   float64
 4   LEVEL2_COUNT     9739 n

In [11]:
results_AVG2y = {}

for subject in subjects:
        
    resultsDF = resultsDFs[subject]
    
    resultsDF_grouped = resultsDF.groupby(['ENTITY_NAME'])[['LEVEL1_COUNT', 'LEVEL2_COUNT', 'LEVEL3_COUNT', 'LEVEL4_COUNT']].sum()
    # Change column names to include subject
    resultsDF_grouped.columns = [f'Level 1 {subject}',f'Level 2 {subject}',f'Level 3 {subject}',f'Level 4 {subject}']
    
    # Dataframe for middle schools by years with normalized values
    results_AVG2y[subject] = resultsDF_grouped.div(resultsDF_grouped.sum(axis=1), axis=0)
    results_AVG2y[subject].reset_index(inplace=True)
    
    print(results_AVG2y[subject].head(20))
    
    # Dataframe with average
    
del resultsDF, resultsDF_grouped

                             ENTITY_NAME  Level 1 Math  Level 2 Math  \
0                     30TH AVENUE SCHOOL         0.021         0.026   
1          47 AMER SIGN LANG & ENG LOWER         0.176         0.471   
2            A A GATES ELEMENTARY SCHOOL         0.360         0.280   
3             A A KINGSTON MIDDLE SCHOOL         0.292         0.297   
4               A D OLIVER MIDDLE SCHOOL         0.360         0.321   
5     A F PALMER ES / WINDSOR CENTRAL MS         0.179         0.288   
6             A M COSGROVE MIDDLE SCHOOL         0.349         0.346   
7                    A MACARTHUR BARR MS         0.207         0.272   
8        A MACARTHUR BARR MS 5-6 ACADEMY         0.205         0.256   
9   A PHILIP RANDOLPH CAMPUS HIGH SCHOOL           NaN           NaN   
10      AARON MOSSELL JUNIOR HIGH SCHOOL         0.332         0.287   
11                   ABRAHAM WING SCHOOL         0.306         0.306   
12  ACAD FOR COLLEGE PREP AND CAREER EXP         0.527         0

In [15]:
results_Norm = {}

for subject in subjects:
        
    resultsDF = resultsDFs[subject]
    
    resultsDF_grouped = resultsDF.groupby(['ENTITY_NAME', 'Year'])[['LEVEL1_COUNT', 'LEVEL2_COUNT', 'LEVEL3_COUNT', 'LEVEL4_COUNT']].sum()
    # Change column names to include subject
    resultsDF_grouped.columns = [f'Level 1 {subject}',f'Level 2 {subject}',f'Level 3 {subject}',f'Level 4 {subject}']
    
    # Dataframe for middle schools by years with normalized values
    results_Norm[subject] = resultsDF_grouped.div(resultsDF_grouped.sum(axis=1), axis=0)
    results_Norm[subject].reset_index(inplace=True)
    
    print(results_Norm[subject].head(20))
    
    # Dataframe with average
    
del resultsDF, resultsDF_grouped

                             ENTITY_NAME  Year  Level 1 Math  Level 2 Math  \
0                     30TH AVENUE SCHOOL  2022         0.021         0.021   
1                     30TH AVENUE SCHOOL  2023         0.021         0.032   
2          47 AMER SIGN LANG & ENG LOWER  2022         0.167         0.500   
3          47 AMER SIGN LANG & ENG LOWER  2023         0.200         0.400   
4            A A GATES ELEMENTARY SCHOOL  2022         0.288         0.269   
5            A A GATES ELEMENTARY SCHOOL  2023         0.438         0.292   
6             A A KINGSTON MIDDLE SCHOOL  2022         0.322         0.313   
7             A A KINGSTON MIDDLE SCHOOL  2023         0.261         0.280   
8               A D OLIVER MIDDLE SCHOOL  2022         0.397         0.347   
9               A D OLIVER MIDDLE SCHOOL  2023         0.320         0.293   
10    A F PALMER ES / WINDSOR CENTRAL MS  2022         0.200         0.376   
11    A F PALMER ES / WINDSOR CENTRAL MS  2023         0.157    

In [12]:
# Make a merged dataframe with both Math and ELA results for 2y average
DFs = list(results_AVG2y.values())
allResultsDFAVG2y = pd.merge(DFs[0], DFs[1], on = ['ENTITY_NAME'], how = 'inner')
allResultsDFAVG2y.head(5)

Unnamed: 0,ENTITY_NAME,Level 1 Math,Level 2 Math,Level 3 Math,Level 4 Math,Level 1 ELA,Level 2 ELA,Level 3 ELA,Level 4 ELA
0,30TH AVENUE SCHOOL,0.021,0.026,0.212,0.741,0.007,0.021,0.154,0.818
1,47 AMER SIGN LANG & ENG LOWER,0.176,0.471,0.235,0.118,0.119,0.405,0.357,0.119
2,A A GATES ELEMENTARY SCHOOL,0.36,0.28,0.22,0.14,0.22,0.33,0.26,0.19
3,A A KINGSTON MIDDLE SCHOOL,0.292,0.297,0.31,0.1,0.174,0.272,0.312,0.242
4,A D OLIVER MIDDLE SCHOOL,0.36,0.321,0.255,0.065,0.294,0.334,0.257,0.115


In [13]:
allResultsDFAVG2y.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2238 entries, 0 to 2237
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   ENTITY_NAME   2238 non-null   object 
 1   Level 1 Math  1913 non-null   float64
 2   Level 2 Math  1913 non-null   float64
 3   Level 3 Math  1913 non-null   float64
 4   Level 4 Math  1913 non-null   float64
 5   Level 1 ELA   1916 non-null   float64
 6   Level 2 ELA   1916 non-null   float64
 7   Level 3 ELA   1916 non-null   float64
 8   Level 4 ELA   1916 non-null   float64
dtypes: float64(8), object(1)
memory usage: 174.8+ KB


In [16]:
# Make a merged dataframe with both Math and ELA results
DFs = list(results_Norm.values())
allResultsDF = pd.merge(DFs[0], DFs[1], on = ['ENTITY_NAME', 'Year'], how = 'inner')
allResultsDF.head(5)

Unnamed: 0,ENTITY_NAME,Year,Level 1 Math,Level 2 Math,Level 3 Math,Level 4 Math,Level 1 ELA,Level 2 ELA,Level 3 ELA,Level 4 ELA
0,30TH AVENUE SCHOOL,2022,0.021,0.021,0.191,0.766,0.0,0.006,0.116,0.877
1,30TH AVENUE SCHOOL,2023,0.021,0.032,0.232,0.716,0.015,0.036,0.197,0.752
2,47 AMER SIGN LANG & ENG LOWER,2022,0.167,0.5,0.167,0.167,0.16,0.36,0.36,0.12
3,47 AMER SIGN LANG & ENG LOWER,2023,0.2,0.4,0.4,0.0,0.059,0.471,0.353,0.118
4,A A GATES ELEMENTARY SCHOOL,2022,0.288,0.269,0.212,0.231,0.212,0.192,0.308,0.288


In [13]:
allResultsDF.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4091 entries, 0 to 4090
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   ENTITY_NAME   4091 non-null   object 
 1   Year          4091 non-null   int64  
 2   Level 1 Math  3679 non-null   float64
 3   Level 2 Math  3679 non-null   float64
 4   Level 3 Math  3679 non-null   float64
 5   Level 4 Math  3679 non-null   float64
 6   Level 1 ELA   3687 non-null   float64
 7   Level 2 ELA   3687 non-null   float64
 8   Level 3 ELA   3687 non-null   float64
 9   Level 4 ELA   3687 non-null   float64
dtypes: float64(8), int64(1), object(1)
memory usage: 351.6+ KB


In [14]:
print(allResultsDF.duplicated(subset='ENTITY_NAME').sum())

1853


In [14]:
allResultsDFAVG2y['Level 4 Math+Ela'] = allResultsDFAVG2y[f'Level 4 {subjects[0]}']+allResultsDFAVG2y[f'Level 4 {subjects[1]}']
allResultsDFAVG2y.head(5)

Unnamed: 0,ENTITY_NAME,Level 1 Math,Level 2 Math,Level 3 Math,Level 4 Math,Level 1 ELA,Level 2 ELA,Level 3 ELA,Level 4 ELA,Level 4 Math+Ela
0,30TH AVENUE SCHOOL,0.021,0.026,0.212,0.741,0.007,0.021,0.154,0.818,1.559
1,47 AMER SIGN LANG & ENG LOWER,0.176,0.471,0.235,0.118,0.119,0.405,0.357,0.119,0.237
2,A A GATES ELEMENTARY SCHOOL,0.36,0.28,0.22,0.14,0.22,0.33,0.26,0.19,0.33
3,A A KINGSTON MIDDLE SCHOOL,0.292,0.297,0.31,0.1,0.174,0.272,0.312,0.242,0.342
4,A D OLIVER MIDDLE SCHOOL,0.36,0.321,0.255,0.065,0.294,0.334,0.257,0.115,0.179


In [17]:
allResultsDF['Level 4 Math+Ela'] = allResultsDF[f'Level 4 {subjects[0]}']+allResultsDF[f'Level 4 {subjects[1]}']
allResultsDF.head(10)

Unnamed: 0,ENTITY_NAME,Year,Level 1 Math,Level 2 Math,Level 3 Math,Level 4 Math,Level 1 ELA,Level 2 ELA,Level 3 ELA,Level 4 ELA,Level 4 Math+Ela
0,30TH AVENUE SCHOOL,2022,0.021,0.021,0.191,0.766,0.0,0.006,0.116,0.877,1.643
1,30TH AVENUE SCHOOL,2023,0.021,0.032,0.232,0.716,0.015,0.036,0.197,0.752,1.468
2,47 AMER SIGN LANG & ENG LOWER,2022,0.167,0.5,0.167,0.167,0.16,0.36,0.36,0.12,0.287
3,47 AMER SIGN LANG & ENG LOWER,2023,0.2,0.4,0.4,0.0,0.059,0.471,0.353,0.118,0.118
4,A A GATES ELEMENTARY SCHOOL,2022,0.288,0.269,0.212,0.231,0.212,0.192,0.308,0.288,0.519
5,A A GATES ELEMENTARY SCHOOL,2023,0.438,0.292,0.229,0.042,0.229,0.479,0.208,0.083,0.125
6,A A KINGSTON MIDDLE SCHOOL,2022,0.322,0.313,0.261,0.104,0.162,0.271,0.312,0.255,0.359
7,A A KINGSTON MIDDLE SCHOOL,2023,0.261,0.28,0.362,0.096,0.186,0.273,0.312,0.229,0.326
8,A D OLIVER MIDDLE SCHOOL,2022,0.397,0.347,0.189,0.066,0.26,0.364,0.234,0.142,0.208
9,A D OLIVER MIDDLE SCHOOL,2023,0.32,0.293,0.324,0.063,0.331,0.302,0.281,0.086,0.149


In [18]:
allResultsDF.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4091 entries, 0 to 4090
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   ENTITY_NAME       4091 non-null   object 
 1   Year              4091 non-null   int64  
 2   Level 1 Math      3679 non-null   float64
 3   Level 2 Math      3679 non-null   float64
 4   Level 3 Math      3679 non-null   float64
 5   Level 4 Math      3679 non-null   float64
 6   Level 1 ELA       3687 non-null   float64
 7   Level 2 ELA       3687 non-null   float64
 8   Level 3 ELA       3687 non-null   float64
 9   Level 4 ELA       3687 non-null   float64
 10  Level 4 Math+Ela  3679 non-null   float64
dtypes: float64(9), int64(1), object(1)
memory usage: 383.5+ KB


In [None]:
name = 'NYSPubChSchoolsTestResults2023_temp.csv'
path = os.path.join(basePath, outputFolder, name)
allResultsDF.to_csv(path)

del name, path

In [18]:
# Make plots for popups in the map and add them as columns to the mappable dataframe

# Set interactive mode off
plt.ioff()

# list of schools names

schoolsNames = allResultsDF['ENTITY_NAME'].to_list()
testResults = allResultsDF

# Create disctionnary to hold the dataframes by schools
schoolDFs = {}

# Make dataframes by schools 
for name in schoolsNames:
    dfName = name
    schoolDFs[dfName] = testResults[testResults['ENTITY_NAME'] == name]

plots = []
plotsDFs = {}

print("Making test results plots ...")

for subject in subjects:
    columns_to_plot = [f"Level 1 {subject}", f"Level 2 {subject}", f"Level 3 {subject}", f"Level 4 {subject}"]  
    # Plot dataframes by school
    for schoolDF, current_dataframe in tqdm(schoolDFs.items()):
        # schoolDF contains the name of the dataframe
        # current_dataframe contains the dataframe itself

            # Do something with current_dataframe
            # Create a plot
            fig = create_plot(current_dataframe, schoolDF, columns_to_plot)

            # Convert the plot to a PNG image and then encode it
            io_buf = BytesIO()
            fig.savefig(io_buf, format='png', bbox_inches='tight', dpi=85)
            # Close the figure
            plt.close()
            #Reading file to get the base64 string
            io_buf.seek(0)
            base64_string = base64.b64encode(io_buf.read()).decode('utf8')

            pair = (schoolDF, base64_string)

            plots.append(pair)

    # add the plots to the geodataframe of middle schools subject results 
    plotsDFs[subject] = pd.DataFrame(plots, columns=['ENTITY_NAME', f'plot {subject}'])
    
# Concatenate all plots DataFrames along the columns before merging
combined_plots_df = pd.concat(plotsDFs.values(), axis=1)
            
print('Adding plots to the data frame with test results.')    
allResultsDFAVG2y = pd.merge(allResultsDFAVG2y, combined_plots_df, left_on = 'ENTITY_NAME', right_on=combined_plots_df.iloc[:, 0])
print('Done.')    
# Set interactive mode on
# plt.ion()

Making test results plots ...


100%|██████████████████████████████████████████████████████████████████████████████| 2238/2238 [05:15<00:00,  7.10it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2238/2238 [05:17<00:00,  7.05it/s]

Adding plots to the data frame with test results.
Done.





In [19]:
allResultsDFAVG2y.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2238 entries, 0 to 2237
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   ENTITY_NAME       2238 non-null   object 
 1   ENTITY_NAME_x     2238 non-null   object 
 2   Level 1 Math      1913 non-null   float64
 3   Level 2 Math      1913 non-null   float64
 4   Level 3 Math      1913 non-null   float64
 5   Level 4 Math      1913 non-null   float64
 6   Level 1 ELA       1916 non-null   float64
 7   Level 2 ELA       1916 non-null   float64
 8   Level 3 ELA       1916 non-null   float64
 9   Level 4 ELA       1916 non-null   float64
 10  Level 4 Math+Ela  1913 non-null   float64
 11  ENTITY_NAME_y     2238 non-null   object 
 12  plot Math         2238 non-null   object 
 13  ENTITY_NAME_y     2238 non-null   object 
 14  plot ELA          2238 non-null   object 
dtypes: float64(9), object(6)
memory usage: 279.8+ KB


In [20]:
allResultsDF.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4091 entries, 0 to 4090
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   ENTITY_NAME       4091 non-null   object 
 1   ENTITY_NAME_x     4091 non-null   object 
 2   Year              4091 non-null   int64  
 3   Level 1 Math      3679 non-null   float64
 4   Level 2 Math      3679 non-null   float64
 5   Level 3 Math      3679 non-null   float64
 6   Level 4 Math      3679 non-null   float64
 7   Level 1 ELA       3687 non-null   float64
 8   Level 2 ELA       3687 non-null   float64
 9   Level 3 ELA       3687 non-null   float64
 10  Level 4 ELA       3687 non-null   float64
 11  Level 4 Math+Ela  3679 non-null   float64
 12  ENTITY_NAME_y     4091 non-null   object 
 13  plot Math         4091 non-null   object 
 14  ENTITY_NAME_y     4091 non-null   object 
 15  plot ELA          4091 non-null   object 
dtypes: float64(9), int64(1), object(6)
memory 

In [21]:
allResultsDFAVG2y = allResultsDFAVG2y.drop(['ENTITY_NAME_y', 'ENTITY_NAME_x'], axis = 1)

In [22]:
allResultsDFAVG2y.head()

Unnamed: 0,ENTITY_NAME,Level 1 Math,Level 2 Math,Level 3 Math,Level 4 Math,Level 1 ELA,Level 2 ELA,Level 3 ELA,Level 4 ELA,Level 4 Math+Ela,plot Math,plot ELA
0,30TH AVENUE SCHOOL,0.021,0.026,0.212,0.741,0.007,0.021,0.154,0.818,1.559,iVBORw0KGgoAAAANSUhEUgAAATIAAAD4CAYAAABvwmqjAA...,iVBORw0KGgoAAAANSUhEUgAAATIAAAD4CAYAAABvwmqjAA...
1,47 AMER SIGN LANG & ENG LOWER,0.176,0.471,0.235,0.118,0.119,0.405,0.357,0.119,0.237,iVBORw0KGgoAAAANSUhEUgAAAYsAAAD4CAYAAAAdIcpQAA...,iVBORw0KGgoAAAANSUhEUgAAAYsAAAD4CAYAAAAdIcpQAA...
2,A A GATES ELEMENTARY SCHOOL,0.36,0.28,0.22,0.14,0.22,0.33,0.26,0.19,0.33,iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AA...,iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AA...
3,A A KINGSTON MIDDLE SCHOOL,0.292,0.297,0.31,0.1,0.174,0.272,0.312,0.242,0.342,iVBORw0KGgoAAAANSUhEUgAAAXEAAAD4CAYAAAAaT9YAAA...,iVBORw0KGgoAAAANSUhEUgAAAXEAAAD4CAYAAAAaT9YAAA...
4,A D OLIVER MIDDLE SCHOOL,0.36,0.321,0.255,0.065,0.294,0.334,0.257,0.115,0.179,iVBORw0KGgoAAAANSUhEUgAAAVwAAAD4CAYAAACg7F5gAA...,iVBORw0KGgoAAAANSUhEUgAAAVwAAAD4CAYAAACg7F5gAA...


In [21]:
allResultsDF = allResultsDF.drop(['ENTITY_NAME_y', 'ENTITY_NAME_x'], axis=1)

In [27]:
allResultsDF.head()

Unnamed: 0,ENTITY_NAME,Year,Level 1 Math,Level 2 Math,Level 3 Math,Level 4 Math,Level 1 ELA,Level 2 ELA,Level 3 ELA,Level 4 ELA,Level 4 Math+Ela,plot Math,plot ELA,matched_name
0,30TH AVENUE SCHOOL,2022,0.021,0.021,0.191,0.766,0.0,0.006,0.116,0.877,1.643,iVBORw0KGgoAAAANSUhEUgAAATIAAAD4CAYAAABvwmqjAA...,iVBORw0KGgoAAAANSUhEUgAAATIAAAD4CAYAAABvwmqjAA...,DUTCH LANE SCHOOL
1,30TH AVENUE SCHOOL,2023,0.021,0.032,0.232,0.716,0.015,0.036,0.197,0.752,1.468,iVBORw0KGgoAAAANSUhEUgAAATIAAAD4CAYAAABvwmqjAA...,iVBORw0KGgoAAAANSUhEUgAAATIAAAD4CAYAAABvwmqjAA...,DUTCH LANE SCHOOL
2,47 AMER SIGN LANG & ENG LOWER,2022,0.167,0.5,0.167,0.167,0.16,0.36,0.36,0.12,0.287,iVBORw0KGgoAAAANSUhEUgAAAYsAAAD4CAYAAAAdIcpQAA...,iVBORw0KGgoAAAANSUhEUgAAAYsAAAD4CAYAAAAdIcpQAA...,47 AMERICAN SIGN LANGUAGE AND ENGLISH LOWER SC...
3,47 AMER SIGN LANG & ENG LOWER,2023,0.2,0.4,0.4,0.0,0.059,0.471,0.353,0.118,0.118,iVBORw0KGgoAAAANSUhEUgAAAYsAAAD4CAYAAAAdIcpQAA...,iVBORw0KGgoAAAANSUhEUgAAAYsAAAD4CAYAAAAdIcpQAA...,47 AMERICAN SIGN LANGUAGE AND ENGLISH LOWER SC...
4,A A GATES ELEMENTARY SCHOOL,2022,0.288,0.269,0.212,0.231,0.212,0.192,0.308,0.288,0.519,iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AA...,iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AA...,A A GATES ELEMENTARY SCHOOL


In [22]:
allResultsDF_2023 = allResultsDF[allResultsDF['Year'] == 2023]

In [23]:
allResultsDF_2023.head()

Unnamed: 0,ENTITY_NAME,Year,Level 1 Math,Level 2 Math,Level 3 Math,Level 4 Math,Level 1 ELA,Level 2 ELA,Level 3 ELA,Level 4 ELA,Level 4 Math+Ela,plot Math,plot ELA
1,30TH AVENUE SCHOOL,2023,0.021,0.032,0.232,0.716,0.015,0.036,0.197,0.752,1.468,iVBORw0KGgoAAAANSUhEUgAAATIAAAD4CAYAAABvwmqjAA...,iVBORw0KGgoAAAANSUhEUgAAATIAAAD4CAYAAABvwmqjAA...
3,47 AMER SIGN LANG & ENG LOWER,2023,0.2,0.4,0.4,0.0,0.059,0.471,0.353,0.118,0.118,iVBORw0KGgoAAAANSUhEUgAAAYsAAAD4CAYAAAAdIcpQAA...,iVBORw0KGgoAAAANSUhEUgAAAYsAAAD4CAYAAAAdIcpQAA...
5,A A GATES ELEMENTARY SCHOOL,2023,0.438,0.292,0.229,0.042,0.229,0.479,0.208,0.083,0.125,iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AA...,iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AA...
7,A A KINGSTON MIDDLE SCHOOL,2023,0.261,0.28,0.362,0.096,0.186,0.273,0.312,0.229,0.326,iVBORw0KGgoAAAANSUhEUgAAAXEAAAD4CAYAAAAaT9YAAA...,iVBORw0KGgoAAAANSUhEUgAAAXEAAAD4CAYAAAAaT9YAAA...
9,A D OLIVER MIDDLE SCHOOL,2023,0.32,0.293,0.324,0.063,0.331,0.302,0.281,0.086,0.149,iVBORw0KGgoAAAANSUhEUgAAAVwAAAD4CAYAAACg7F5gAA...,iVBORw0KGgoAAAANSUhEUgAAAVwAAAD4CAYAAACg7F5gAA...


In [23]:
# Matching the school all data file average for 2 years 
#with spatial data (geojson of schools locations)

tqdm.pandas(desc="Matching Names")

matched_tuples = allResultsDFAVG2y['ENTITY_NAME'].progress_apply(
    lambda x: match_name(x, NYCSchoolsGeom_short['LEGAL_NAME'], min_score=60))

print('Done.')

Matching Names: 100%|██████████████████████████████████████████████████████████████| 2238/2238 [22:35<00:00,  1.65it/s]

Done.





In [27]:
# Matching the school all data file with spatial data (geojson of schools locations)

tqdm.pandas(desc="Matching Names")

matched_tuples = allResultsDF_2023['ENTITY_NAME'].progress_apply(
    lambda x: match_name(x, NYCSchoolsGeom_short['LEGAL_NAME'], min_score=60))

print('Done.')

Matching Names: 100%|██████████████████████████████████████████████████████████████| 2045/2045 [19:09<00:00,  1.78it/s]

Done.





In [28]:
allResultsDF_2023.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2045 entries, 1 to 4090
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   ENTITY_NAME       2045 non-null   object 
 1   Year              2045 non-null   int64  
 2   Level 1 Math      1837 non-null   float64
 3   Level 2 Math      1837 non-null   float64
 4   Level 3 Math      1837 non-null   float64
 5   Level 4 Math      1837 non-null   float64
 6   Level 1 ELA       1842 non-null   float64
 7   Level 2 ELA       1842 non-null   float64
 8   Level 3 ELA       1842 non-null   float64
 9   Level 4 ELA       1842 non-null   float64
 10  Level 4 Math+Ela  1837 non-null   float64
 11  plot Math         2045 non-null   object 
 12  plot ELA          2045 non-null   object 
dtypes: float64(9), int64(1), object(3)
memory usage: 223.7+ KB


In [24]:
print('Appending mathes to the dataframe.')
allResultsDFAVG2y['matched_name'] = list(zip(*matched_tuples))[0]
allResultsDFAVG2y['matched_score'] = list(zip(*matched_tuples))[1]
print('Done.')

Appending mathes to the dataframe.
Done.


In [29]:
print('Appending mathes to the dataframe.')
allResultsDF_2023['matched_name'] = list(zip(*matched_tuples))[0]
allResultsDF_2023['matched_score'] = list(zip(*matched_tuples))[1]
print('Done.')

Appending mathes to the dataframe.
Done.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [25]:
# Unmatched or matched incorrectly names identified by 
# visual observations on the map or by analysing the geoJSON in prefered software

unmatched = {
'BGLIG-SHIRLEY RODRIGUEZ-REMENESKI CS':'BRONX GLOBAL LEARNING INSTITUTE FOR GIRLS CHARTER SCHOOL THE SHIRLEY RODRGUEZ-REMENESKI SCHOOL',
'MEADOW HILL GLOBAL EXPLORATIONS MAGN':'',
'SEED HARLEM':'SCHOOL OF EARTH EXPLORATION AND DISCOVERY HARLEM (SEED HARLEM)',
'PS/IS 210 21ST CENTURY ACADEMY':'PS/IS 210 TWENTY-FIRST CENTURY ACADEMY FOR COMMUNITY LEADERSHIP',
'HARBOR HEIGHTS':'HARBOR VIEW SCHOOL (THE)',
'QUEENS COLLEGIATE':'QUEENS COLLEGIATE - A COLLEGE BOARD SCHOOL',
'LAWRENCE ES-BROADWAY':'',
'BROOKLYN EAST COLLEGIATE CS':'',
'COLLEGIATE ACADEMY-MATH-PERSONAL AWA':'COLLEGIATE ACADEMY FOR MATHEMATICS AND PERSONAL AWARENESS CHARTER SCHOOL',
'SOUNDVIEW ACADEMY':'SOUNDVIEW ACADEMY FOR CULTURE AND SCHOLARSHIP',
'MS 224 MANHATTAN EAST':'MS 224 MANHATTAN EAST SCHOOL FOR ARTS & ACADEMICS',
'PATHWAYS COLLEGE PREPARATORY':'PATHWAYS COLLEGE PREPARATORY SCHOOL:  A COLLEGE BOARD SCHOOL',
'30TH AVENUE SCHOOL':'30TH AVENUE SCHOOL (THE) (G & T CITYWIDE)',
'OPPENHEIM-EPHRATAH-ST JOHNSVILLE JS':'OPPENHEIM-EPHRATAH-ST JOHNSVILLE JUNIOR/SENIOR HIGH SCHOOL',
'SCIENCE AND TECHNOLOGY ACADEMY':'SCIENCE AND TECHNOLOGY',
'SULLIVAN WEST HIGH SCHOOL':'SULLIVAN WEST HIGH SCHOOL AT LAKE HUNTINGTON',
}

In [26]:
# Replacing the erroneus matches in the allResultsDF_2023 data frame

def replace_values(row):
    if row['ENTITY_NAME'] in unmatched:
        row['matched_name'] = unmatched[row['ENTITY_NAME']]
    return row

allResultsDFAVG2y = allResultsDFAVG2y.apply(replace_values, axis = 1)

In [36]:
# Replacing the erroneus matches in the allResultsDF_2023 data frame

def replace_values(row):
    if row['ENTITY_NAME'] in unmatched:
        row['matched_name'] = unmatched[row['ENTITY_NAME']]
    return row

allResultsDF_2023 = allResultsDF_2023.apply(replace_values, axis = 1)

In [38]:
name = 'NYSPubChSchoolsTestResults2023_tempMatched.csv'
path = os.path.join(basePath, outputFolder, name)
print(f'Saving to {path} ...')
allResultsDF_2023.to_csv(path)
print('Saved.')
del name, path

# Merging DataFrames based on the matched name

finalGeoDF = pd.merge(NYCSchoolsGeom_short,allResultsDF_2023, left_on='LEGAL_NAME', right_on='matched_name')
allData_Name = 'PublicCharterNYSschools.geojson'
allData_Path = os.path.join(basePath,outputFolder, allData_Name)
print(f'Saving to {allData_Path} ...')
finalGeoDF.to_file(allData_Path, driver="GeoJSON")
print('Saved.')

del allData_Name, allData_Path

Saving to G:\My Drive\Kids\NYC_schools_mapped\processed_data\NYSPubChSchoolsTestResults2023_tempMatched.csv ...
Saved.
Saving to G:\My Drive\Kids\NYC_schools_mapped\processed_data\PublicCharterNYSschools.geojson ...
Saved.


In [27]:
name = 'NYSPubChSchoolsTestResults2yAVG_tempMatched.csv'
path = os.path.join(basePath, outputFolder, name)
print(f'Saving to {path} ...')
allResultsDFAVG2y.to_csv(path)
print('Saved.')
del name, path

# Merging DataFrames based on the matched name

finalGeoDF = pd.merge(NYCSchoolsGeom_short, allResultsDFAVG2y, left_on='LEGAL_NAME', right_on='matched_name')
allData_Name = 'PublicCharterNYSschools2yAVG.geojson'
allData_Path = os.path.join(basePath,outputFolder, allData_Name)
print(f'Saving to {allData_Path} ...')
finalGeoDF.to_file(allData_Path, driver="GeoJSON")
print('Saved.')

del allData_Name, allData_Path

Saving to G:\My Drive\Kids\NYC_schools_mapped\processed_data\NYSPubChSchoolsTestResults2yAVG_tempMatched.csv ...
Saved.
Saving to G:\My Drive\Kids\NYC_schools_mapped\processed_data\PublicCharterNYSschools2yAVG.geojson ...
Saved.


In [28]:
finalGeoDF.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 2278 entries, 0 to 2277
Data columns (total 19 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   OBJECTID          2278 non-null   int64   
 1   LEGAL_NAME        2278 non-null   object  
 2   INSTSUBTYPDESC    2278 non-null   object  
 3   SDL_DESC          2278 non-null   object  
 4   geometry          2278 non-null   geometry
 5   ENTITY_NAME       2278 non-null   object  
 6   Level 1 Math      1946 non-null   float64 
 7   Level 2 Math      1946 non-null   float64 
 8   Level 3 Math      1946 non-null   float64 
 9   Level 4 Math      1946 non-null   float64 
 10  Level 1 ELA       1949 non-null   float64 
 11  Level 2 ELA       1949 non-null   float64 
 12  Level 3 ELA       1949 non-null   float64 
 13  Level 4 ELA       1949 non-null   float64 
 14  Level 4 Math+Ela  1946 non-null   float64 
 15  plot Math         2278 non-null   object  
 16  plot ELA        

In [None]:
finalGeoDF.info()

### Generating the map

In [30]:
from IPython.core.display import display, HTML

display(HTML("<style>.output_scroll { height: auto !important; max-height: 1500px; }</style>"))

# Create a map object, centered at NYC
mapNYS = folium.Map(location=[40.6839, -73.9026], zoom_start=11, tiles="cartodb positron")
   
# Add dataframes with coordinates and test results to the map

def my_style(x):
    level4 = x['properties']['Level 4 Math+Ela']
    charter = x['properties']['INSTSUBTYPDESC']
    color = '#f0a607' if charter == 'CHARTER SCHOOL'  else '#f0a607' if charter == 'SATELLITE SITE FOR CHARTER SCHOOLS' else '#06a6cf'
    #fill_color = '#f0a607' if charter == 'CHARTER SCHOOL'  else '#f0a607' if charter == 'SATELLITE SITE FOR CHARTER SCHOOLS' else '#06a6cf'
    if level4 is None:
        level4 = 0
    #print(level4)
    return {
        "radius": (level4)*500,
        "color": color,
        #"fill_color": fill_color,
    }  


# Function to create iframe for a given row
def create_iframe(row):    
    html =  '<strong>{0}:</strong> {1}<br><strong>{2}:</strong> {3}<br><strong>{4}:</strong> {5}<br>\
    <br><img src="data:image/png;base64,{6}"><br>\
    <img src="data:image/png;base64,{7}">'.format(
        'School Name', row['LEGAL_NAME'],
        'Level 4 share 2022-2023 AVG Math', round(row['Level 4 Math'], 2), 
        'Level 4 share 2022-2023 AVG ELA', round(row['Level 4 ELA'], 2),
        row['plot Math'], row['plot ELA'])
    return folium.IFrame(html, width=500, height=450)

def create_popup(x):
    iframe = create_iframe(x)
    popup = folium.Popup(iframe)
    return popup

# Iterate over the GeoDataFrame and add a popup to each feature
for _, row in tqdm(finalGeoDF.iterrows(), total = len(finalGeoDF)):
    iframe = create_iframe(row)
        
    data = gpd.GeoDataFrame(row.to_frame().T, crs=finalGeoDF.crs)
    
    folium.GeoJson(
    data,
    marker = folium.Circle(radius=10, fill_color='white', fill_opacity=0, color="green", weight=2),
    #marker = folium.Circle(radius=10),    
    popup = folium.Popup(iframe),
    style_function = my_style, 
    control = False    
    #zoom_on_click = True,    
).add_to(mapNYS)    
        
folium.LayerControl().add_to(mapNYS)    
  
# # Display the map
# mapNYC

# Save map to html
mfile = 'NYSpublicAndCharter2yAVG.html'
mpath = os.path.join(basePath, outputFolder, mfile)
print(f'Saving to {mpath} ...')
mapNYS.save(mpath)
print('Saved.')

100%|██████████████████████████████████████████████████████████████████████████████| 2278/2278 [01:46<00:00, 21.31it/s]


Saving to G:\My Drive\Kids\NYC_schools_mapped\processed_data\NYSpublicAndCharter2yAVG.html ...
Saved.


In [None]:
from IPython.core.display import display, HTML

display(HTML("<style>.output_scroll { height: auto !important; max-height: 1500px; }</style>"))

# Create a map object, centered at NYC
mapNYS = folium.Map(location=[40.6839, -73.9026], zoom_start=11, tiles="cartodb positron")
   
# Add dataframes with coordinates and test results to the map

def my_style(x):
    level4 = x['properties']['Level 4 Math+Ela']
    charter = x['properties']['INSTSUBTYPDESC']
    color = '#f0a607' if charter == 'CHARTER SCHOOL'  else '#f0a607' if charter == 'SATELLITE SITE FOR CHARTER SCHOOLS' else '#06a6cf'
    #fill_color = '#f0a607' if charter == 'CHARTER SCHOOL'  else '#f0a607' if charter == 'SATELLITE SITE FOR CHARTER SCHOOLS' else '#06a6cf'
    if level4 is None:
        level4 = 0
    #print(level4)
    return {
        "radius": (level4)*500,
        "color": color,
        #"fill_color": fill_color,
    }  


# Function to create iframe for a given row
def create_iframe(row):    
    html =  '<strong>{0}:</strong> {1}<br><strong>{2}:</strong> {3}<br><strong>{4}:</strong> {5}<br>\
    <br><img src="data:image/png;base64,{6}"><br>\
    <img src="data:image/png;base64,{7}">'.format(
        'School Name', row['LEGAL_NAME'],
        'Level 4 share 2023 Math', round(row['Level 4 Math'], 2), 
        'Level 4 share 2023 ELA', round(row['Level 4 ELA'], 2),
        row['plot Math'], row['plot ELA'])
    return folium.IFrame(html, width=500, height=450)

def create_popup(x):
    iframe = create_iframe(x)
    popup = folium.Popup(iframe)
    return popup

# Iterate over the GeoDataFrame and add a popup to each feature
for _, row in tqdm(finalGeoDF.iterrows(), total = len(finalGeoDF)):
    iframe = create_iframe(row)
        
    data = gpd.GeoDataFrame(row.to_frame().T, crs=finalGeoDF.crs)
    
    folium.GeoJson(
    data,
    marker = folium.Circle(radius=10, fill_color='white', fill_opacity=0, color="green", weight=2),
    #marker = folium.Circle(radius=10),    
    popup = folium.Popup(iframe),
    style_function = my_style, 
    control = False    
    #zoom_on_click = True,    
).add_to(mapNYS)    
        
folium.LayerControl().add_to(mapNYS)    
  
# # Display the map
# mapNYC

# Save map to html
mfile = 'NYSpublicAndCharter.html'
mpath = os.path.join(basePath, outputFolder, mfile)
print(f'Saving to {mpath} ...')
mapNYS.save(mpath)
print('Saved.')

In [None]:
finalGeoDF['SDL_DESC'].unique()

In [None]:
NYCSchoolsGeom_short['SDL_DESC'].unique()

In [None]:
NYCSchoolsGeom_short.info()

In [None]:
import folium
m = folium.Map(location=[40.6839, -73.9026], zoom_start=11, tiles="cartodb positron")

# mfile = 'PublicCharterNYSschools.geojson'
# mpath = os.path.join(basePath, outputFolder, mfile)
# mpath = os.path.join(basePath, dataFolder, SchoolsFile)
# Add GeoJSON as an external file
folium.GeoJson(NYCSchoolsGeom, 
               marker = folium.Circle(radius=10, fill_color='white', fill_opacity=0, color="green", weight=2),
               ).add_to(m)

# Save the map
m.save('map.html')

# del mfile, mpath
# del mpath

In [None]:
%pwd

In [None]:
import folium
m = folium.Map(location=[40.6839, -73.9026], zoom_start=11, tiles="cartodb positron")

folium.GeoJson(finalGeoDF, 
               marker = folium.Circle(radius=10, fill_color='white', fill_opacity=0, color="green", weight=2),
               ).add_to(m)

# Save the map
m.save('map2.html')

In [None]:
finalGeoDF.head()

In [None]:
import folium
m = folium.Map(location=[40.6839, -73.9026], zoom_start=11, tiles="cartodb positron")

mfile = 'PublicCharterNYSschools.geojson'
mpath = os.path.join(basePath, outputFolder, mfile)
# mpath = os.path.join(basePath, dataFolder, SchoolsFile)
# Add GeoJSON as an external file
folium.GeoJson(mpath, 
               marker = folium.Circle(radius=10, fill_color='white', fill_opacity=0, color="green", weight=2),
               ).add_to(m)

# Save the map
mfile = 'map3.html'
mpath = os.path.join(basePath, outputFolder, mfile)
m.save(mpath)

del mfile, mpath
# del mpath

In [None]:
import folium
m = folium.Map(location=[40.6839, -73.9026], zoom_start=11, tiles="cartodb positron")

mfile = 'NYS_Schools.geojson'
mpath = os.path.join(basePath, dataFolder, SchoolsFile)

# Add GeoJSON as an external file
folium.GeoJson(mpath,  name='geojson').add_to(m)

# Save the map
m.save('map.html')

del mfile, mpath
# del mpath