In [None]:
## import pandas 
import pandas as pd 

In [2]:
## let's read the file 
df = pd.read_excel("raw-data/Demographic_Snapshot_ENL.xlsx", sheet_name = "School")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9363 entries, 0 to 9362
Data columns (total 46 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   DBN                             9363 non-null   object 
 1   School Name                     9363 non-null   object 
 2   Year                            9363 non-null   object 
 3   Total Enrollment                9363 non-null   int64  
 4   Grade 3K                        9363 non-null   int64  
 5   Grade PK (Half Day & Full Day)  9363 non-null   int64  
 6   Grade K                         9363 non-null   int64  
 7   Grade 1                         9363 non-null   int64  
 8   Grade 2                         9363 non-null   int64  
 9   Grade 3                         9363 non-null   int64  
 10  Grade 4                         9363 non-null   int64  
 11  Grade 5                         9363 non-null   int64  
 12  Grade 6                         93

In [3]:
## let's eliminate unwanted columns 
df_clean = df[["School Name", "Year", "Total Enrollment", "# English Language Learners", "% English Language Learners"]]
df_clean

Unnamed: 0,School Name,Year,Total Enrollment,# English Language Learners,% English Language Learners
0,P.S. 015 Roberto Clemente,2019-20,190,17,0.089474
1,P.S. 015 Roberto Clemente,2020-21,193,21,0.108808
2,P.S. 015 Roberto Clemente,2021-22,179,11,0.061453
3,P.S. 015 Roberto Clemente,2022-23,180,12,0.066667
4,P.S. 015 Roberto Clemente,2023-24,189,24,0.126984
...,...,...,...,...,...
9358,Bronx Charter School for the Arts,2019-20,523,69,0.131931
9359,Bronx Charter School for the Arts,2020-21,626,78,0.124601
9360,Bronx Charter School for the Arts,2021-22,598,79,0.132107
9361,Bronx Charter School for the Arts,2022-23,591,73,0.123519


In [4]:
## to improve readability, let's define each year range using the column name 'Start Year' 
df_clean['Start Year'] = df_clean['Year'].str.extract(r'(\d{4})').astype(int)
df_clean

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clean['Start Year'] = df_clean['Year'].str.extract(r'(\d{4})').astype(int)


Unnamed: 0,School Name,Year,Total Enrollment,# English Language Learners,% English Language Learners,Start Year
0,P.S. 015 Roberto Clemente,2019-20,190,17,0.089474,2019
1,P.S. 015 Roberto Clemente,2020-21,193,21,0.108808,2020
2,P.S. 015 Roberto Clemente,2021-22,179,11,0.061453,2021
3,P.S. 015 Roberto Clemente,2022-23,180,12,0.066667,2022
4,P.S. 015 Roberto Clemente,2023-24,189,24,0.126984,2023
...,...,...,...,...,...,...
9358,Bronx Charter School for the Arts,2019-20,523,69,0.131931,2019
9359,Bronx Charter School for the Arts,2020-21,626,78,0.124601,2020
9360,Bronx Charter School for the Arts,2021-22,598,79,0.132107,2021
9361,Bronx Charter School for the Arts,2022-23,591,73,0.123519,2022


In [5]:
## Now, we'll generate a pivot table with school years as column values

school_ell_trends = df_clean.pivot_table(
    index='School Name',
    columns='Start Year',
    values='# English Language Learners'
)
school_ell_trends

Start Year,2019,2020,2021,2022,2023
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
47 The American Sign Language and English Secondar,11.0,15.0,9.0,8.0,12.0
A School Without Walls,,,,,3.0
A-Tech High School,14.0,22.0,33.0,31.0,21.0
A. Philip Randolph Campus High School,185.0,223.0,216.0,176.0,195.0
A.C.E. Academy for Scholars at the Geraldine Ferra,64.0,62.0,66.0,71.0,83.0
...,...,...,...,...,...
"Young Women's Leadership School, Queens",10.0,18.0,15.0,26.0,42.0
Zeta Charter School � Mount Eden,,4.0,44.0,66.0,93.0
Zeta Charter School � Tremont Park,,3.0,24.0,37.0,37.0
Zeta Charter Schools - Inwood,42.0,59.0,89.0,87.0,65.0


In [6]:
school_ell_trends.head()

Start Year,2019,2020,2021,2022,2023
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
47 The American Sign Language and English Secondar,11.0,15.0,9.0,8.0,12.0
A School Without Walls,,,,,3.0
A-Tech High School,14.0,22.0,33.0,31.0,21.0
A. Philip Randolph Campus High School,185.0,223.0,216.0,176.0,195.0
A.C.E. Academy for Scholars at the Geraldine Ferra,64.0,62.0,66.0,71.0,83.0


In [7]:
# How many unique schools are in the filtered DataFrame?
# let's check to see the data is accurate 
original_school_count = df_clean['School Name'].nunique()
original_school_count

1886

In [8]:
# How many schools made it into the pivot table?
# let's check to see the data is accurate 
pivot_school_count = school_ell_trends.shape[0]
pivot_school_count

1886

In [9]:
## Let's replace the NaN values with 0 
school_ell_trends = school_ell_trends.fillna(0)
school_ell_trends

Start Year,2019,2020,2021,2022,2023
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
47 The American Sign Language and English Secondar,11.0,15.0,9.0,8.0,12.0
A School Without Walls,0.0,0.0,0.0,0.0,3.0
A-Tech High School,14.0,22.0,33.0,31.0,21.0
A. Philip Randolph Campus High School,185.0,223.0,216.0,176.0,195.0
A.C.E. Academy for Scholars at the Geraldine Ferra,64.0,62.0,66.0,71.0,83.0
...,...,...,...,...,...
"Young Women's Leadership School, Queens",10.0,18.0,15.0,26.0,42.0
Zeta Charter School � Mount Eden,0.0,4.0,44.0,66.0,93.0
Zeta Charter School � Tremont Park,0.0,3.0,24.0,37.0,37.0
Zeta Charter Schools - Inwood,42.0,59.0,89.0,87.0,65.0


In [10]:
## Percentage change between (2021 - 2022) and (2022 - 2023)
school_ell_trends['Change_2021_2022'] = ((school_ell_trends[2022] - school_ell_trends[2021]) / school_ell_trends[2021]) * 100

## Percentage change between (2022 - 2023) and (2023 - 2024)
school_ell_trends['Change_2022_2023'] = ((school_ell_trends[2023] - school_ell_trends[2022]) / school_ell_trends[2022]) * 100

## Display updated DataFrame
school_ell_trends

Start Year,2019,2020,2021,2022,2023,Change_2021_2022,Change_2022_2023
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
47 The American Sign Language and English Secondar,11.0,15.0,9.0,8.0,12.0,-11.111111,50.000000
A School Without Walls,0.0,0.0,0.0,0.0,3.0,,inf
A-Tech High School,14.0,22.0,33.0,31.0,21.0,-6.060606,-32.258065
A. Philip Randolph Campus High School,185.0,223.0,216.0,176.0,195.0,-18.518519,10.795455
A.C.E. Academy for Scholars at the Geraldine Ferra,64.0,62.0,66.0,71.0,83.0,7.575758,16.901408
...,...,...,...,...,...,...,...
"Young Women's Leadership School, Queens",10.0,18.0,15.0,26.0,42.0,73.333333,61.538462
Zeta Charter School � Mount Eden,0.0,4.0,44.0,66.0,93.0,50.000000,40.909091
Zeta Charter School � Tremont Park,0.0,3.0,24.0,37.0,37.0,54.166667,0.000000
Zeta Charter Schools - Inwood,42.0,59.0,89.0,87.0,65.0,-2.247191,-25.287356


In [11]:
## Filter out rows where the base year is zero or where the change is infinite
valid_2021_2022 = school_ell_trends['Change_2021_2022'].replace([float('inf'), -float('inf')], pd.NA).dropna()
valid_2022_2023 = school_ell_trends['Change_2022_2023'].replace([float('inf'), -float('inf')], pd.NA).dropna()

## Calculate mean of the cleaned columns
avg_change_2021_2022 = valid_2021_2022.mean()
avg_change_2022_2023 = valid_2022_2023.mean() 

In [12]:
#average rate of change between 2021-2022 and 2022-2023 where you saw the beginning of the immigration influx
avg_change_2021_2022

8.634362922908757

In [13]:
#average rate of change between 2022-2023 and 2023-2024 where you saw an ongoing increase of immigrant children 
avg_change_2022_2023

25.659581642469096

In [14]:
##Highest to lowest based on the percent change in ENL enrollment between 2022 and 2023
import numpy as np

## Replace 0 with np.nan in 2022 before calculating percent change
school_ell_trends['Change_2022_2023'] = (
    (school_ell_trends[2023] - school_ell_trends[2022].replace(0, np.nan)) /
    school_ell_trends[2022].replace(0, np.nan)
) * 100

## Drop rows with NaN or inf in the change column
school_ell_trends_cleaned = school_ell_trends.replace([np.inf, -np.inf], np.nan).dropna(subset=['Change_2022_2023'])

## Sort from highest to lowest
school_ell_trends_sorted = school_ell_trends_cleaned.sort_values(by='Change_2022_2023', ascending=False)

## View top 10 results
school_ell_trends_sorted.head(11)

Start Year,2019,2020,2021,2022,2023,Change_2021_2022,Change_2022_2023
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
P.S. 064 Robert Simon,4.0,4.0,3.0,2.0,25.0,-33.333333,1150.0
P.S. 306 Ethan Allen,4.0,2.0,3.0,2.0,22.0,-33.333333,1000.0
P.S. 333 Manhattan School for Children,16.0,12.0,11.0,7.0,67.0,-36.363636,857.142857
Olympus Academy,4.0,3.0,3.0,1.0,8.0,-66.666667,700.0
The Riverside School for Makers and Artists,23.0,29.0,21.0,24.0,182.0,14.285714,658.333333
Lower Manhattan Community Middle School,17.0,20.0,14.0,6.0,42.0,-57.142857,600.0
University Neighborhood Middle School,11.0,8.0,5.0,4.0,27.0,-20.0,575.0
The Peck Slip School,8.0,8.0,13.0,12.0,80.0,-7.692308,566.666667
Mosaic Preparatory Academy,20.0,20.0,12.0,11.0,70.0,-8.333333,536.363636
P.S. 242 - The Young Diplomats Magnet Academy,11.0,13.0,7.0,14.0,88.0,100.0,528.571429


In [15]:
# Now let's do total enrollment per year
total_enrollment = school_ell_trends[[2019, 2020, 2021, 2022, 2023]].sum()
total_enrollment

Start Year
2019    141529.583333
2020    145869.500000
2021    146285.583333
2022    148040.916667
2023    162951.666667
dtype: float64

In [35]:
school_ell_trends_sorted.head(26)

Start Year,2019,2020,2021,2022,2023,Change_2021_2022,Change_2022_2023
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
P.S. 064 Robert Simon,4.0,4.0,3.0,2.0,25.0,-33.333333,1150.0
P.S. 306 Ethan Allen,4.0,2.0,3.0,2.0,22.0,-33.333333,1000.0
P.S. 333 Manhattan School for Children,16.0,12.0,11.0,7.0,67.0,-36.363636,857.142857
Olympus Academy,4.0,3.0,3.0,1.0,8.0,-66.666667,700.0
The Riverside School for Makers and Artists,23.0,29.0,21.0,24.0,182.0,14.285714,658.333333
Lower Manhattan Community Middle School,17.0,20.0,14.0,6.0,42.0,-57.142857,600.0
University Neighborhood Middle School,11.0,8.0,5.0,4.0,27.0,-20.0,575.0
The Peck Slip School,8.0,8.0,13.0,12.0,80.0,-7.692308,566.666667
Mosaic Preparatory Academy,20.0,20.0,12.0,11.0,70.0,-8.333333,536.363636
P.S. 242 - The Young Diplomats Magnet Academy,11.0,13.0,7.0,14.0,88.0,100.0,528.571429
