# Joining Common Core of Data (CCD) from the National Center for Education Statistics with geo data for school and district locations

In [1]:
import pandas as pd
import numpy as np
# Turn off pandas' chained-assignment warning (the pandas default is 'warn').
pd.set_option('mode.chained_assignment', None)

## Import ElSi Files

Set encoding to `latin_1` and `low_memory` to `False`, because Pandas wanted to decode the file as `utf-8` and `us_ascii` did not work either, even though that is what the original files were encoded in. (Note: the `read_csv` call below currently passes neither option.)

In [2]:
# Read the raw ElSi export with pandas' default parsing options.
elsi_csv_path = '../data/school_based/ELSI.csv'
elsi_raw = pd.read_csv(elsi_csv_path)

## ElSi files concatenation

In [3]:
# Inspect the raw frame: column names, non-null counts and dtypes.
elsi_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1984 entries, 0 to 1983
Data columns (total 66 columns):
 #   Column                                                             Non-Null Count  Dtype 
---  ------                                                             --------------  ----- 
 0   School Name                                                        1984 non-null   object
 1   State Name [Public School] Latest available year                   1984 non-null   object
 2   School Name [Public School] 2021-22                                1984 non-null   object
 3   School Name [Public School] 2020-21                                1984 non-null   object
 4   School Name [Public School] 2018-19                                1984 non-null   object
 5   School Name [Public School] 2017-18                                1984 non-null   object
 6   Agency Name [Public School] 2021-22                                1984 non-null   object
 7   Agency Name [Public School] 2020-

In [4]:
# Split the wide ElSi table into one frame per school year by keeping every
# column whose label contains that year's string.
elsi_18 = elsi_raw.filter(like='2017-18')
elsi_19 = elsi_raw.filter(like='2018-19')
elsi_21 = elsi_raw.filter(like='2020-21')
elsi_22 = elsi_raw.filter(like='2021-22')

In [5]:
# Short snake_case names that are assigned positionally to each per-year ElSi
# frame, so the order here has to follow the column order of those frames.
elsi_cols = (
    'school_name system_name school_type charter magnet locale title_1 '
    'lat long school system virtual school_lvl tot_enrolled fte_teachers '
    'stu_tchr_ratio'
).split()

In [6]:
# The '2017-18' substring match used to build elsi_18 also hits the
# "School Level (SY 2017-18 onward)" columns of the other three years, because
# that label itself contains '2017-18'. Remove those three so the column count
# matches elsi_cols.
elsi_18 = elsi_18.drop(columns=[
    'School Level (SY 2017-18 onward) [Public School] 2021-22',
    'School Level (SY 2017-18 onward) [Public School] 2020-21',
    'School Level (SY 2017-18 onward) [Public School] 2018-19',
])

# Rename every yearly frame to the shared elsi_cols names and append a `year`
# column holding the calendar year in which that school year ends.
# set_axis/assign return new frames instead of writing into frames that were
# sliced out of elsi_raw, and set_axis raises if a frame does not have exactly
# len(elsi_cols) columns.
elsi_18, elsi_19, elsi_21, elsi_22 = [
    frame.set_axis(elsi_cols, axis=1).assign(year=year)
    for frame, year in (
        (elsi_18, 2018),
        (elsi_19, 2019),
        (elsi_21, 2021),
        (elsi_22, 2022),
    )
]


In [7]:
# Stack the four yearly frames into one long table; rows keep the index
# labels they had in their yearly frame.
yearly_frames = [elsi_18, elsi_19, elsi_21, elsi_22]
elsi = pd.concat(yearly_frames, axis=0)

In [8]:
# Column order for the long ElSi table: year and identifiers first, then
# level/enrolment/staffing figures, then school attributes and location.
elsi_order = [
    'year', 'system', 'school', 'system_name', 'school_name',
    'school_lvl', 'tot_enrolled', 'fte_teachers', 'stu_tchr_ratio',
    'school_type', 'magnet', 'charter', 'virtual', 'title_1',
    'lat', 'long', 'locale',
]
elsi = elsi.loc[:, elsi_order]

In [14]:
# Spot-check: show the rows whose system_name is 'Monroe County'.
elsi.loc[elsi['system_name'].eq('Monroe County')]

Unnamed: 0,year,system,school,system_name,school_name,school_lvl,tot_enrolled,fte_teachers,stu_tchr_ratio,school_type,magnet,charter,virtual,title_1,lat,long,locale
332,2018,TN-00620,TN-00620-0010,Monroe County,Coker Creek Elementary,Elementary,93,7.0,13.29,1-Regular school,2-No,2-No,NOTVIRTUAL,1-Yes,35.259039,-84.295372,43-Rural: Remote
1074,2018,TN-00620,TN-00620-0018,Monroe County,Madisonville Intermediate School,Elementary,496,31.0,16.0,1-Regular school,2-No,2-No,NOTVIRTUAL,1-Yes,35.537907,-84.35457,41-Rural: Fringe
1075,2018,TN-00620,TN-00620-0020,Monroe County,Madisonville Middle School,Middle,512,32.0,16.0,1-Regular school,2-No,2-No,NOTVIRTUAL,2-No,35.536817,-84.356331,41-Rural: Fringe
1076,2018,TN-00620,TN-00620-0025,Monroe County,Madisonville Primary,Elementary,504,35.0,14.4,1-Regular school,2-No,2-No,NOTVIRTUAL,1-Yes,35.521402,-84.367458,32-Town: Distant
1520,2018,TN-00620,TN-00620-0035,Monroe County,Rural Vale Elementary,Elementary,243,16.0,15.19,1-Regular school,2-No,2-No,NOTVIRTUAL,1-Yes,35.324946,-84.373204,42-Rural: Distant
1554,2018,TN-00620,TN-00620-0037,Monroe County,Sequoyah High School,High,942,63.9,14.74,1-Regular school,2-No,2-No,NOTVIRTUAL,2-No,35.548279,-84.321014,41-Rural: Fringe
1710,2018,TN-00620,TN-00620-0040,Monroe County,Sweetwater High School,High,640,38.9,16.45,1-Regular school,2-No,2-No,NOTVIRTUAL,2-No,35.597159,-84.47066,31-Town: Fringe
1727,2018,TN-00620,TN-00620-0045,Monroe County,Tellico Plains Elementary,Elementary,353,23.0,15.35,1-Regular school,2-No,2-No,NOTVIRTUAL,1-Yes,35.381641,-84.307709,42-Rural: Distant
1728,2018,TN-00620,TN-00620-0050,Monroe County,Tellico Plains High School,High,460,28.6,16.08,1-Regular school,2-No,2-No,NOTVIRTUAL,2-No,35.369062,-84.308704,42-Rural: Distant
1729,2018,TN-00620,TN-00620-0053,Monroe County,Tellico Plains Junior High School,Middle,297,16.0,18.56,1-Regular school,2-No,2-No,NOTVIRTUAL,1-Yes,35.382229,-84.306449,42-Rural: Distant


### Exporting cleaned ElSi file as elsi_clean.pkl 🥒 to the school_based subdirectory in my data folder.

In [10]:
# Originally planned to export as CSV; Mews recommended pickle instead, so the
# cleaned frame is written out as a .pkl file.
elsi_clean_path = '../data/school_based/elsi_clean.pkl'
elsi.to_pickle(elsi_clean_path)