# Joining Common Core of Data (CCD) from the National Center for Education Statistics with geographic data for school and district locations

In [2]:
import pandas as pd
import numpy as np
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole
# notebook; the pandas default for this option is 'warn'.
pd.options.mode.chained_assignment = None  # default='warn'

## Import ElSi Files

Set encoding to `latin_1` and low memory to `False`, because Pandas tried to read the files as `utf-8` and `us_ascii` did not work either, even though that is what the original files were encoded in.

In [3]:
# Load the raw ElSi export into a single wide dataframe.
# NOTE(review): the markdown above this cell describes encoding='latin_1' and
# low_memory=False, but neither is passed here, so read_csv runs with its
# defaults — confirm which is intended.
elsi_path = '../data/school_based/ELSI.csv'
elsi_raw = pd.read_csv(elsi_path)

## ElSi files concatenation

In [4]:
# Print the column names, non-null counts and dtypes of the raw ElSi frame.
elsi_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1984 entries, 0 to 1983
Data columns (total 66 columns):
 #   Column                                                             Non-Null Count  Dtype 
---  ------                                                             --------------  ----- 
 0   School Name                                                        1984 non-null   object
 1   State Name [Public School] Latest available year                   1984 non-null   object
 2   School Name [Public School] 2021-22                                1984 non-null   object
 3   School Name [Public School] 2020-21                                1984 non-null   object
 4   School Name [Public School] 2018-19                                1984 non-null   object
 5   School Name [Public School] 2017-18                                1984 non-null   object
 6   Agency Name [Public School] 2021-22                                1984 non-null   object
 7   Agency Name [Public School] 2020-

In [5]:
# Slice ElSi data into one dataframe per school year, keyed on the year label
# in the column name. '2017-18' is matched as a literal substring, so it also
# matches the 'School Level (SY 2017-18 onward) ...' columns of the other
# years; those extras are removed from elsi_18 before it is renamed.
# .copy() makes each slice an independent frame, so adding or renaming columns
# on it later is a plain assignment rather than a write to a slice of elsi_raw.
elsi_18 = elsi_raw.loc[:, elsi_raw.columns.str.contains('2017-18', regex=False)].copy()
elsi_19 = elsi_raw.loc[:, elsi_raw.columns.str.contains('2018-19', regex=False)].copy()
elsi_21 = elsi_raw.loc[:, elsi_raw.columns.str.contains('2020-21', regex=False)].copy()
elsi_22 = elsi_raw.loc[:, elsi_raw.columns.str.contains('2021-22', regex=False)].copy()

In [6]:
# Short snake_case column names for the per-year ElSi slices.
# They are assigned positionally, so the order here must match the column
# order of each slice.
elsi_cols = [
    # names and classification
    'school_name', 'system_name', 'school_type',
    'charter', 'magnet', 'locale', 'title_1',
    # coordinates
    'lat', 'long',
    # school / system fields and level
    'school', 'system', 'virtual', 'school_lvl',
    # enrollment and staffing
    'tot_enrolled', 'fte_teachers', 'stu_tchr_ratio',
]

In [7]:
# The '2017-18' slice also matched the 'School Level (SY 2017-18 onward)'
# columns that belong to the other three years, so drop them before renaming.
elsi_18 = elsi_18.drop(columns=[
    'School Level (SY 2017-18 onward) [Public School] 2021-22',
    'School Level (SY 2017-18 onward) [Public School] 2020-21',
    'School Level (SY 2017-18 onward) [Public School] 2018-19',
])


def _label_year(frame, year):
    """Return `frame` with columns renamed to `elsi_cols` and a 'year' column added.

    A new dataframe is returned and `frame` itself is left untouched, so no
    write ever lands on a slice of the raw data. pandas raises ValueError if
    `frame` does not have exactly len(elsi_cols) columns.
    """
    return frame.set_axis(elsi_cols, axis=1).assign(year=year)


# Apply the shared column names and tag each slice with the calendar year in
# which its school year ends (e.g. the 2017-18 slice becomes 2018).
elsi_18 = _label_year(elsi_18, 2018)
elsi_19 = _label_year(elsi_19, 2019)
elsi_21 = _label_year(elsi_21, 2021)
elsi_22 = _label_year(elsi_22, 2022)


In [8]:
# Stack the four per-year frames vertically into one long dataframe.
# Row labels are carried over unchanged, so each label of the per-year frames
# appears once per year in the result.
elsi = pd.concat(
    objs=[elsi_18, elsi_19, elsi_21, elsi_22],
    axis=0,
)

In [11]:
# Column ordering for the long ElSi frame: year and school/system fields first,
# then size measures, then school attributes, then location.
# NOTE(review): presumably used to reorder the columns of `elsi`; the usage is
# not visible in this part of the notebook.
elsi_order = [
    'year',
    # school / system fields and names
    'system', 'school', 'system_name', 'school_name',
    # level and size
    'school_lvl', 'tot_enrolled', 'fte_teachers', 'stu_tchr_ratio',
    # school attributes
    'school_type', 'magnet', 'charter', 'virtual', 'title_1',
    # location
    'lat', 'long', 'locale',
]