# School Facility Reopening

## Contents:
### 01. Import libraries and data
### 02. Explore data
### 03. Wrangle data
### 04. Export data

## 01. Import libraries and data

In [43]:
# Import libraries

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
import os

In [44]:
# Render matplotlib charts inline, directly below the cell that creates them.

%matplotlib inline

In [45]:
# Project root folder used to build every input/output path in this notebook.
# Set the SCHOOL_REOPENING_PROJECT_DIR environment variable to run the notebook
# on another machine; when it is unset the original local folder is used, so
# existing behaviour is unchanged.

path = os.environ.get(
    'SCHOOL_REOPENING_PROJECT_DIR',
    r'C:\Users\lrutl\OneDrive\Desktop\Advanced Analytics & Dashboard Design\COVID19 School ReOpening and Students',
)

In [46]:
# Load the raw school facility reopening survey into a DataFrame

raw_survey_csv = os.path.join(
    path,
    '02 Data',
    'Original data',
    'School_Facility_ReOpening_Survey.csv',
)
sf = pd.read_csv(raw_survey_csv)

## 02. Explore data

In [47]:
# Summarise the columns of the raw survey data: names, non-null counts and dtypes
sf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6042 entries, 0 to 6041
Data columns (total 26 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   Week                          6042 non-null   object
 1   DistrictName                  6042 non-null   object
 2   DistrictCode                  6042 non-null   int64 
 3   DistrictOrganizationId        6042 non-null   int64 
 4   Status                        5241 non-null   object
 5   Model_Elementary              5228 non-null   object
 6   Model_Middle                  5214 non-null   object
 7   Model_High                    5186 non-null   object
 8   GradeLevels_Remote_Phasein    916 non-null    object
 9   StudentGroups_Remote_Phasein  1033 non-null   object
 10  Other_Remote_Phasein          258 non-null    object
 11  Enrollment_Elementary         6042 non-null   int64 
 12  FullTimeALE_Elementary        6042 non-null   int64 
 13  OptOut_Elementary 

In [48]:
# Look up how many columns pandas will currently render in notebook output

pd.options.display.max_columns

50

In [49]:
# Allow up to 50 columns to be rendered so that every column of the sf df is shown

pd.options.display.max_columns = 50

In [50]:
# Preview the first five rows to confirm that every column is displayed

sf.head()

Unnamed: 0,Week,DistrictName,DistrictCode,DistrictOrganizationId,Status,Model_Elementary,Model_Middle,Model_High,GradeLevels_Remote_Phasein,StudentGroups_Remote_Phasein,Other_Remote_Phasein,Enrollment_Elementary,FullTimeALE_Elementary,OptOut_Elementary,TypicalInPerson_Elementary,Weekly_Elementary,Enrollment_Middle,FullTimeALE_Middle,OptOut_Middle,TypicalInPerson_Middle,Weekly_Middle,Enrollment_High,FullTimeALE_High,OptOut_High,TypicalInPerson_High,Weekly_High
0,Week of 1/18/2021,Adna School District,21226,100011,Complete,Typical/Traditional In-Person,Partial In-Person for All Students,Partial In-Person for All Students,,,,216,0,0,216,216,155,0,0,90,155,212,0,0,130,212
1,Week of 1/18/2021,Almira School District,22017,100012,Complete,Typical/Traditional In-Person,Typical/Traditional In-Person,Partial In-Person for All Students,,,,48,0,0,48,48,21,0,0,21,21,20,0,0,20,20
2,Week of 1/18/2021,Arlington School District,31016,100014,Complete,Phase-In,Small Groups + Remote,Small Groups + Remote,"Grade K,Grade 1,Grade 2,Grade 3","English Language Learners,Students with Disabi...",Connectivity Needs,2263,0,0,1511,1511,1296,0,0,74,74,1702,0,0,120,120
3,Week of 1/18/2021,Asotin-Anatone School District,2420,100015,Complete,Typical/Traditional In-Person,Typical/Traditional In-Person,Partial In-Person for All Students,,,,256,0,0,246,246,144,0,0,136,136,192,0,0,93,93
4,Week of 1/18/2021,Auburn School District,17408,100016,Complete,Small Groups + Remote,Small Groups + Remote,Small Groups + Remote,"Grade K,Grade 1,Grade 2,Grade 3,Grade 4,Grade ...","Low Income,Students with Disabilities",,7295,0,0,40,80,3865,0,0,15,30,5421,0,0,18,36


In [51]:
# Check the df shape as (number of rows, number of columns)

sf.shape

(6042, 26)

## 03. Wrangle data

In [52]:
# Check which Status values are present.
# NOTE: value_counts() leaves out missing values by default, so rows with no Status are not counted here.

sf.value_counts('Status')

Status
Complete    5241
dtype: int64

In [53]:
# Keep only survey responses with a status of 'Complete' (rows with a missing Status are dropped as well).
# .copy() makes sf an independent DataFrame, so the later column edits do not
# operate on a slice of the unfiltered data and raise SettingWithCopyWarning.

sf = sf[sf['Status'] == 'Complete'].copy()
sf.shape

(5241, 26)

In [54]:
# Standardise the week column name to 'Week'.
# NOTE: the sf.info() output earlier in this notebook shows the column is already
# called 'Week', so this is a no-op for the current file; a 'Week Of' header,
# if present, would be renamed.
# The result is reassigned instead of using inplace=True, which can raise
# SettingWithCopyWarning when sf is a filtered slice of another DataFrame.

sf = sf.rename(columns = {'Week Of' : 'Week'})

In [55]:
# Preview the first rows to check the column names after the rename step
sf.head()

Unnamed: 0,Week,DistrictName,DistrictCode,DistrictOrganizationId,Status,Model_Elementary,Model_Middle,Model_High,GradeLevels_Remote_Phasein,StudentGroups_Remote_Phasein,Other_Remote_Phasein,Enrollment_Elementary,FullTimeALE_Elementary,OptOut_Elementary,TypicalInPerson_Elementary,Weekly_Elementary,Enrollment_Middle,FullTimeALE_Middle,OptOut_Middle,TypicalInPerson_Middle,Weekly_Middle,Enrollment_High,FullTimeALE_High,OptOut_High,TypicalInPerson_High,Weekly_High
0,Week of 1/18/2021,Adna School District,21226,100011,Complete,Typical/Traditional In-Person,Partial In-Person for All Students,Partial In-Person for All Students,,,,216,0,0,216,216,155,0,0,90,155,212,0,0,130,212
1,Week of 1/18/2021,Almira School District,22017,100012,Complete,Typical/Traditional In-Person,Typical/Traditional In-Person,Partial In-Person for All Students,,,,48,0,0,48,48,21,0,0,21,21,20,0,0,20,20
2,Week of 1/18/2021,Arlington School District,31016,100014,Complete,Phase-In,Small Groups + Remote,Small Groups + Remote,"Grade K,Grade 1,Grade 2,Grade 3","English Language Learners,Students with Disabi...",Connectivity Needs,2263,0,0,1511,1511,1296,0,0,74,74,1702,0,0,120,120
3,Week of 1/18/2021,Asotin-Anatone School District,2420,100015,Complete,Typical/Traditional In-Person,Typical/Traditional In-Person,Partial In-Person for All Students,,,,256,0,0,246,246,144,0,0,136,136,192,0,0,93,93
4,Week of 1/18/2021,Auburn School District,17408,100016,Complete,Small Groups + Remote,Small Groups + Remote,Small Groups + Remote,"Grade K,Grade 1,Grade 2,Grade 3,Grade 4,Grade ...","Low Income,Students with Disabilities",,7295,0,0,40,80,3865,0,0,15,30,5421,0,0,18,36


In [56]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns
sf.describe()

Unnamed: 0,DistrictCode,DistrictOrganizationId,Enrollment_Elementary,FullTimeALE_Elementary,OptOut_Elementary,TypicalInPerson_Elementary,Weekly_Elementary,Enrollment_Middle,FullTimeALE_Middle,OptOut_Middle,TypicalInPerson_Middle,Weekly_Middle,Enrollment_High,FullTimeALE_High,OptOut_High,TypicalInPerson_High,Weekly_High
count,5241.0,5241.0,5241.0,5241.0,5241.0,5241.0,5241.0,5241.0,5241.0,5241.0,5241.0,5241.0,5241.0,5241.0,5241.0,5241.0,5241.0
mean,22096.754818,98974.317306,1618.17573,35.975005,266.01908,836.904026,927.199008,818.585194,24.322648,119.654074,302.129365,386.870826,1090.26121,40.576608,187.180691,348.487312,461.100744
std,11063.447095,12069.410435,3149.268015,166.683063,748.404331,1879.151308,1730.769348,1445.673819,132.629461,334.60006,635.957094,774.941425,1941.958351,169.601494,888.085722,784.206905,996.630298
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,14068.0,100082.0,110.0,0.0,1.0,66.0,72.0,53.0,0.0,0.0,22.0,25.0,54.0,0.0,0.0,19.0,20.0
50%,22017.0,100161.0,404.0,0.0,22.0,228.0,276.0,228.0,0.0,10.0,75.0,95.0,271.0,0.0,10.0,79.0,98.0
75%,32123.0,100242.0,1625.0,0.0,154.0,831.0,1001.0,791.0,0.0,70.0,271.0,361.0,1126.0,12.0,96.0,310.0,422.0
max,39209.0,106257.0,66202.0,2596.0,7500.0,38647.0,16456.0,12153.0,2238.0,3496.0,6596.0,7063.0,16000.0,2189.0,35460.0,8710.0,10000.0


In [58]:
# Remove the literal "Week of " prefix from the Week column.
# str.lstrip('Week of ') would strip any leading run of the characters
# 'W', 'e', 'k', ' ', 'o', 'f' rather than the exact prefix, so an anchored
# regex replacement is used to remove only the prefix itself.

sf['Week'] = sf['Week'].str.replace(r'^Week of ', '', regex=True)

In [59]:
# Preview the first rows to confirm the Week column now holds only the date text
sf.head()

Unnamed: 0,Week,DistrictName,DistrictCode,DistrictOrganizationId,Status,Model_Elementary,Model_Middle,Model_High,GradeLevels_Remote_Phasein,StudentGroups_Remote_Phasein,Other_Remote_Phasein,Enrollment_Elementary,FullTimeALE_Elementary,OptOut_Elementary,TypicalInPerson_Elementary,Weekly_Elementary,Enrollment_Middle,FullTimeALE_Middle,OptOut_Middle,TypicalInPerson_Middle,Weekly_Middle,Enrollment_High,FullTimeALE_High,OptOut_High,TypicalInPerson_High,Weekly_High
0,1/18/2021,Adna School District,21226,100011,Complete,Typical/Traditional In-Person,Partial In-Person for All Students,Partial In-Person for All Students,,,,216,0,0,216,216,155,0,0,90,155,212,0,0,130,212
1,1/18/2021,Almira School District,22017,100012,Complete,Typical/Traditional In-Person,Typical/Traditional In-Person,Partial In-Person for All Students,,,,48,0,0,48,48,21,0,0,21,21,20,0,0,20,20
2,1/18/2021,Arlington School District,31016,100014,Complete,Phase-In,Small Groups + Remote,Small Groups + Remote,"Grade K,Grade 1,Grade 2,Grade 3","English Language Learners,Students with Disabi...",Connectivity Needs,2263,0,0,1511,1511,1296,0,0,74,74,1702,0,0,120,120
3,1/18/2021,Asotin-Anatone School District,2420,100015,Complete,Typical/Traditional In-Person,Typical/Traditional In-Person,Partial In-Person for All Students,,,,256,0,0,246,246,144,0,0,136,136,192,0,0,93,93
4,1/18/2021,Auburn School District,17408,100016,Complete,Small Groups + Remote,Small Groups + Remote,Small Groups + Remote,"Grade K,Grade 1,Grade 2,Grade 3,Grade 4,Grade ...","Low Income,Students with Disabilities",,7295,0,0,40,80,3865,0,0,15,30,5421,0,0,18,36


In [69]:
# Count the rows for each Week value; the result is ordered by count, not by date
sf.value_counts('Week')

Week
1/25/2021    308
3/29/2021    289
3/8/2021     287
2/22/2021    285
3/1/2021     285
4/12/2021    283
5/10/2021    281
5/17/2021    279
4/26/2021    278
4/19/2021    278
5/31/2021    278
2/15/2021    275
3/15/2021    273
2/8/2021     273
1/18/2021    271
5/3/2021     258
2/1/2021     257
5/24/2021    252
3/22/2021    251
dtype: int64

## 04. Export data

In [70]:
# Export the cleaned data as a csv file.
# index=False keeps the DataFrame's row index out of the file; otherwise it is
# written as an extra unnamed first column (non-contiguous after the Status filter).

sf.to_csv(
    os.path.join(path,'02 Data','Prepared data','School_Facility_ReOpening_Survey - clean.csv'),
    index=False,
)