In [4]:
import pandas as pd


### Read in the schools data and check the shape

In [7]:
# Load the cleaned schools CSV (path is relative to the notebook's folder),
# then show the (rows, columns) tuple as the cell output.
schools = pd.read_csv(filepath_or_buffer='../data/schools_clean.csv')
schools.shape

(167, 29)

### More exploration with pandas
 - .value_counts()
 - .to_frame()
 - .reset_index()
 - .sort_values()
 - .isnull().sum()
 - .info()
 - .describe()

#### Let's remind ourselves of the data 

In [11]:
# Preview only the first two rows to recall the column layout.
schools.head(n=2)

Unnamed: 0,level,name,zipcode,grade_k,grade_1,grade_2,grade_3,grade_4,grade_5,grade_6,...,hisp,p_islander,white,male,female,econ_disadv,disabled,limited_eng,lat,lng
0,Elementary School,A. Z. Kelley Elementary,37013,153.0,145.0,149.0,180.0,184.0,,,...,206,1.0,212.0,431,421,261,75.0,298.0,36.021817,-86.658848
1,Elementary School,Alex Green Elementary,37189,42.0,50.0,44.0,38.0,24.0,,,...,29,1.0,21.0,115,119,153,21.0,25.0,36.252961,-86.832229


In [13]:
# Confirm the object returned by pd.read_csv is a DataFrame.
type(schools)

pandas.core.frame.DataFrame

### `value_counts( )` tallies the count of each unique value for a column; here we look at the level column

In [16]:
# Tally how many schools fall under each distinct `level` value.
schools['level'].value_counts()

level
Elementary School              76
Middle School                  31
Charter                        30
High School                    17
Non-Traditional                 5
Special Education               3
Non-Traditional - Hybrid        2
Alternative Learning Center     2
Adult                           1
Name: count, dtype: int64

#### Let's save it into a variable and check the type

In [19]:
# Keep the per-level tallies so later cells can reshape them.
type_counts = schools['level'].value_counts()

In [21]:
# Peek at the first entries of the saved counts (head() shows five by default).
type_counts.head()


level
Elementary School    76
Middle School        31
Charter              30
High School          17
Non-Traditional       5
Name: count, dtype: int64

In [23]:
# value_counts() returned a Series, not a DataFrame.
type(type_counts)

pandas.core.series.Series

In [25]:
# Convert the counts Series into a one-column DataFrame; the level labels
# remain the index and the counts become the single `count` column.
type_counts = type_counts.to_frame()

In [27]:
# After to_frame() the object is now a DataFrame.
type(type_counts)

pandas.core.frame.DataFrame

In [29]:
# Show the first two rows; `level` is still the index at this point.
type_counts.head(n=2)

Unnamed: 0_level_0,count
level,Unnamed: 1_level_1
Elementary School,76
Middle School,31


### `reset_index( )` moves the current index into the DataFrame as a regular column and replaces it with a new 0-based integer index.

In [32]:
# reset_index() turns the existing index (`level`) into a regular column and
# installs a fresh 0-based integer index.
# NOTE: this cell reassigns type_counts from itself, so running it a second
# time would push the integer index into an extra `index` column.
type_counts = type_counts.reset_index()
type_counts

Unnamed: 0,level,count
0,Elementary School,76
1,Middle School,31
2,Charter,30
3,High School,17
4,Non-Traditional,5
5,Special Education,3
6,Non-Traditional - Hybrid,2
7,Alternative Learning Center,2
8,Adult,1


### This can become a problem if you don't want to keep your original index: resetting again adds it as an extra `index` column.

In [35]:
# Demonstration only: resetting an already-reset frame inserts the old
# 0-based index as a new `index` column. Stored in a throwaway variable so
# type_counts itself is left unchanged.
test = type_counts.reset_index()
test

Unnamed: 0,index,level,count
0,0,Elementary School,76
1,1,Middle School,31
2,2,Charter,30
3,3,High School,17
4,4,Non-Traditional,5
5,5,Special Education,3
6,6,Non-Traditional - Hybrid,2
7,7,Alternative Learning Center,2
8,8,Adult,1


### `drop = True` discards the original index instead of inserting it as a column.

In [38]:
# drop=True discards the current index instead of inserting it as a column,
# so the frame keeps just `level` and `count` with a clean 0-based index.
type_counts = type_counts.reset_index(drop=True)
type_counts

Unnamed: 0,level,count
0,Elementary School,76
1,Middle School,31
2,Charter,30
3,High School,17
4,Non-Traditional,5
5,Special Education,3
6,Non-Traditional - Hybrid,2
7,Alternative Learning Center,2
8,Adult,1


#### fix columns

In [41]:
# Rename by column name rather than by position: a positional
# `.columns = [...]` assignment silently mislabels data if the column order
# ever differs and raises if the column count changes (e.g. after re-running
# an earlier reset_index cell). rename() touches only `level`, leaves `count`
# as-is, and is safe to re-run.
# NOTE(review): relies on the counts column already being named `count`, as
# shown in the outputs above — confirm if running on an older pandas.
type_counts = type_counts.rename(columns={'level': 'school_type'})
type_counts.head(3)

Unnamed: 0,school_type,count
0,Elementary School,76
1,Middle School,31
2,Charter,30


#### check the type again

In [44]:
# Renaming the columns does not change the container type: still a DataFrame.
type(type_counts)

pandas.core.frame.DataFrame

### `sort_values( )` sorts the data frame by the specified column or columns
 - by default will sort smallest to largest (`ascending = True`)

In [47]:
# Display the rows ordered from the largest count to the smallest; the
# result is only shown, not stored back into type_counts.
type_counts.sort_values('count', ascending=False)

Unnamed: 0,school_type,count
0,Elementary School,76
1,Middle School,31
2,Charter,30
3,High School,17
4,Non-Traditional,5
5,Special Education,3
6,Non-Traditional - Hybrid,2
7,Alternative Learning Center,2
8,Adult,1


In [49]:
# isnull() marks each missing cell as True; sum() then adds those up per
# column, giving the number of missing values in every column.
schools.isnull().sum()

level            0
name             0
zipcode          0
grade_k         84
grade_1         84
grade_2         83
grade_3         83
grade_4         84
grade_5        111
grade_6        112
grade_7        111
grade_8        110
grade_9        135
grade_10       133
grade_11       135
grade_12       135
native_amer     73
asian           26
black            0
hisp             0
p_islander      97
white            1
male             0
female           0
econ_disadv      0
disabled         1
limited_eng      7
lat              1
lng              1
dtype: int64

In [51]:
# Summarise the frame: index range, each column's non-null count and dtype.
schools.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 167 entries, 0 to 166
Data columns (total 29 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   level        167 non-null    object 
 1   name         167 non-null    object 
 2   zipcode      167 non-null    int64  
 3   grade_k      83 non-null     float64
 4   grade_1      83 non-null     float64
 5   grade_2      84 non-null     float64
 6   grade_3      84 non-null     float64
 7   grade_4      83 non-null     float64
 8   grade_5      56 non-null     float64
 9   grade_6      55 non-null     float64
 10  grade_7      56 non-null     float64
 11  grade_8      57 non-null     float64
 12  grade_9      32 non-null     float64
 13  grade_10     34 non-null     float64
 14  grade_11     32 non-null     float64
 15  grade_12     32 non-null     float64
 16  native_amer  94 non-null     float64
 17  asian        141 non-null    float64
 18  black        167 non-null    int64  
 19  hisp    

In [53]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns only.
schools.describe()

Unnamed: 0,zipcode,grade_k,grade_1,grade_2,grade_3,grade_4,grade_5,grade_6,grade_7,grade_8,...,hisp,p_islander,white,male,female,econ_disadv,disabled,limited_eng,lat,lng
count,167.0,83.0,83.0,84.0,84.0,83.0,56.0,55.0,56.0,57.0,...,167.0,70.0,166.0,167.0,167.0,167.0,166.0,160.0,166.0,166.0
mean,37170.245509,82.807229,83.542169,79.869048,80.571429,80.240964,110.928571,121.563636,117.732143,112.368421,...,146.11976,1.728571,139.156627,263.532934,251.42515,205.994012,59.174699,137.91875,36.154412,-86.745979
std,71.833263,35.392041,35.909202,36.034047,36.657084,36.311522,57.490824,67.783633,65.193834,60.779059,...,168.348885,1.141083,146.542246,181.53727,169.65537,134.603277,40.21005,160.475315,0.071348,0.07658
min,37013.0,1.0,1.0,1.0,2.0,3.0,1.0,2.0,4.0,1.0,...,1.0,1.0,1.0,3.0,3.0,6.0,2.0,1.0,36.020174,-86.95805
25%,37138.0,56.5,52.5,55.75,51.0,57.0,73.25,74.5,78.0,78.0,...,27.0,1.0,30.25,148.0,141.0,120.0,33.0,23.0,36.096979,-86.801846
50%,37207.0,81.0,88.0,78.5,80.5,75.0,104.0,118.0,119.5,104.0,...,78.0,1.0,83.5,227.0,222.0,191.0,50.0,68.5,36.156187,-86.746897
75%,37211.0,107.5,111.0,101.5,106.0,104.0,151.0,172.5,155.5,158.0,...,202.5,2.0,204.5,334.0,324.0,262.0,74.75,211.75,36.19837,-86.699883
max,37228.0,156.0,178.0,185.0,180.0,184.0,231.0,271.0,255.0,242.0,...,882.0,6.0,826.0,1111.0,1137.0,810.0,283.0,828.0,36.318151,-86.578658


# End of Instruction

### Use value_counts to find the count of each zipcode

In [133]:
# Count how many schools are located in each zipcode.
schools['zipcode'].value_counts()

zipcode
37013    21
37211    15
37207    15
37209    15
37206    12
37210     9
37214     9
37216     8
37115     8
37208     7
37203     6
37076     5
37217     5
37218     4
37205     4
37215     3
37072     3
37204     3
37138     3
37212     2
37189     2
37220     2
37080     2
37221     2
37027     1
37228     1
Name: count, dtype: int64

### Save the series created by value_counts to a variable called 'zip_freq'

In [171]:
# Store the per-zipcode tallies (a Series indexed by zipcode) for reshaping.
zip_freq = schools['zipcode'].value_counts()

### Change zip_freq from a series to a dataframe

In [175]:
# Turn the counts Series into a one-column DataFrame; zipcode stays as the index.
zip_freq = zip_freq.to_frame()

### Reset the index of the zip_freq dataframe

In [178]:
# Move zipcode out of the index into its own column and install a 0-based
# integer index. NOTE: re-running this cell would add an extra `index` column.
zip_freq = zip_freq.reset_index()
zip_freq

Unnamed: 0,zipcode,count
0,37013,21
1,37211,15
2,37207,15
3,37209,15
4,37206,12
5,37210,9
6,37214,9
7,37216,8
8,37115,8
9,37208,7


### Sort the zipcodes so they are in numerical order and save the dataframe to itself

In [182]:
# Order rows by zipcode, lowest first (sort_values sorts ascending by default),
# and keep the sorted frame. Row labels travel with their rows, so the index
# is no longer sequential afterwards.
zip_freq = zip_freq.sort_values('zipcode')
zip_freq

Unnamed: 0,zipcode,count
0,37013,21
24,37027,1
16,37072,3
11,37076,5
22,37080,2
8,37115,8
18,37138,3
20,37189,2
10,37203,6
17,37204,3


### Your index is now out of order. Reset the index again, this time dropping the original index

In [184]:
# reset_index returns a new DataFrame rather than modifying zip_freq, so the
# result must be assigned back; otherwise zip_freq keeps the shuffled index
# left over from the sort. drop = True discards that old index instead of
# adding it as a column.
zip_freq = zip_freq.reset_index(drop = True)
zip_freq

Unnamed: 0,zipcode,count
0,37013,21
1,37027,1
2,37072,3
3,37076,5
4,37080,2
5,37115,8
6,37138,3
7,37189,2
8,37203,6
9,37204,3
