In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

### Read in the schools data and check the shape

In [5]:
# Load the cleaned schools CSV into a DataFrame.
schools = pd.read_csv("../data/schools_clean.csv")
# Show (number of rows, number of columns) as the cell output.
schools.shape

(167, 29)

### More exploration with pandas
 - .value_counts()
 - .to_frame()
 - .reset_index()
 - .describe()
 - .info()
 - .isnull().sum()

#### Let's remind ourselves what the data looks like

In [6]:
# Preview only the first two rows rather than the whole frame.
schools.head(n=2)

Unnamed: 0,level,name,zipcode,grade_k,grade_1,grade_2,grade_3,grade_4,grade_5,grade_6,...,hisp,p_islander,white,male,female,econ_disadv,disabled,limited_eng,lat,lng
0,Elementary School,A. Z. Kelley Elementary,37013,153.0,145.0,149.0,180.0,184.0,,,...,206,1.0,212.0,431,421,261,75.0,298.0,36.021817,-86.658848
1,Elementary School,Alex Green Elementary,37189,42.0,50.0,44.0,38.0,24.0,,,...,29,1.0,21.0,115,119,153,21.0,25.0,36.252961,-86.832229


### `value_counts()` counts how many times each unique value occurs in a column; here we apply it to the `level` column

In [7]:
# Tally how many schools fall into each level, most frequent first.
schools['level'].value_counts()

Elementary School              76
Middle School                  31
Charter                        30
High School                    17
Non-Traditional                 5
Special Education               3
Non-Traditional - Hybrid        2
Alternative Learning Center     2
Adult                           1
Name: level, dtype: int64

#### Let's save it into a variable and check the type

In [8]:
# Keep the per-level tallies in a variable so we can inspect and reshape them.
type_counts = schools['level'].value_counts()

In [9]:
# Peek at the five most common levels (5 is also the default for head()).
type_counts.head(n=5)

Elementary School    76
Middle School        31
Charter              30
High School          17
Non-Traditional       5
Name: level, dtype: int64

In [10]:
# Inspect what kind of object value_counts() handed back.
type(type_counts)

pandas.core.series.Series

In [11]:
# Turn the Series of counts into a one-column DataFrame with .to_frame().
# The chain starts from `schools` instead of from `type_counts` itself so this
# cell can be re-run safely: once `type_counts` is a DataFrame it no longer has
# a .to_frame() method, so `type_counts = type_counts.to_frame()` fails on a
# second run.
type_counts = schools.level.value_counts().to_frame()

In [12]:
# Confirm that the conversion produced a DataFrame.
type(type_counts)

pandas.core.frame.DataFrame

In [13]:
# Look at the first two rows; the school levels are still the index here.
type_counts.head(n=2)

Unnamed: 0,level
Elementary School,76
Middle School,31


In [14]:
# reset_index() swaps in a fresh 0-based index and moves the existing index
# (the school level names) into an ordinary column.
# The chain is rebuilt from `schools` so the cell gives the same two-column
# result every time it runs; applying reset_index() to an already-reset
# `type_counts` would not reproduce this frame.
type_counts = schools.level.value_counts().to_frame().reset_index()
type_counts

Unnamed: 0,index,level
0,Elementary School,76
1,Middle School,31
2,Charter,30
3,High School,17
4,Non-Traditional,5
5,Special Education,3
6,Non-Traditional - Hybrid,2
7,Alternative Learning Center,2
8,Adult,1


#### Rename the columns to something more descriptive

In [15]:
# Relabel the two columns by position: the first holds the school type and the
# second holds how many schools have that type. Assigning by position avoids
# depending on the names reset_index() generated.
type_counts.columns = ['school_type', 'count']
# Show the first three rows to confirm the new labels.
type_counts.head(3)

Unnamed: 0,school_type,count
0,Elementary School,76
1,Middle School,31
2,Charter,30


#### Check the type again

In [16]:
# Relabelling the columns should not have changed the object's type.
type(type_counts)

pandas.core.frame.DataFrame

### `sort_values()` sorts the data frame by the specified column or columns
 - by default it sorts smallest to largest (`ascending=True`); below we pass `ascending=False` to sort largest to smallest

In [22]:
# Order the rows from the largest count to the smallest. The sorted frame is
# only displayed; it is not assigned back to type_counts.
type_counts.sort_values('count', ascending=False)

Unnamed: 0,school_type,count
0,Elementary School,76
1,Middle School,31
2,Charter,30
3,High School,17
4,Non-Traditional,5
5,Special Education,3
6,Non-Traditional - Hybrid,2
7,Alternative Learning Center,2
8,Adult,1
