# Student Census Data

### Import Libraries

In [23]:
import pandas as pd
import sqlite3

### Constants, Pandas Data Frame, SQLite Database

In [24]:
# Filename of the CSV data source
STUDENT_CENSUS_DATA_CSV = "student_census_data.csv"

# Filename of the SQLite database that the notebook connects to
STUDENT_CENSUS_DATA_DB = "student_census_data.db"

# Create the Pandas Data Frame. No index_col is passed, so pandas assigns its
# default integer index instead of using one of the CSV columns as the index.
student_census_data_frame = pd.read_csv(STUDENT_CENSUS_DATA_CSV)

# Connect the SQLite Database (the connection is closed later in the notebook)
student_census_data_base = sqlite3.connect(STUDENT_CENSUS_DATA_DB)

### Display the raw data

In [25]:
# Bare last expression: the notebook renders the raw frame as a table
# (pandas elides the middle rows/columns of a large frame in the display).
student_census_data_frame

Unnamed: 0,Country,Region,DataYear,ClassGrade,Gender,Ageyears,Handed,Height_cm,Footlength_cm,Armspan_cm,...,Watching_TV_Hours,Paid_Work_Hours,Work_At_Home_Hours,Schoolwork_Pressure,Planned_Education_Level,Favorite_Music,Superpower,Preferred_Status,Role_Model_Type,Charity_Donation
0,USA,KY,2018,10,Male,15.0,Left-Handed,189,28,201,...,3,0,3,Some,Undergraduate degree,Rock and roll,Freeze time,Healthy,Doctor or nurse,Environment
1,USA,KY,2015,6,Male,11.0,Right-Handed,,,,...,,,,,,,,,,
2,USA,KY,2019,7,Male,13.0,Right-Handed,177.8,,,...,,,,,,,,,,
3,USA,KY,2012,12,Female,18.0,Left-Handed,171,24,171,...,4,13,3,Some,Graduate degree,Country,Fly,Happy,Teacher,International aid
4,USA,KY,2015,6,Male,11.0,Right-Handed,157,24,163,...,0,0,0,Very little,Graduate degree,Country,Freeze time,Happy,Relative,Other
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,USA,KY,2018,10,Male,14.0,Right-Handed,169,24,173,...,2,1,2,A lot,Graduate degree,Heavy metal,Fly,Happy,Relative,International aid
496,USA,KY,2012,6,Male,11.0,Right-Handed,24,34,35,...,655,56757,65567,,Less than high school,Pop,Fly,Healthy,Sports person,Environment
497,USA,KY,2012,6,Female,12.0,Right-Handed,167.6,24.13,165.1,...,1,0,3,A lot,Graduate degree,Rap/Hip hop,Telepathy,Healthy,Relative,Health
498,USA,KY,2019,12,Female,17.0,Right-Handed,167,23.5,162,...,40,27,10,Some,Graduate degree,Pop,Telepathy,Happy,Other,Health


## Select Columns and Cleanup

In [26]:
# Columns to remove from the raw frame before analysis (this list is passed to
# DataFrame.drop). Comment out a column name to keep it: the commented-out
# entries below are the columns the analysis retains.
drop_columns = [
    'Country',
    'Region',
    'DataYear',
#     'ClassGrade',
#     'Gender',
    'Ageyears',
    'Languages_spoken',
    'Doing_Homework_Hours',
#     'Schoolwork_Pressure',
#     'Planned_Education_Level',
    'Handed', 
    'Height_cm', 
    'Footlength_cm', 
    'Armspan_cm',
    'Travel_to_School',
    'Travel_time_to_School',
    'Reaction_time',
    'Score_in_memory_game',
    'Watching_TV_Hours',
    'Paid_Work_Hours',
    'Work_At_Home_Hours',
    'Favourite_physical_activity',
    'Importance_reducing_pollution',
    'Importance_recycling_rubbish',
    'Importance_conserving_water',
    'Importance_saving_energy',
    'Importance_owning_computer',
    'Importance_Internet_access',
    'Left_Footlength_cm',
    'Longer_foot',
    'Index_Fingerlength_mm',
    'Ring_Fingerlength_mm',
    'Longer_Finger_Lefthand',
    'Favorite_Season',
    'Allergies',
    'Vegetarian',
    'Favorite_Food',
    'Beverage',
#     'Favorite_School_Subject',
    'Sleep_Hours_Schoolnight',
    'Sleep_Hours_Non_Schoolnight',
    'Social_Websites_Hours',
    'Texting_Messaging_Hours',
    'Computer_Use_Hours',
    'Birth_month',
    'Home_Occupants',
    'Home_Internet_Access',
    'Communication_With_Friends',
    'Text_Messages_Sent_Yesterday',
    'Text_Messages_Received_Yesterday',
    'Hanging_Out_With_Friends_Hours',
    'Talking_On_Phone_Hours',
    'Doing_Things_With_Family_Hours',
    'Outdoor_Activities_Hours',
    'Video_Games_Hours',
    'Favorite_Music',
    'Superpower',
    'Preferred_Status',
    'Role_Model_Type',
    'Charity_Donation'
]

In [53]:
# Build the cleaned frame in two steps: discard every column named in
# drop_columns, then discard any row that still has a missing value.
# The raw frame itself is left untouched.
student_census_data_frame_cleaned = (
    student_census_data_frame
    .drop(columns=drop_columns)
    .dropna()
)

# Show the dtype of each column that remains.
student_census_data_frame_cleaned.dtypes

ClassGrade                  int64
Gender                     object
Favorite_School_Subject    object
Schoolwork_Pressure        object
Planned_Education_Level    object
dtype: object

In [54]:
# Bare last expression: the notebook renders the cleaned frame as a table.
student_census_data_frame_cleaned

Unnamed: 0,ClassGrade,Gender,Favorite_School_Subject,Schoolwork_Pressure,Planned_Education_Level
0,10,Male,Mathematics and statistics,Some,Undergraduate degree
3,12,Female,English,Some,Graduate degree
4,6,Male,Physical education,Very little,Graduate degree
5,8,Female,Art,A lot,Graduate degree
7,12,Female,Mathematics and statistics,A lot,Graduate degree
...,...,...,...,...,...
495,10,Male,Mathematics and statistics,A lot,Graduate degree
496,6,Male,Physical education,,Less than high school
497,6,Female,Art,A lot,Graduate degree
498,12,Female,Science,Some,Graduate degree


## SQLite

### Queries (and their names)

In [34]:
# Each query selects the Planned_Education_Level of students in grades 9-12
# who reported a particular Schoolwork_Pressure value. The matching *_name
# string is the label used for that query's results when they are combined.
#
# String values are written with single quotes and compared with `=`. In SQL,
# double quotes denote identifiers; SQLite only treats "A lot" as a string
# through a legacy fallback when no column of that name exists.

student_census_data_query_1_name = 'A Lot of Pressure'

student_census_data_query_1 = """
    SELECT
        Planned_Education_Level
    FROM
        student_census_data_table
    WHERE
        Schoolwork_Pressure = 'A lot'
    AND
        ClassGrade BETWEEN 9 AND 12
"""

student_census_data_query_2_name = 'Very Little Pressure'

student_census_data_query_2 = """
    SELECT
        Planned_Education_Level
    FROM
        student_census_data_table
    WHERE
        Schoolwork_Pressure = 'Very little'
    AND
        ClassGrade BETWEEN 9 AND 12
"""

In [49]:
# Persist the cleaned frame as the table the SQL queries read from.
# An existing table with the same name is replaced.
student_census_data_frame_cleaned.to_sql(
    name='student_census_data_table',
    con=student_census_data_base,
    if_exists='replace',
)

### Aggregation
####      Make a new dataframe that shows the Planned Education Level of students based on the level of Schoolwork pressure reported

In [50]:
# Run each SQL query and count how often every planned education level occurs
# in its result, giving one pandas Series of counts per query.
education_level_series_a_lot = pd.read_sql(
    student_census_data_query_1, student_census_data_base
)["Planned_Education_Level"].value_counts()

education_level_series_very_little = pd.read_sql(
    student_census_data_query_2, student_census_data_base
)["Planned_Education_Level"].value_counts()

# Map each query's display name to its Series of counts.
planned_education_level_dictionary = {
    student_census_data_query_1_name: education_level_series_a_lot,
    student_census_data_query_2_name: education_level_series_very_little,
}

# Combine the Series into one frame: one column per query, one row per
# education level (rows are aligned on the Series index).
planned_education_level_dataframe = pd.DataFrame(data=planned_education_level_dictionary)

In [51]:
# An education level missing from one query's counts is NaN after the two
# Series are combined. Fill those gaps with the number 0 (not the string '0',
# which would leave a mixed float/str column) and cast back to integer counts.
planned_education_level_dataframe.fillna(0).astype(int)

Unnamed: 0,A Lot of Pressure,Very Little Pressure
Graduate degree,66,13
High school,1,0
Less than high school,1,0
Other,13,0
Some college,1,1
Undergraduate degree,9,3


In [55]:
# Close the SQLite connection now that all queries have been run.
student_census_data_base.close()