In [None]:
# Import Library Dependencies
from sqlalchemy import create_engine, engine, inspect
from config import user, password

import datetime as dt
import numpy as np
import pandas as pd
import os

In [None]:
# File names of the CSV exports that feed the initial database load
csvStudentRecords = '2019-06-22-StudentMobility.csv'
csvDistrictSchools = '2019-06-26-DistrictSchools.csv'

# Map each upload kind to the relative path of its CSV inside the 'Datasets' folder
csvUploads = {
    label: os.path.join('Datasets', filename)
    for label, filename in (
        ('records', csvStudentRecords),
        ('schools', csvDistrictSchools),
    )
}

In [None]:
# Columns to keep from the raw student records CSV, listed in the order the
# DataFrame should present them:
#   NID - Student Id      GR - Grade        SC - School Code
#   PR  - School Program  ED - Entry Date   LD - Leave Date
#   ER  - Exit Reason
import_headers = ['NID', 'GR', 'SC', 'PR', 'ED', 'LD', 'ER']

In [None]:
# Load the full student records CSV, then keep only the desired columns
# (in the order given by import_headers)
records_raw = pd.read_csv(csvUploads['records'], low_memory=False)
master_df = records_raw[import_headers]
master_df.head(n=10)

In [None]:
# Translate the source column codes into the database field names
update_headers = dict(
    NID='student_id',
    GR='grade_level',
    SC='school_id',
    PR='program_id',
    ED='entry_date',
    LD='leave_date',
    ER='exit_reason',
)

master_df = master_df.rename(update_headers, axis='columns')
master_df.head()

In [None]:
# Preview Dataset -- non-null count per column; the counts are only equal
# across columns when no column has missing data
master_df.count(axis='index')

In [None]:
# Reduce/Truncate Dataset
# Keep only the records that contain an exit reason, then fill the remaining
# missing program_id values with the string 'Gen' ahead of the later data
# conversion. The result is built in one chain so it is a new DataFrame;
# assigning a column into a .loc slice of master_df raises SettingWithCopyWarning.
has_exit_reason = master_df['exit_reason'].notna()
reduced_df = (
    master_df
    .loc[has_exit_reason]
    .assign(program_id=lambda df: df['program_id'].fillna('Gen'))
)
reduced_df.count()

In [None]:
# Columns holding dates; these are parsed separately from the plain type casts
fmt_dates = ['entry_date', 'leave_date']

# Required Python type for each remaining column
dtype = dict(
    student_id=int,
    grade_level=int,
    school_id=int,
    program_id=str,
    exit_reason=int,
)

# Show the column dtypes as they stand before any conversion
reduced_df.dtypes

In [None]:
# Data Conversion on DataFrame Columns to Match Those Required By MySQL Database
# astype(..., errors='ignore') leaves a column at its current dtype when the cast
# fails, so check the dtypes displayed below to confirm every cast took effect.
converted_df = reduced_df.astype(dtype, errors='ignore')

# errors='ignore' is deprecated in pd.to_datetime (pandas 2.2) and returned the
# column unparsed whenever a value could not be converted. errors='coerce' always
# produces datetime columns and marks unparseable values as NaT.
converted_df[fmt_dates] = converted_df[fmt_dates].apply(pd.to_datetime, errors='coerce')
converted_df.dtypes

In [None]:
# Create DataFrames For Initial MySQL Database Upload
# <Create> Students DataFrame Upload: one row per distinct student id, with
# placeholder names plus the update timestamp and updating user
students_df = pd.DataFrame({'id': converted_df['student_id'].unique()}).assign(
    fname='Protected',
    lname='Protected',
    updated_on=dt.datetime.today(),
    updated_by='Admin_User1',
)

students_df.head()

In [None]:
# <Create> Schools DataFrame Upload: load the district schools CSV and keep
# only the columns listed below, in this order
school_headers = ['id', 'name', 'address', 'city', 'state', 'zipcode', 'lat', 'lon']

schools_raw = pd.read_csv(csvUploads['schools'])
schools_df = schools_raw[school_headers]
schools_df.head()

In [None]:
# <Create> Student Records DataFrame Upload: select the record columns from the
# converted data in the order listed here
record_headers = [
    'student_id', 'school_id', 'program_id', 'grade_level',
    'entry_date', 'leave_date', 'exit_reason',
]

records_df = converted_df[record_headers]
records_df.head()

In [None]:
# <Create> School Programs DataFrame Upload: one row per distinct program id,
# each given the placeholder name 'Protected'
programs_df = pd.DataFrame({'id': converted_df['program_id'].unique()}).assign(name='Protected')
programs_df

In [None]:
# Connect to the student_mobility MySQL database and list its existing tables
# NOTE: user and password are interpolated verbatim, so characters such as '@',
# ':' or '/' in either value must be URL-escaped before reaching this string.
MySQL_DB_Connection = f'{user}:{password}@localhost/student_mobility'

# NOTE: this rebinds `engine`, shadowing the `engine` name imported from sqlalchemy.
engine = create_engine(f'mysql://{MySQL_DB_Connection}', echo=True)

# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API returns the same list of table names.
inspect(engine).get_table_names()

In [None]:
# [Upload #1] Append the student rows to the 'students' table (DataFrame index not written)
students_df.to_sql('students', engine, if_exists='append', index=False)

In [None]:
# [Upload #2] Append the school rows to the 'schools' table (DataFrame index not written)
schools_df.to_sql('schools', engine, if_exists='append', index=False)

In [None]:
# [Upload #3] Append the school program rows to the 'programs' table (DataFrame index not written)
programs_df.to_sql('programs', engine, if_exists='append', index=False)

In [None]:
# [Upload #4] Append the student record rows to the 'records' table (DataFrame index not written)
records_df.to_sql('records', engine, if_exists='append', index=False)

In [None]:
# Report that the notebook reached its final cell
upload_status = 'Initial MySQL Data Upload: <Complete>'
print(upload_status)