In [1]:
import numpy as np
import pandas as pd
from datetime import date


In [2]:
# Local connection information lives in the project-local `local_db` module.
# The object returned by local_db.connection() is passed as the `con`
# argument to pd.read_sql_query in the query cells below.
import local_db
connection = local_db.connection()


In [3]:
# Run date: `today.year` drives the active-student cutoff and `today_str`
# (YYYYMMDD) prefixes the output file name.
today = date.today()
today_str = f'{today:%Y%m%d}'
print(today_str)

20180522


In [4]:
# Load every TRANSCRIPTDETAIL row filed under year '1999' / term 'Transfer'.
# NOTE(review): '1999' is presumably the placeholder year used for transfer
# credit -- confirm against the source system.
sql_str = (
    "SELECT * FROM TRANSCRIPTDETAIL WHERE "
    "ACADEMIC_YEAR = '1999' "
    "AND ACADEMIC_TERM = 'Transfer' "
)
df_td = pd.read_sql_query(sql=sql_str, con=connection)


In [5]:
# Sanity check: (rows, columns) of the raw query result.
print(df_td.shape)

(3702, 75)


In [6]:
# Narrow the wide query result to the transcript columns of interest.
df_td = df_td.loc[:, [
    'PEOPLE_CODE_ID',
    'ACADEMIC_YEAR',
    'ACADEMIC_TERM',
    'ACADEMIC_SESSION',
    'EVENT_ID',
    'EVENT_SUB_TYPE',
    'SECTION',
    'EVENT_MED_NAME',
    'ORG_CODE_ID',
    'CREDIT_TYPE',
    'CREDIT',
    'FINAL_GRADE',
    'REFERENCE_EVENT_ID',
    'REFERENCE_SUB_TYPE',
]]


In [7]:
# Sanity check: same row count as before, now with only the selected columns.
print(df_td.shape)

(3702, 14)


In [8]:
# Build the active-student list from a rolling window: ACADEMIC rows with a
# year after (current year - 2), primary flag 'Y', curriculum other than
# 'ADVST' and graduated status other than 'G'.
two_years_ago = today.year - 2
sql_str = (
    "SELECT PEOPLE_CODE_ID FROM ACADEMIC WHERE "
    f"ACADEMIC_YEAR > '{two_years_ago}' "
    "AND PRIMARY_FLAG = 'Y' "
    "AND CURRICULUM NOT IN ('ADVST') "
    "AND GRADUATED NOT IN ('G') "
)
# One row per student; the original row labels of the kept rows are retained.
active = (
    pd.read_sql_query(sql=sql_str, con=connection)
      .drop_duplicates(subset=['PEOPLE_CODE_ID'])
)


In [9]:
# Show the cutoff year, then the size and first rows of the de-duplicated
# active-student list.
print(two_years_ago)
print(active.shape)
active.head()

2016
(1168, 1)


Unnamed: 0,PEOPLE_CODE_ID
0,P000026382
4,P000026411
6,P000027827
8,P000027831
12,P000027843


In [10]:
# Inner join on PEOPLE_CODE_ID: only transfer rows whose student appears in
# the active list survive.
df = df_td.merge(active, on='PEOPLE_CODE_ID', how='inner')

In [11]:
# Row count after the inner join with the active-student list.
print(df.shape)

(2646, 14)


In [12]:
# Preview the merged transfer records (up to 30 rows).
df.head(30)

Unnamed: 0,PEOPLE_CODE_ID,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,EVENT_ID,EVENT_SUB_TYPE,SECTION,EVENT_MED_NAME,ORG_CODE_ID,CREDIT_TYPE,CREDIT,FINAL_GRADE,REFERENCE_EVENT_ID,REFERENCE_SUB_TYPE
0,P000041290,1999,Transfer,TRANS,ACC 101,LEC,1,Financial Accounting,O000002138,TRAN,3.0,TR,ACC 101,TRAN
1,P000041290,1999,Transfer,TRANS,BUS 101,LEC,1,Business Elect,O000002138,TRAN,3.0,TR,BUS 101,TRAN
2,P000041290,1999,Transfer,TRANS,ELEC 107,LEC,1,Electives,O000000134,TRAN,6.0,TR,ELEC 107,TRAN
3,P000041290,1999,Transfer,TRANS,FYS 101,LEC,1,First Year Seminar,O000000011,TRAN,0.0,TR,FYS 101,TRAN
4,P000050045,1999,Transfer,TRANS,ACC 101,LEC,1,Financial Accounting,O000002414,TRAN,3.0,TR,ACC 101,TRAN
5,P000050045,1999,Transfer,TRANS,ECN 101,LEC,1,Macroeconomics,O000002414,TRAN,3.0,TR,ECN 101,TRAN
6,P000050045,1999,Transfer,TRANS,ECN 102,LEC,1,Microeconomics,O000002414,TRAN,3.0,TR,ECN 102,TRAN
7,P000050045,1999,Transfer,TRANS,ECN 200,LEC,1,Principles of Econ,O000002414,TRAN,3.0,TR,ECN 200,TRAN
8,P000050045,1999,Transfer,TRANS,ELEC 100,LEC,1,Elective,O000002414,TRAN,3.0,TR,ELEC 100,TRAN
9,P000050045,1999,Transfer,TRANS,ENG 101,LEC,1,EffctiveCollegeWrtng,O000002414,TRAN,3.0,TR,ENG 101,TRAN


In [13]:
# Map source column names onto the names used in the exported file.
df = df.rename(
    {
        'PEOPLE_CODE_ID': 'student_integration_id',
        'CREDIT': 'credits',
        'FINAL_GRADE': 'ag_grade',
    },
    axis='columns',
)


In [14]:
# Course number = EVENT_ID with spaces removed; the sub type is appended only
# for 'LAB' and 'SI' rows (e.g. 'ACC 101' with sub type 'LEC' -> 'ACC101').
#
# Computed column-wise rather than with a row-wise df.apply(..., axis=1):
# on a frame with zero rows, apply hands back a frame rather than a Series,
# so the assignment misbehaves when no transfer rows match. The vectorized
# form yields an empty column instead.
# No .upper() is needed: the sub type is matched exactly against the
# upper-case literals, so the appended value is already 'LAB' or 'SI'.
df.loc[:, 'transfer_course_number'] = (
    df['EVENT_ID'].astype(str).str.replace(' ', '', regex=False)
    # Rows with any other (or missing) sub type get an empty suffix.
    + df['EVENT_SUB_TYPE'].where(df['EVENT_SUB_TYPE'].isin(['LAB', 'SI']), '')
)


In [15]:
# Section key of the form '<EVENT_ID>.<EVENT_SUB_TYPE>.Transfer'
# (EVENT_ID keeps its embedded space here, e.g. 'ACC 101.LEC.Transfer').
df['transfer_course_section_number'] = (
    df['EVENT_ID'] + '.' + df['EVENT_SUB_TYPE'] + '.' + 'Transfer'
)


In [16]:
# Constant columns: every exported row carries the same grading type and status.
df['ag_grading_type'] = 'UNKNOWN'
df['ag_status'] = 'TRANSFER'

In [17]:
# Keep only the export columns, in output order.
df = df[[
    'student_integration_id',
    'transfer_course_number',
    'transfer_course_section_number',
    'ag_grade',
    'ag_grading_type',
    'ag_status',
    'credits',
]]


In [18]:
# Shape after narrowing to the export columns.
print(df.shape)

(2646, 7)


In [19]:
# Plain-text preview of the first rows in the export layout.
print(df.head())

  student_integration_id transfer_course_number  \
0             P000041290                 ACC101   
1             P000041290                 BUS101   
2             P000041290                ELEC107   
3             P000041290                 FYS101   
4             P000050045                 ACC101   

  transfer_course_section_number ag_grade ag_grading_type ag_status  credits  
0           ACC 101.LEC.Transfer       TR         UNKNOWN  TRANSFER      3.0  
1           BUS 101.LEC.Transfer       TR         UNKNOWN  TRANSFER      3.0  
2          ELEC 107.LEC.Transfer       TR         UNKNOWN  TRANSFER      6.0  
3           FYS 101.LEC.Transfer       TR         UNKNOWN  TRANSFER      0.0  
4           ACC 101.LEC.Transfer       TR         UNKNOWN  TRANSFER      3.0  


In [20]:
# Order rows by student and section key, then keep a single row per
# (student, section) pair -- the last one in sorted order.
df = df.sort_values(by=['student_integration_id',
                        'transfer_course_section_number'])
df = df.drop_duplicates(subset=['student_integration_id',
                                'transfer_course_section_number'],
                        keep='last')


In [21]:
# Shape after de-duplication; compare with the previous shape to see how many
# duplicate rows were dropped.
print(df.shape)

(2646, 7)


In [22]:
# Write the export as comma-separated text (no index column) to a file in the
# working directory whose name is prefixed with the run date.
fn_output = f'{today_str}_student_transfer_records.txt'
df.to_csv(path_or_buf=fn_output, index=False)
