In [1]:
import numpy as np
import pandas as pd
from datetime import date


In [2]:
# local connection information
# local_db is a project-local module (not visible in this notebook); the object
# returned by local_db.connection() is what the pd.read_sql_query calls in the
# later cells run their SQL against.
import local_db
connection = local_db.connection()


In [3]:
# Run date, kept both as a date object (used later for the rolling-window year)
# and as a YYYYMMDD string (used later in the output file name).
today = date.today()
today_str = f'{today:%Y%m%d}'
print(today_str)

20180522


In [4]:
# Load every TRANSCRIPTDETAIL row whose CREDIT_TYPE is 'TRAN'.
# All columns are fetched here; a later cell narrows the frame.
sql_str = (
    "SELECT * FROM TRANSCRIPTDETAIL WHERE "
    "CREDIT_TYPE = 'TRAN' "
)
df_td = pd.read_sql_query(sql=sql_str, con=connection)


In [5]:
# (rows, columns) of the raw transfer extract, as a sanity check on the query.
print(df_td.shape)

(18373, 75)


In [6]:
# Narrow the SELECT * extract to the 14 columns this notebook keeps.
td_columns = [
    'PEOPLE_CODE_ID', 'ACADEMIC_YEAR', 'ACADEMIC_TERM', 'ACADEMIC_SESSION',
    'EVENT_ID', 'EVENT_SUB_TYPE', 'SECTION', 'EVENT_MED_NAME',
    'ORG_CODE_ID', 'CREDIT_TYPE', 'CREDIT', 'FINAL_GRADE',
    'REFERENCE_EVENT_ID', 'REFERENCE_SUB_TYPE',
]
df_td = df_td[td_columns]


In [7]:
# Shape after column selection: row count should be unchanged, 14 columns left.
print(df_td.shape)

(18373, 14)


In [8]:
# Build the active-student list from a 2-year rolling window.
# The cutoff is the current calendar year minus two; ACADEMIC_YEAR is compared
# against it as a quoted string, so only years strictly after the cutoff match.
# NOTE(review): in standard SQL, NOT IN never matches NULL, so rows with a NULL
# CURRICULUM or GRADUATED are excluded as well -- confirm that is intended.
two_years_ago = today.year - 2
sql_str = (
    "SELECT PEOPLE_CODE_ID FROM ACADEMIC WHERE "
    f"ACADEMIC_YEAR > '{two_years_ago}' "
    "AND PRIMARY_FLAG = 'Y' "
    "AND CURRICULUM NOT IN ('ADVST') "
    "AND GRADUATED NOT IN ('G') "
)
# A student can match on several ACADEMIC rows; keep one row per PEOPLE_CODE_ID.
active = (
    pd.read_sql_query(sql=sql_str, con=connection)
      .drop_duplicates(subset=['PEOPLE_CODE_ID'])
)


In [None]:
# Inspect the cutoff year, the size of the active-student list, and its first rows.
print(two_years_ago)
print(active.shape)
active.head()

In [10]:
# Keep only transfer records that belong to active students: the inner join on
# PEOPLE_CODE_ID drops any transfer row whose student is not in `active`.
df = df_td.merge(active, on='PEOPLE_CODE_ID', how='inner')

In [11]:
# (rows, columns) of the transfer records left after the active-student join.
print(df.shape)

(2924, 14)


In [None]:
# Preview the first 30 merged rows.
df.head(30)

In [13]:
def crs_id(c):
    """Return the transfer course number for one row.

    The number is EVENT_ID (as text) with every space removed. When
    EVENT_SUB_TYPE is exactly 'LAB' or 'SI', the upper-cased sub type is
    appended; any other sub type leaves the number unsuffixed.
    """
    course = str(c['EVENT_ID']).replace(' ', '')
    if c['EVENT_SUB_TYPE'] in ('LAB', 'SI'):
        return course + str(c['EVENT_SUB_TYPE']).upper()
    return course


# One course number per row, evaluated row by row.
df.loc[:, 'transfer_course_number'] = df.apply(crs_id, axis=1)


In [14]:
def tr_section_id(c):
    """Return the transfer course section number for one row.

    Rows whose ACADEMIC_YEAR is '1999' or '2004' get
    '<EVENT_ID>.<EVENT_SUB_TYPE>.Transfer'; every other row gets
    '<EVENT_ID>.<EVENT_SUB_TYPE>.<ACADEMIC_YEAR>.<Title-cased ACADEMIC_TERM>.TR'.
    EVENT_ID is used as stored (spaces are not stripped here).
    """
    # NOTE(review): 1999 and 2004 look like placeholder years for undated
    # transfer credit -- confirm with the data owner.
    undated = c['ACADEMIC_YEAR'] in ('1999', '2004')
    prefix = c['EVENT_ID'] + '.' + c['EVENT_SUB_TYPE']
    if undated:
        return prefix + '.Transfer'
    return prefix + '.' + c['ACADEMIC_YEAR'] + '.' + c['ACADEMIC_TERM'].title() + '.TR'


# One section number per row, evaluated row by row.
df.loc[:, 'transfer_course_section_number'] = df.apply(tr_section_id, axis=1)

In [15]:
# Columns that carry the same fixed value on every exported row.
constant_columns = {
    'ag_grading_type': 'P/F',
    'ag_status': 'TRANSFER',
}
for column, value in constant_columns.items():
    df.loc[:, column] = value

In [16]:
# Map source column names onto the names used in the export; columns not
# listed here keep their original names.
column_renames = {
    'PEOPLE_CODE_ID': 'student_integration_id',
    'CREDIT': 'credits',
    'EVENT_MED_NAME': 'course_title',
    'ACADEMIC_YEAR': 'term_year',
    'ACADEMIC_TERM': 'term_season',
}
df = df.rename(columns=column_renames)


In [17]:
# Translate FINAL_GRADE into the export grade: 'P' when the source grade is
# exactly 'TR', 'NG' for anything else (a missing grade also compares unequal
# to 'TR' and therefore becomes 'NG').
# This is a single vectorized comparison rather than a row-by-row apply.
# NOTE(review): the original cell followed this with a filter that dropped rows
# whose ag_grade was null. Every row is assigned 'P' or 'NG', so that filter
# could never remove anything and has been dropped. If rows without a 'TR'
# grade are meant to be excluded, filter on FINAL_GRADE explicitly instead.
df.loc[:, 'ag_grade'] = np.where(df['FINAL_GRADE'] == 'TR', 'P', 'NG')

In [18]:
# Keep only the columns that go into the export, in output order.
export_columns = [
    'student_integration_id',
    'transfer_course_number',
    'transfer_course_section_number',
    'ag_grade',
    'ag_grading_type',
    'ag_status',
    'credits',
]
df = df.loc[:, export_columns]


In [19]:
# Shape of the export frame: row count should match the merge, 7 columns left.
print(df.shape)

(2924, 7)


In [None]:
# Plain-text preview of the first five export rows.
print(df.head())

In [21]:
# Check dtypes and non-null counts of the export columns.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2924 entries, 0 to 2923
Data columns (total 7 columns):
student_integration_id            2924 non-null object
transfer_course_number            2924 non-null object
transfer_course_section_number    2924 non-null object
ag_grade                          2924 non-null object
ag_grading_type                   2924 non-null object
ag_status                         2924 non-null object
credits                           2924 non-null float64
dtypes: float64(1), object(6)
memory usage: 182.8+ KB


In [22]:
# Eyeball the generated section numbers (bare expression, so the notebook
# renders the Series itself).
df['transfer_course_section_number']

0              BIO 101.LEC.Transfer
1              CHM 141.LEC.Transfer
2              CHM 142.LEC.Transfer
3              COM 101.LEC.Transfer
4              ECN 102.LEC.Transfer
5             EDU 7900.LEC.Transfer
6             EDU 8400.LEC.Transfer
7              EDU7400.LEC.Transfer
8              ENG 101.LEC.Transfer
9              HST 202.LEC.Transfer
10            HUM 1000.LEC.Transfer
11             MAT 125.LEC.Transfer
12             PSY 101.LEC.Transfer
13             ENG 101.LEC.Transfer
14             CUL 280.LEC.Transfer
15            EDU 7100.LEC.Transfer
16             EDU7101.LEC.Transfer
17             ENG 101.LEC.Transfer
18             HST 201.LEC.Transfer
19         BIO 210.LEC.2014.Fall.TR
20         CHM 141.LEC.2014.Fall.TR
21         EST 101.LEC.2014.Fall.TR
22         EDU7102.LEC.2014.Fall.TR
23       CHM 142.LEC.2015.Spring.TR
24      EDU 7100.LEC.2015.Spring.TR
25       EDU7402.LEC.2015.Spring.TR
26         MAT 180.LEC.2017.Fall.TR
27             BIO 101.LEC.T

In [23]:
# Number of rows per section number, listed in section-number order.
print(df['transfer_course_section_number'].value_counts().sort_index())


ACC 101.LEC.2015.Summer.TR     2
ACC 101.LEC.2016.Spring.TR     1
ACC 101.LEC.2016.Summer.TR     1
ACC 101.LEC.2017.Summer.TR     3
ACC 101.LEC.Transfer          32
ACC 102.LEC.2016.Spring.TR     1
ACC 102.LEC.2017.Summer.TR     1
ACC 102.LEC.Transfer           5
AR 100.LEC.Transfer            9
AR 101.LEC.Transfer            2
AR 300.LEC.Transfer            1
BAK 150.COMB.Transfer          3
BAK 160.COMB.Transfer          2
BAK 165.COMB.Transfer          2
BAK 260.COMB.Transfer          2
BAK 265.COMB.Transfer          2
BIO 101.LEC.2016.Fall.TR       1
BIO 101.LEC.2017.Summer.TR     1
BIO 101.LEC.Transfer          68
BIO 102.LEC.2016.Summer.TR     1
BIO 102.LEC.Transfer          98
BIO 204.LEC.Transfer           3
BIO 210.LEC.2014.Fall.TR       1
BIO 210.LEC.Transfer          24
BIO 225.LEC.Transfer           3
BIO 230.LEC.2017.Summer.TR     1
BIO 240.LEC.Transfer           1
BIO 300.LEC.2017.Summer.TR     2
BIO 363.LEC.2017.Summer.TR     1
BUS 101.LEC.Transfer           2
          

In [24]:
# Allow at most one row per (student, section number) pair. Rows are sorted by
# the pair first; where a pair repeats, the last row in that order is kept.
dedupe_keys = ['student_integration_id', 'transfer_course_section_number']
df = (
    df.sort_values(by=dedupe_keys)
      .drop_duplicates(subset=dedupe_keys, keep='last')
)


In [25]:
# Shape after de-duplication; a lower row count than before means duplicates were dropped.
print(df.shape)

(2924, 7)


In [26]:
# Write the export to a date-stamped file, using a relative path (so it lands
# in the current working directory).
# to_csv with default settings emits comma-separated values (the .txt extension
# does not change the format); index=False leaves the row index out of the file.
fn_output = f'{today_str}_student_transfer_records.txt'
df.to_csv(fn_output, index=False)
