In [1]:
import numpy as np
import pandas as pd
from datetime import date


In [2]:
# Local connection information: `local_db` is presumably a project-local module
# (not shown here) that knows how to open the database connection.
import local_db
# This handle is what the pd.read_sql_query calls in later cells receive.
connection = local_db.connection()


In [3]:
# Capture the run date once; its compact YYYYMMDD form is reused later as the
# prefix of the output file name.
today = date.today()
today_str = f'{today:%Y%m%d}'
print(today_str)

20180523


In [4]:
# Earliest ACADEMIC_YEAR to pull. Kept as a string because it is interpolated
# into a quoted SQL literal when the section query is built.
sections_begin_year = '2011'


In [5]:
# Load every SECTIONSCHEDULE row from the starting year onward, restricted to
# the FALL/SPRING/SUMMER terms and the listed sessions. Adjacent string
# literals are joined by the parser, so no explicit `+` is needed.
sql_str = (
    "SELECT * FROM SECTIONSCHEDULE WHERE "
    f"ACADEMIC_YEAR >= '{sections_begin_year}' "
    "AND ACADEMIC_TERM IN ('FALL', 'SPRING', 'SUMMER') "
    "AND ACADEMIC_SESSION IN ('MAIN', 'CULN', 'EXT', 'FNRR', 'HEOP', "
    " 'SLAB', 'BLOCK A', 'BLOCK AB', 'BLOCK B') "
)
df_ss = pd.read_sql_query(sql=sql_str, con=connection)


In [6]:
# Narrow the raw query result to the columns the rest of the notebook uses.
df = df_ss.loc[:, [
    'ACADEMIC_YEAR',
    'ACADEMIC_TERM',
    'ACADEMIC_SESSION',
    'EVENT_ID',
    'EVENT_SUB_TYPE',
    'SECTION',
    'DAY',
    'START_TIME',
    'END_TIME',
    'BUILDING_CODE',
    'ROOM_ID',
]]


In [7]:
# Exclude rows whose EVENT_ID contains "REG", then those containing "STDY"
# (both matched case-insensitively). Each step filters the result of the
# previous one.
df = (
    df.loc[lambda frame: ~frame['EVENT_ID'].str.contains('REG', case=False)]
      .loc[lambda frame: ~frame['EVENT_ID'].str.contains('STDY', case=False)]
)


In [None]:
# Inspection only: show the frame's dimensions, then its first rows.
print(df.shape)
df.head()

In [8]:
# Build the per-section key:
#   EVENT_ID.EVENT_SUB_TYPE.ACADEMIC_YEAR.Term.SECTION
# with the term title-cased (e.g. "Spring"). `assign` returns a new frame
# instead of writing a column into `df` in place; `df` here is the product of
# boolean filtering, and an in-place `.loc` write on such a frame triggers
# pandas' SettingWithCopyWarning. The new column is still appended last.
df = df.assign(
    section_integration_id=(
        df['EVENT_ID'] + '.' +
        df['EVENT_SUB_TYPE'] + '.' +
        df['ACADEMIC_YEAR'] + '.' +
        df['ACADEMIC_TERM'].str.title() + '.' +
        df['SECTION']
    )
)


In [None]:
# Inspection only: frequency of each EVENT_SUB_TYPE value (result is displayed, not stored).
df['EVENT_SUB_TYPE'].value_counts()

In [None]:
# Inspection only: frequency of each DAY code (result is displayed, not stored).
df['DAY'].value_counts()

In [None]:
# Inspection only: frequency of each BUILDING_CODE (result is displayed, not stored).
df['BUILDING_CODE'].value_counts()

In [9]:
# Row/column count before the sub-type / day / building filter is applied.
print(df.shape)

(5092, 12)


In [10]:
# Keep a row only when all of the following hold:
#   - its sub-type is not ACE, EXT or ONLN
#   - its day code is not TBD, ONLN or CANC
#   - its building code is present and is not ONLINE
df = df[
    ~df['EVENT_SUB_TYPE'].isin(['ACE', 'EXT', 'ONLN'])
    & ~df['DAY'].isin(['TBD', 'ONLN', 'CANC'])
    & df['BUILDING_CODE'].ne('ONLINE')
    & df['BUILDING_CODE'].notna()
]


In [11]:
# Row/column count after the sub-type / day / building filter.
print(df.shape)

(4827, 12)


In [None]:
# Inspection only: EVENT_SUB_TYPE frequencies in the filtered frame.
df['EVENT_SUB_TYPE'].value_counts()

In [None]:
# Inspection only: DAY code frequencies in the filtered frame.
df['DAY'].value_counts()

In [None]:
# Inspection only: BUILDING_CODE frequencies in the filtered frame.
df['BUILDING_CODE'].value_counts()

In [None]:
# Inspection only: first rows of the filtered frame.
df.head()

In [12]:
# Lookup table pairing each BUILDING_CODE with its BUILD_NAME_1 value.
# Note that `sql_str` is reused here and now holds this query's text.
sql_str = "SELECT BUILDING_CODE, BUILD_NAME_1 FROM BUILDING "
building_codes = pd.read_sql_query(sql=sql_str, con=connection)

print(building_codes.shape)
building_codes.head()


(41, 2)


Unnamed: 0,BUILDING_CODE,BUILD_NAME_1
0,ADK 1,Lower St. Regis Hall
1,ADK 2,Upper St. Regis Hall
2,ADM,Phelps Smith Administration Building
3,ALUMNI,Alumni Hall
4,APARK,Alumni Park


In [13]:
# Bring in BUILD_NAME_1 for each row's BUILDING_CODE. A left join keeps
# schedule rows even when their code has no entry in the lookup table.
df = df.merge(building_codes, how='left', on='BUILDING_CODE')

df.head()

Unnamed: 0,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,EVENT_ID,EVENT_SUB_TYPE,SECTION,DAY,START_TIME,END_TIME,BUILDING_CODE,ROOM_ID,section_integration_id,BUILD_NAME_1
0,2011,SPRING,MAIN,ACC 101,LEC,1,MWF,1900-01-01 09:05:00,1900-01-01 10:00:00,PICKTT,223,ACC 101.LEC.2011.Spring.01,Pickett
1,2011,SPRING,MAIN,ACC 101,LEC,2,MWF,1900-01-01 11:15:00,1900-01-01 12:10:00,PICKTT,223,ACC 101.LEC.2011.Spring.02,Pickett
2,2011,SPRING,MAIN,ACC 102,LEC,1,TTHR,1900-01-01 11:10:00,1900-01-01 12:35:00,PICKTT,223,ACC 102.LEC.2011.Spring.01,Pickett
3,2011,SPRING,MAIN,BIO 102,LEC,1,MWF,1900-01-01 09:05:00,1900-01-01 10:00:00,FREER,AUD,BIO 102.LEC.2011.Spring.01,Freer
4,2011,SPRING,MAIN,BIO 102,LAB,1,MON,1900-01-01 14:30:00,1900-01-01 17:30:00,FREER,117,BIO 102.LAB.2011.Spring.01,Freer


In [14]:
# Give the building-name and room columns their output names.
df = df.rename(
    {'BUILD_NAME_1': 'building', 'ROOM_ID': 'room'},
    axis='columns',
)


In [15]:
# Render the start/end timestamps as 12-hour clock strings such as "09:05AM".
# The lowercase columns are added alongside the original START_TIME/END_TIME.
df = df.assign(
    start_time=df['START_TIME'].dt.strftime('%I:%M%p'),
    end_time=df['END_TIME'].dt.strftime('%I:%M%p'),
)


In [None]:
# Inspection only: first rows, now including the formatted time columns.
df.head()

In [None]:
# Inspection only: first rows of the building lookup table. The frame is named
# `building_codes`; no `bldg_codes` is defined in this notebook as shown, so
# the old name raised NameError on a fresh kernel.
building_codes.head()

In [16]:
# Lookup table pairing each day code (CODE_VALUE) with its DAY_SORT value.
# Note that `sql_str` is reused here and now holds this query's text.
sql_str = "SELECT CODE_VALUE, DAY_SORT FROM CODE_DAY "
day_codes = pd.read_sql_query(sql=sql_str, con=connection)

print(day_codes.shape)
day_codes.head()


(29, 2)


Unnamed: 0,CODE_VALUE,DAY_SORT
0,ALL,12345.0
1,CANC,
2,FRI,5.0
3,MF,15.0
4,MON,1.0


In [17]:
# Each DAY_SORT digit 1-7 becomes the letter M, T, W, R, F, A, S respectively.
# The letters contain no digits, so a single translate pass gives the same
# result as replacing the digits one after another.
_DAY_DIGIT_TO_LETTER = str.maketrans('1234567', 'MTWRFAS')


def day_func(c):
    """Convert one day-code row's DAY_SORT digits into meeting-day letters.

    Parameters
    ----------
    c : mapping or pandas.Series
        A row exposing a 'DAY_SORT' entry (e.g. 135, '135' or 135.0).

    Returns
    -------
    str or float
        The translated letters (e.g. 'MWF'), or NaN when DAY_SORT is missing.
    """
    day_sort = c['DAY_SORT']
    # A missing sort key stays missing; calling str() on it would yield the
    # literal text 'None' / 'nan' and carry that into the output.
    if pd.isna(day_sort):
        return np.nan
    # If the column arrives as float (e.g. 135.0), drop the fractional part so
    # the result is 'MWF' rather than 'MWF.0'.
    if isinstance(day_sort, float) and day_sort.is_integer():
        day_sort = int(day_sort)
    return str(day_sort).translate(_DAY_DIGIT_TO_LETTER)


day_codes['meeting_days'] = day_codes.apply(day_func, axis=1)

print(day_codes.shape)
day_codes

(29, 3)


Unnamed: 0,CODE_VALUE,DAY_SORT,meeting_days
0,ALL,12345.0,MTWRF
1,CANC,,
2,FRI,5.0,F
3,MF,15.0,MF
4,MON,1.0,M
5,MTR,124.0,MTR
6,MTRF,1245.0,MTRF
7,MTU,12.0,MT
8,MTW,123.0,MTW
9,MTWF,1235.0,MTWF


In [18]:
# Match each row's DAY code against CODE_VALUE to pick up DAY_SORT and
# meeting_days. A left join keeps rows whose DAY has no matching code.
df = df.merge(day_codes, how='left', left_on='DAY', right_on='CODE_VALUE')

df.head()

Unnamed: 0,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,EVENT_ID,EVENT_SUB_TYPE,SECTION,DAY,START_TIME,END_TIME,BUILDING_CODE,room,section_integration_id,building,start_time,end_time,CODE_VALUE,DAY_SORT,meeting_days
0,2011,SPRING,MAIN,ACC 101,LEC,1,MWF,1900-01-01 09:05:00,1900-01-01 10:00:00,PICKTT,223,ACC 101.LEC.2011.Spring.01,Pickett,09:05AM,10:00AM,MWF,135,MWF
1,2011,SPRING,MAIN,ACC 101,LEC,2,MWF,1900-01-01 11:15:00,1900-01-01 12:10:00,PICKTT,223,ACC 101.LEC.2011.Spring.02,Pickett,11:15AM,12:10PM,MWF,135,MWF
2,2011,SPRING,MAIN,ACC 102,LEC,1,TTHR,1900-01-01 11:10:00,1900-01-01 12:35:00,PICKTT,223,ACC 102.LEC.2011.Spring.01,Pickett,11:10AM,12:35PM,TTHR,24,TR
3,2011,SPRING,MAIN,BIO 102,LEC,1,MWF,1900-01-01 09:05:00,1900-01-01 10:00:00,FREER,AUD,BIO 102.LEC.2011.Spring.01,Freer,09:05AM,10:00AM,MWF,135,MWF
4,2011,SPRING,MAIN,BIO 102,LAB,1,MON,1900-01-01 14:30:00,1900-01-01 17:30:00,FREER,117,BIO 102.LAB.2011.Spring.01,Freer,02:30PM,05:30PM,MON,1,M


In [19]:
# Columns of the export, in output order; everything else is dropped here.
output_columns = [
    'section_integration_id',
    'meeting_days',
    'start_time',
    'end_time',
    'building',
    'room',
]
df = df.loc[:, output_columns]


In [21]:
# Order the rows and keep one row per (section, meeting days, start time).
# NOTE(review): the sort keys and the duplicate keys are the same columns, so
# duplicates tie on every sort key and which one ends up "last" follows the
# order the rows already had - confirm that is the intended row to keep.
dedupe_keys = ['section_integration_id', 'meeting_days', 'start_time']
df = df.sort_values(dedupe_keys).drop_duplicates(subset=dedupe_keys, keep='last')


In [22]:
# Final row/column count after de-duplication.
print(df.shape)

(4804, 6)


In [23]:
# Write the result to a date-prefixed file in the current working directory.
# The content is comma-separated (to_csv default) despite the .txt extension;
# the index is omitted.
fn_output = f'{today_str}_section_schedules.txt'
df.to_csv(fn_output, index=False)
