In [1]:
import numpy as np
import pandas as pd
import datetime as dt

print(f"numpy: {np.__version__}")
print(f"pandas: {pd.__version__}")

numpy: 1.16.4
pandas: 0.24.2


In [2]:
# local connection information
import local_db

connection = local_db.connection()


In [3]:
today = dt.datetime.today()
today_str = today.strftime("%Y%m%d")
print(today, type(today))
print(today_str)


2022-02-01 15:27:11.130470 <class 'datetime.datetime'>
20220201


In [4]:
sections_begin_year = "2015"
days_before_prereg_start = 7 * pd.offsets.Day()


print(today - days_before_prereg_start)


2022-01-25 15:27:11.130470


In [5]:
sql_str = (
    "SELECT ACADEMIC_YEAR, ACADEMIC_TERM, PRE_REG_DATE "
    + "FROM ACADEMICCALENDAR WHERE "
    + f"ACADEMIC_YEAR >= '{sections_begin_year}' "
    + "AND ACADEMIC_TERM IN ('FALL', 'SPRING', 'SUMMER') "
    + "AND ACADEMIC_SESSION IN ('MAIN', 'CULN', 'EXT', 'FNRR', 'HEOP', "
    + " 'SLAB', 'BLOCK A', 'BLOCK AB', 'BLOCK B') "
)
df_cal = pd.read_sql_query(sql_str, connection)

df_cal.head()

Unnamed: 0,ACADEMIC_YEAR,ACADEMIC_TERM,PRE_REG_DATE
0,2015,FALL,2015-03-23
1,2015,SPRING,2014-10-27
2,2015,SUMMER,2014-10-27
3,2015,SUMMER,2014-10-27
4,2015,SUMMER,2014-10-27


In [6]:
df_cal.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44 entries, 0 to 43
Data columns (total 3 columns):
ACADEMIC_YEAR    44 non-null object
ACADEMIC_TERM    44 non-null object
PRE_REG_DATE     44 non-null datetime64[ns]
dtypes: datetime64[ns](1), object(2)
memory usage: 1.1+ KB


In [7]:
df_cal = (
    df_cal.groupby(['ACADEMIC_YEAR', 'ACADEMIC_TERM']).min()
    .reset_index()
)

df_cal['after_start_display_section_schedule'] = (today >= (df_cal['PRE_REG_DATE'] - days_before_prereg_start))

df_cal

Unnamed: 0,ACADEMIC_YEAR,ACADEMIC_TERM,PRE_REG_DATE,after_start_display_section_schedule
0,2015,FALL,2015-03-23,True
1,2015,SPRING,2014-10-27,True
2,2015,SUMMER,2014-10-27,True
3,2016,FALL,2016-03-28,True
4,2016,SPRING,2015-10-26,True
5,2016,SUMMER,2016-03-28,True
6,2017,FALL,2017-03-27,True
7,2017,SPRING,2016-10-31,True
8,2017,SUMMER,2017-03-27,True
9,2018,FALL,2018-03-26,True


In [8]:
sql_str = (
    "SELECT * FROM SECTIONSCHEDULE WHERE "
    + f"ACADEMIC_YEAR >= '{sections_begin_year}' "
    + "AND ACADEMIC_TERM IN ('FALL', 'SPRING', 'SUMMER') "
    + "AND ACADEMIC_SESSION IN ('MAIN', 'CULN', 'EXT', 'FNRR', 'HEOP', "
    + " 'SLAB', 'BLOCK A', 'BLOCK AB', 'BLOCK B') "
)
df_ss = pd.read_sql_query(sql_str, connection)


In [9]:
df_ss.shape

(4312, 23)

In [10]:
df = df_ss[
    [
        "ACADEMIC_YEAR",
        "ACADEMIC_TERM",
        "ACADEMIC_SESSION",
        "EVENT_ID",
        "EVENT_SUB_TYPE",
        "SECTION",
        "DAY",
        "START_TIME",
        "END_TIME",
        "BUILDING_CODE",
        "ROOM_ID",
    ]
]


In [11]:
df = df[~(df["EVENT_ID"].str.contains("REG", case=False))]
df = df[~(df["EVENT_ID"].str.contains("STDY", case=False))]


In [12]:
df["EVENT_SUB_TYPE"].value_counts()


LEC     2561
COMB     801
LAB      781
SI        38
PRAC      36
EXT       34
ACE       29
HYBD      27
INT        4
ONLN       1
Name: EVENT_SUB_TYPE, dtype: int64

In [13]:
df["DAY"].value_counts()


TTHR    1130
MWF      751
MW       408
MON      356
WED      305
TUE      294
THUR     238
ONLN     237
FRI      163
ALL      134
MTWR      85
TBD       70
SAT       39
SUN       31
MF        21
TWTF      15
WF        15
MTU        6
TWT        5
WTHR       4
MWRF       2
MTRF       1
MTW        1
WTF        1
Name: DAY, dtype: int64

In [14]:
df["BUILDING_CODE"].value_counts()


FREER     1418
PICKTT    1097
CANTWL     710
JWAL       273
ONLINE     234
TOMPKI     155
           144
SARATG     141
JWSC        45
FORCC       28
SAUN        17
PAOLZI      15
HYBRID       9
VIC          9
WOOD         5
LAMBRT       4
WELD         2
LCMC         2
UPR          2
OVL          2
Name: BUILDING_CODE, dtype: int64

In [15]:
print(df.shape)


(4312, 11)


In [16]:
df = df.loc[
    (~df["EVENT_SUB_TYPE"].isin(["ACE", "EXT", "ONLN"]))
    & (~df["DAY"].isin(["TBD", "ONLN", "CANC"]))
    & (~df["BUILDING_CODE"].isin(["ONLINE"]))
    & (~df["BUILDING_CODE"].isnull())
]


In [17]:
print(df.shape)
df

(3945, 11)


Unnamed: 0,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,EVENT_ID,EVENT_SUB_TYPE,SECTION,DAY,START_TIME,END_TIME,BUILDING_CODE,ROOM_ID
0,2015,SPRING,MAIN,ACC 101,LEC,01,MWF,1900-01-01 09:05:00,1900-01-01 10:00:00,PICKTT,223
1,2015,SPRING,MAIN,BAK 150,COMB,02,MW,1900-01-01 07:00:00,1900-01-01 10:00:00,CANTWL,102
2,2015,SPRING,MAIN,BAK 150,COMB,02,TTHR,1900-01-01 07:00:00,1900-01-01 11:00:00,CANTWL,102
3,2015,SPRING,MAIN,BAK 150,COMB,03,MW,1900-01-01 11:20:00,1900-01-01 14:20:00,CANTWL,102
4,2015,SPRING,MAIN,BAK 150,COMB,03,TTHR,1900-01-01 11:20:00,1900-01-01 15:20:00,CANTWL,102
5,2015,SPRING,MAIN,BAK 260,COMB,01,TTHR,1900-01-01 07:00:00,1900-01-01 11:00:00,CANTWL,105
6,2015,SPRING,MAIN,BAK 260,COMB,01,MW,1900-01-01 07:00:00,1900-01-01 10:00:00,CANTWL,105
7,2015,SPRING,MAIN,BAK 260,COMB,02,TTHR,1900-01-01 11:20:00,1900-01-01 15:20:00,CANTWL,105
8,2015,SPRING,MAIN,BAK 260,COMB,02,MW,1900-01-01 11:20:00,1900-01-01 14:20:00,CANTWL,105
9,2015,SPRING,MAIN,BAK 265,COMB,01,TTHR,1900-01-01 07:00:00,1900-01-01 11:00:00,CANTWL,105


In [18]:
df = pd.merge(df, df_cal, on=["ACADEMIC_YEAR", "ACADEMIC_TERM"], how="left")
print(df.shape)
df.head()

(3945, 13)


Unnamed: 0,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,EVENT_ID,EVENT_SUB_TYPE,SECTION,DAY,START_TIME,END_TIME,BUILDING_CODE,ROOM_ID,PRE_REG_DATE,after_start_display_section_schedule
0,2015,SPRING,MAIN,ACC 101,LEC,1,MWF,1900-01-01 09:05:00,1900-01-01 10:00:00,PICKTT,223,2014-10-27,True
1,2015,SPRING,MAIN,BAK 150,COMB,2,MW,1900-01-01 07:00:00,1900-01-01 10:00:00,CANTWL,102,2014-10-27,True
2,2015,SPRING,MAIN,BAK 150,COMB,2,TTHR,1900-01-01 07:00:00,1900-01-01 11:00:00,CANTWL,102,2014-10-27,True
3,2015,SPRING,MAIN,BAK 150,COMB,3,MW,1900-01-01 11:20:00,1900-01-01 14:20:00,CANTWL,102,2014-10-27,True
4,2015,SPRING,MAIN,BAK 150,COMB,3,TTHR,1900-01-01 11:20:00,1900-01-01 15:20:00,CANTWL,102,2014-10-27,True


In [19]:
df = df.loc[(df["after_start_display_section_schedule"]==True),:]
print(df.shape)
df.head()

(3945, 13)


Unnamed: 0,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,EVENT_ID,EVENT_SUB_TYPE,SECTION,DAY,START_TIME,END_TIME,BUILDING_CODE,ROOM_ID,PRE_REG_DATE,after_start_display_section_schedule
0,2015,SPRING,MAIN,ACC 101,LEC,1,MWF,1900-01-01 09:05:00,1900-01-01 10:00:00,PICKTT,223,2014-10-27,True
1,2015,SPRING,MAIN,BAK 150,COMB,2,MW,1900-01-01 07:00:00,1900-01-01 10:00:00,CANTWL,102,2014-10-27,True
2,2015,SPRING,MAIN,BAK 150,COMB,2,TTHR,1900-01-01 07:00:00,1900-01-01 11:00:00,CANTWL,102,2014-10-27,True
3,2015,SPRING,MAIN,BAK 150,COMB,3,MW,1900-01-01 11:20:00,1900-01-01 14:20:00,CANTWL,102,2014-10-27,True
4,2015,SPRING,MAIN,BAK 150,COMB,3,TTHR,1900-01-01 11:20:00,1900-01-01 15:20:00,CANTWL,102,2014-10-27,True


In [20]:
print(df.shape)


(3945, 13)


In [21]:
df["EVENT_SUB_TYPE"].value_counts()


LEC     2310
COMB     784
LAB      780
SI        36
PRAC      26
HYBD       9
Name: EVENT_SUB_TYPE, dtype: int64

In [22]:
df["DAY"].value_counts()


TTHR    1121
MWF      746
MW       407
MON      328
WED      303
TUE      294
THUR     238
FRI      162
ALL      128
MTWR      78
SAT       39
SUN       31
MF        21
WF        15
TWTF      15
MTU        6
TWT        5
WTHR       4
MTRF       1
MTW        1
WTF        1
MWRF       1
Name: DAY, dtype: int64

In [23]:
df["BUILDING_CODE"].value_counts()


FREER     1417
PICKTT    1096
CANTWL     708
JWAL       273
TOMPKI     152
SARATG     141
JWSC        43
FORCC       28
            21
SAUN        17
PAOLZI      14
VIC          9
HYBRID       9
WOOD         5
LAMBRT       4
WELD         2
OVL          2
LCMC         2
UPR          2
Name: BUILDING_CODE, dtype: int64

In [24]:
df.head()


Unnamed: 0,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,EVENT_ID,EVENT_SUB_TYPE,SECTION,DAY,START_TIME,END_TIME,BUILDING_CODE,ROOM_ID,PRE_REG_DATE,after_start_display_section_schedule
0,2015,SPRING,MAIN,ACC 101,LEC,1,MWF,1900-01-01 09:05:00,1900-01-01 10:00:00,PICKTT,223,2014-10-27,True
1,2015,SPRING,MAIN,BAK 150,COMB,2,MW,1900-01-01 07:00:00,1900-01-01 10:00:00,CANTWL,102,2014-10-27,True
2,2015,SPRING,MAIN,BAK 150,COMB,2,TTHR,1900-01-01 07:00:00,1900-01-01 11:00:00,CANTWL,102,2014-10-27,True
3,2015,SPRING,MAIN,BAK 150,COMB,3,MW,1900-01-01 11:20:00,1900-01-01 14:20:00,CANTWL,102,2014-10-27,True
4,2015,SPRING,MAIN,BAK 150,COMB,3,TTHR,1900-01-01 11:20:00,1900-01-01 15:20:00,CANTWL,102,2014-10-27,True


In [25]:
df.loc[:, "section_integration_id"] = (
    df["EVENT_ID"]
    + "."
    + df["EVENT_SUB_TYPE"]
    + "."
    + df["ACADEMIC_YEAR"]
    + "."
    + df["ACADEMIC_TERM"].str.title()
    + "."
    + df["SECTION"]
)


In [26]:
print(df.shape)
df.head()


(3945, 14)


Unnamed: 0,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,EVENT_ID,EVENT_SUB_TYPE,SECTION,DAY,START_TIME,END_TIME,BUILDING_CODE,ROOM_ID,PRE_REG_DATE,after_start_display_section_schedule,section_integration_id
0,2015,SPRING,MAIN,ACC 101,LEC,1,MWF,1900-01-01 09:05:00,1900-01-01 10:00:00,PICKTT,223,2014-10-27,True,ACC 101.LEC.2015.Spring.01
1,2015,SPRING,MAIN,BAK 150,COMB,2,MW,1900-01-01 07:00:00,1900-01-01 10:00:00,CANTWL,102,2014-10-27,True,BAK 150.COMB.2015.Spring.02
2,2015,SPRING,MAIN,BAK 150,COMB,2,TTHR,1900-01-01 07:00:00,1900-01-01 11:00:00,CANTWL,102,2014-10-27,True,BAK 150.COMB.2015.Spring.02
3,2015,SPRING,MAIN,BAK 150,COMB,3,MW,1900-01-01 11:20:00,1900-01-01 14:20:00,CANTWL,102,2014-10-27,True,BAK 150.COMB.2015.Spring.03
4,2015,SPRING,MAIN,BAK 150,COMB,3,TTHR,1900-01-01 11:20:00,1900-01-01 15:20:00,CANTWL,102,2014-10-27,True,BAK 150.COMB.2015.Spring.03


In [27]:
sql_str = "SELECT BUILDING_CODE, BUILD_NAME_1 FROM BUILDING "
building_codes = pd.read_sql_query(sql_str, connection)

print(building_codes.shape)
building_codes.head()


(45, 2)


Unnamed: 0,BUILDING_CODE,BUILD_NAME_1
0,ADK 1,Lower St. Regis Hall
1,ADK 2,Upper St. Regis Hall
2,ADM,Phelps Smith Administration Building
3,ALUMNI,Alumni Hall
4,APARK,Alumni Park


In [28]:
df = pd.merge(df, building_codes, on=["BUILDING_CODE"], how="left")

df.head()


Unnamed: 0,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,EVENT_ID,EVENT_SUB_TYPE,SECTION,DAY,START_TIME,END_TIME,BUILDING_CODE,ROOM_ID,PRE_REG_DATE,after_start_display_section_schedule,section_integration_id,BUILD_NAME_1
0,2015,SPRING,MAIN,ACC 101,LEC,1,MWF,1900-01-01 09:05:00,1900-01-01 10:00:00,PICKTT,223,2014-10-27,True,ACC 101.LEC.2015.Spring.01,Pickett
1,2015,SPRING,MAIN,BAK 150,COMB,2,MW,1900-01-01 07:00:00,1900-01-01 10:00:00,CANTWL,102,2014-10-27,True,BAK 150.COMB.2015.Spring.02,Cantwell
2,2015,SPRING,MAIN,BAK 150,COMB,2,TTHR,1900-01-01 07:00:00,1900-01-01 11:00:00,CANTWL,102,2014-10-27,True,BAK 150.COMB.2015.Spring.02,Cantwell
3,2015,SPRING,MAIN,BAK 150,COMB,3,MW,1900-01-01 11:20:00,1900-01-01 14:20:00,CANTWL,102,2014-10-27,True,BAK 150.COMB.2015.Spring.03,Cantwell
4,2015,SPRING,MAIN,BAK 150,COMB,3,TTHR,1900-01-01 11:20:00,1900-01-01 15:20:00,CANTWL,102,2014-10-27,True,BAK 150.COMB.2015.Spring.03,Cantwell


In [29]:
df = df.rename(columns={"BUILD_NAME_1": "building", "ROOM_ID": "room"})


In [30]:
df["start_time"] = df.START_TIME.dt.strftime("%I:%M%p")
df["end_time"] = df.END_TIME.dt.strftime("%I:%M%p")


In [31]:
df.head()


Unnamed: 0,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,EVENT_ID,EVENT_SUB_TYPE,SECTION,DAY,START_TIME,END_TIME,BUILDING_CODE,room,PRE_REG_DATE,after_start_display_section_schedule,section_integration_id,building,start_time,end_time
0,2015,SPRING,MAIN,ACC 101,LEC,1,MWF,1900-01-01 09:05:00,1900-01-01 10:00:00,PICKTT,223,2014-10-27,True,ACC 101.LEC.2015.Spring.01,Pickett,09:05AM,10:00AM
1,2015,SPRING,MAIN,BAK 150,COMB,2,MW,1900-01-01 07:00:00,1900-01-01 10:00:00,CANTWL,102,2014-10-27,True,BAK 150.COMB.2015.Spring.02,Cantwell,07:00AM,10:00AM
2,2015,SPRING,MAIN,BAK 150,COMB,2,TTHR,1900-01-01 07:00:00,1900-01-01 11:00:00,CANTWL,102,2014-10-27,True,BAK 150.COMB.2015.Spring.02,Cantwell,07:00AM,11:00AM
3,2015,SPRING,MAIN,BAK 150,COMB,3,MW,1900-01-01 11:20:00,1900-01-01 14:20:00,CANTWL,102,2014-10-27,True,BAK 150.COMB.2015.Spring.03,Cantwell,11:20AM,02:20PM
4,2015,SPRING,MAIN,BAK 150,COMB,3,TTHR,1900-01-01 11:20:00,1900-01-01 15:20:00,CANTWL,102,2014-10-27,True,BAK 150.COMB.2015.Spring.03,Cantwell,11:20AM,03:20PM


In [32]:
building_codes.head()


Unnamed: 0,BUILDING_CODE,BUILD_NAME_1
0,ADK 1,Lower St. Regis Hall
1,ADK 2,Upper St. Regis Hall
2,ADM,Phelps Smith Administration Building
3,ALUMNI,Alumni Hall
4,APARK,Alumni Park


In [33]:
sql_str = "SELECT CODE_VALUE, DAY_SORT FROM CODE_DAY "
day_codes = pd.read_sql_query(sql_str, connection)

print(day_codes.shape)
day_codes.head()


(29, 2)


Unnamed: 0,CODE_VALUE,DAY_SORT
0,ALL,12345.0
1,CANC,
2,FRI,5.0
3,MF,15.0
4,MON,1.0


In [34]:
day_func = lambda c: (
    str(c["DAY_SORT"])
    .replace("1", "M")
    .replace("2", "T")
    .replace("3", "W")
    .replace("4", "R")
    .replace("5", "F")
    .replace("6", "A")
    .replace("7", "S")
)
day_codes.loc[:, "meeting_days"] = day_codes.apply(day_func, axis=1)

print(day_codes.shape)
day_codes


(29, 3)


Unnamed: 0,CODE_VALUE,DAY_SORT,meeting_days
0,ALL,12345.0,MTWRF
1,CANC,,
2,FRI,5.0,F
3,MF,15.0,MF
4,MON,1.0,M
5,MTR,124.0,MTR
6,MTRF,1245.0,MTRF
7,MTU,12.0,MT
8,MTW,123.0,MTW
9,MTWF,1235.0,MTWF


In [35]:
df = pd.merge(df, day_codes, left_on=["DAY"], right_on=["CODE_VALUE"], how="left")

df.head()


Unnamed: 0,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,EVENT_ID,EVENT_SUB_TYPE,SECTION,DAY,START_TIME,END_TIME,BUILDING_CODE,room,PRE_REG_DATE,after_start_display_section_schedule,section_integration_id,building,start_time,end_time,CODE_VALUE,DAY_SORT,meeting_days
0,2015,SPRING,MAIN,ACC 101,LEC,1,MWF,1900-01-01 09:05:00,1900-01-01 10:00:00,PICKTT,223,2014-10-27,True,ACC 101.LEC.2015.Spring.01,Pickett,09:05AM,10:00AM,MWF,135,MWF
1,2015,SPRING,MAIN,BAK 150,COMB,2,MW,1900-01-01 07:00:00,1900-01-01 10:00:00,CANTWL,102,2014-10-27,True,BAK 150.COMB.2015.Spring.02,Cantwell,07:00AM,10:00AM,MW,13,MW
2,2015,SPRING,MAIN,BAK 150,COMB,2,TTHR,1900-01-01 07:00:00,1900-01-01 11:00:00,CANTWL,102,2014-10-27,True,BAK 150.COMB.2015.Spring.02,Cantwell,07:00AM,11:00AM,TTHR,24,TR
3,2015,SPRING,MAIN,BAK 150,COMB,3,MW,1900-01-01 11:20:00,1900-01-01 14:20:00,CANTWL,102,2014-10-27,True,BAK 150.COMB.2015.Spring.03,Cantwell,11:20AM,02:20PM,MW,13,MW
4,2015,SPRING,MAIN,BAK 150,COMB,3,TTHR,1900-01-01 11:20:00,1900-01-01 15:20:00,CANTWL,102,2014-10-27,True,BAK 150.COMB.2015.Spring.03,Cantwell,11:20AM,03:20PM,TTHR,24,TR


In [36]:
df = df.loc[
    :,
    [
        "section_integration_id",
        "meeting_days",
        "start_time",
        "end_time",
        "building",
        "room",
    ],
]


In [37]:
df = df.sort_values(
    ["section_integration_id", "meeting_days", "start_time"]
).drop_duplicates(["section_integration_id", "meeting_days", "start_time"], keep="last")


In [38]:
print(df.shape)


(3893, 6)


In [39]:
fn_output = f"{today_str}_section_schedules.txt"
df.to_csv(fn_output, index=False)
