In [34]:
import os
from datetime import date, datetime
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from sqlalchemy import create_engine


In [35]:
# local connection information
# Credentials are read from the environment so they never live in the notebook.
db_user = os.environ.get('DB_USER')
db_pass = os.environ.get('DB_PASS')
if db_user is None or db_pass is None:
    # Without this check the URL below would be built with the literal text
    # 'None' and only fail later with a confusing login error.
    raise RuntimeError('DB_USER and DB_PASS environment variables must be set')
# quote_plus escapes characters such as '@', ':' and '/' that would otherwise
# be parsed as URL delimiters when they occur in the user name or password.
engine = create_engine(f'mssql+pyodbc://{quote_plus(db_user)}:{quote_plus(db_pass)}' +
                       '@PSC-SQLProd/Campus6?' +
                       'driver=ODBC+Driver+13+for+SQL+Server')
connection = engine.connect()


In [36]:
# Earliest ACADEMIC_YEAR to extract; kept as a string because it is
# interpolated directly into the SQL text of the sections query.
sections_begin_year = '2011'


In [37]:
# Select every SECTIONS row from sections_begin_year onward, excluding
# EVENT_SUB_TYPE 'ADV' and keeping only the listed terms and sessions.
sql_str = (
    "SELECT * FROM SECTIONS WHERE "
    "EVENT_SUB_TYPE NOT IN ('ADV') "
    f"AND ACADEMIC_YEAR >= '{sections_begin_year}' "
    "AND ACADEMIC_TERM IN ('FALL', 'SPRING', 'SUMMER') "
    "AND ACADEMIC_SESSION IN ('MAIN', 'CULN', 'EXT', 'FNRR', 'HEOP',"
    " 'SLAB', 'BLOCK A', 'BLOCK AB', 'BLOCK B') "
)
df_sections = pd.read_sql_query(sql=sql_str, con=connection)


In [38]:
# Columns carried forward from the raw SECTIONS extract.
section_columns = [
    'EVENT_ID', 'EVENT_SUB_TYPE', 'EVENT_MED_NAME',
    'SECTION', 'CREDITS', 'MAX_PARTICIPANT',
    'ACADEMIC_YEAR', 'ACADEMIC_TERM', 'ACADEMIC_SESSION',
    'START_DATE', 'END_DATE', 'CIP_CODE',
    'REVISION_DATE', 'REVISION_TIME',
]
df = df_sections[section_columns]

# Show the distinct terms and sessions that the query returned.
for column in ('ACADEMIC_TERM', 'ACADEMIC_SESSION'):
    print(f'{column}: ', df[column].unique())

In [39]:
# Sanity check: size and first rows of the selected section columns.
print(df.shape)
df.head()

(5049, 14)


Unnamed: 0,EVENT_ID,EVENT_SUB_TYPE,EVENT_MED_NAME,SECTION,CREDITS,MAX_PARTICIPANT,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,START_DATE,END_DATE,CIP_CODE,REVISION_DATE,REVISION_TIME
0,ACC 101,LEC,Financial Accounting,1,3.0,35,2011,FALL,MAIN,2011-08-31,2011-12-16,,2013-08-19,1900-01-01 12:26:41.477
1,ACC 101,LEC,Financial Accounting,2,3.0,36,2011,FALL,MAIN,2011-08-31,2011-12-16,,2012-05-07,1900-01-01 13:59:40.743
2,ACC 101,LEC,Financial Accounting,3,3.0,30,2011,FALL,MAIN,2011-08-31,2011-12-16,,2011-12-16,1900-01-01 17:22:31.393
3,ACC 201,LEC,Small Bus Acct,1,3.0,0,2011,FALL,MAIN,2011-08-31,2011-12-16,,2011-09-28,1900-01-01 13:25:25.420
4,ACC 301,LEC,Small Business Acc,1,3.0,25,2011,FALL,MAIN,2011-08-31,2011-12-16,,2012-01-19,1900-01-01 12:47:48.237


In [40]:
# Exclude rows whose EVENT_ID contains 'REG' or 'STDY' (case-insensitive).
is_reg = df['EVENT_ID'].str.contains('REG', case=False)
is_stdy = df['EVENT_ID'].str.contains('STDY', case=False)
df = df[~is_reg & ~is_stdy]


In [41]:
# Row count after excluding the 'REG' and 'STDY' events.
print(df.shape)


(4855, 14)


In [42]:
# Source column name -> output field name.
output_names = {
    'EVENT_MED_NAME': 'course_section_name',
    'CREDITS': 'credit_hours',
    'MAX_PARTICIPANT': 'maximum_enrollment_count',
    'START_DATE': 'start_dt',
    'END_DATE': 'end_dt',
    'CIP_CODE': 'course_cip_code',
}
df = df.rename(columns=output_names)


In [43]:
def crs_id(c):
    """Return EVENT_ID with spaces removed.

    Rows whose EVENT_SUB_TYPE is 'LAB' or 'SI' additionally get the
    lower-cased sub type appended.
    """
    compact_event = str(c['EVENT_ID']).replace(' ', '')
    if c['EVENT_SUB_TYPE'] in ('LAB', 'SI'):
        return compact_event + str(c['EVENT_SUB_TYPE']).lower()
    return compact_event


df['course_id'] = df.apply(crs_id, axis=1)


In [44]:
# Join event, sub type, year, title-cased term and section with '.' to form
# the section key, then reuse the same value as the row's integration_id.
section_key = df['EVENT_ID']
for part in (df['EVENT_SUB_TYPE'],
             df['ACADEMIC_YEAR'],
             df['ACADEMIC_TERM'].str.title(),
             df['SECTION']):
    section_key = section_key + '.' + part
df['course_section_id'] = section_key
df['integration_id'] = df['course_section_id']


In [45]:
def term_id(c):
    """Build '<year>.<Term>'; append '.<session>' when the session is not MAIN."""
    base = c['ACADEMIC_YEAR'] + '.' + str(c['ACADEMIC_TERM']).title()
    if c['ACADEMIC_SESSION'] == 'MAIN':
        return base
    return base + '.' + c['ACADEMIC_SESSION']


df['term_id'] = df.apply(term_id, axis=1)


In [46]:
# Numeric academic year. Values that cannot be parsed fall back to the first
# year of interest; the fallback is converted to int so the numeric series is
# never filled with a string before the int64 cast.
df['AY'] = (pd.to_numeric(df['ACADEMIC_YEAR'], errors='coerce')
              .fillna(int(sections_begin_year)).astype(np.int64))


def cat_yr(c):
    """Return AY for FALL rows and AY - 1 for every other term."""
    if c['ACADEMIC_TERM'] == 'FALL':
        return c['AY']
    return c['AY'] - 1


df.loc[:, 'catalog_year'] = df.apply(cat_yr, axis=1)


In [47]:
def crs_sect_delv(c):
    """Map the first two characters of SECTION to a delivery code.

    'HY' -> '03', 'ON' -> '02', anything else -> '01'.
    """
    prefix = str(c['SECTION'])[:2]
    return {'HY': '03', 'ON': '02'}.get(prefix, '01')


df['course_section_delivery'] = df.apply(crs_sect_delv, axis=1)


In [48]:
# Inspect the frame after adding the derived id, term, catalog-year and
# delivery columns.
print(df.shape)
df.head()

(4855, 21)


Unnamed: 0,EVENT_ID,EVENT_SUB_TYPE,course_section_name,SECTION,credit_hours,maximum_enrollment_count,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,start_dt,...,course_cip_code,REVISION_DATE,REVISION_TIME,course_id,course_section_id,integration_id,term_id,AY,catalog_year,course_section_delivery
0,ACC 101,LEC,Financial Accounting,1,3.0,35,2011,FALL,MAIN,2011-08-31,...,,2013-08-19,1900-01-01 12:26:41.477,ACC101,ACC 101.LEC.2011.Fall.01,ACC 101.LEC.2011.Fall.01,2011.Fall,2011,2011,1
1,ACC 101,LEC,Financial Accounting,2,3.0,36,2011,FALL,MAIN,2011-08-31,...,,2012-05-07,1900-01-01 13:59:40.743,ACC101,ACC 101.LEC.2011.Fall.02,ACC 101.LEC.2011.Fall.02,2011.Fall,2011,2011,1
2,ACC 101,LEC,Financial Accounting,3,3.0,30,2011,FALL,MAIN,2011-08-31,...,,2011-12-16,1900-01-01 17:22:31.393,ACC101,ACC 101.LEC.2011.Fall.03,ACC 101.LEC.2011.Fall.03,2011.Fall,2011,2011,1
3,ACC 201,LEC,Small Bus Acct,1,3.0,0,2011,FALL,MAIN,2011-08-31,...,,2011-09-28,1900-01-01 13:25:25.420,ACC201,ACC 201.LEC.2011.Fall.01,ACC 201.LEC.2011.Fall.01,2011.Fall,2011,2011,1
4,ACC 301,LEC,Small Business Acc,1,3.0,25,2011,FALL,MAIN,2011-08-31,...,,2012-01-19,1900-01-01 12:47:48.237,ACC301,ACC 301.LEC.2011.Fall.01,ACC 301.LEC.2011.Fall.01,2011.Fall,2011,2011,1


In [None]:
# Show the rows whose SECTION code starts with 'HY'.
df[(df['SECTION'].str[:2]=='HY')]

In [49]:
def crs_integ_id(c):
    """Build '<EVENT_ID>[.<EVENT_SUB_TYPE>].<catalog_year>'.

    The sub type segment is left out when EVENT_SUB_TYPE is the empty string.
    """
    year = str(c['catalog_year'])
    if c['EVENT_SUB_TYPE'] == '':
        return c['EVENT_ID'] + '.' + year
    return c['EVENT_ID'] + '.' + c['EVENT_SUB_TYPE'] + '.' + year


df['course_integration_id'] = df.apply(crs_integ_id, axis=1)


In [50]:
# Inspect the frame with the new course_integration_id column.
print(df.shape)
df.head()

(4855, 22)


Unnamed: 0,EVENT_ID,EVENT_SUB_TYPE,course_section_name,SECTION,credit_hours,maximum_enrollment_count,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,start_dt,...,REVISION_DATE,REVISION_TIME,course_id,course_section_id,integration_id,term_id,AY,catalog_year,course_section_delivery,course_integration_id
0,ACC 101,LEC,Financial Accounting,1,3.0,35,2011,FALL,MAIN,2011-08-31,...,2013-08-19,1900-01-01 12:26:41.477,ACC101,ACC 101.LEC.2011.Fall.01,ACC 101.LEC.2011.Fall.01,2011.Fall,2011,2011,1,ACC 101.LEC.2011
1,ACC 101,LEC,Financial Accounting,2,3.0,36,2011,FALL,MAIN,2011-08-31,...,2012-05-07,1900-01-01 13:59:40.743,ACC101,ACC 101.LEC.2011.Fall.02,ACC 101.LEC.2011.Fall.02,2011.Fall,2011,2011,1,ACC 101.LEC.2011
2,ACC 101,LEC,Financial Accounting,3,3.0,30,2011,FALL,MAIN,2011-08-31,...,2011-12-16,1900-01-01 17:22:31.393,ACC101,ACC 101.LEC.2011.Fall.03,ACC 101.LEC.2011.Fall.03,2011.Fall,2011,2011,1,ACC 101.LEC.2011
3,ACC 201,LEC,Small Bus Acct,1,3.0,0,2011,FALL,MAIN,2011-08-31,...,2011-09-28,1900-01-01 13:25:25.420,ACC201,ACC 201.LEC.2011.Fall.01,ACC 201.LEC.2011.Fall.01,2011.Fall,2011,2011,1,ACC 201.LEC.2011
4,ACC 301,LEC,Small Business Acc,1,3.0,25,2011,FALL,MAIN,2011-08-31,...,2012-01-19,1900-01-01 12:47:48.237,ACC301,ACC 301.LEC.2011.Fall.01,ACC 301.LEC.2011.Fall.01,2011.Fall,2011,2011,1,ACC 301.LEC.2011


In [51]:
# read course_catalog.txt to find the correct catalog year
# The path is relative to the notebook's working directory.
dfcat = pd.read_csv('../course_catalog/course_catalog.txt')
print(dfcat.shape)

(652, 8)


In [None]:
# Preview the raw catalog rows.
dfcat.head()

In [52]:
# Keep only the join key and the catalog's integration id; the id is renamed
# so it does not collide with the sections frame's own integration_id.
catalog_keys = dfcat[['course_id', 'integration_id']]
dfcat = catalog_keys.rename(columns={'integration_id': 'cat_integ_id'})

In [53]:
# Confirm only course_id and the renamed cat_integ_id remain.
print(dfcat.shape)
dfcat.head()

(652, 2)


Unnamed: 0,course_id,cat_integ_id
0,ACC101,ACC 101.LEC.2010
1,ACC102,ACC 102.LEC.2010
2,ACC301,ACC 301.LEC.2011
3,AR100,AR 100.2011
4,AR101,AR 101.2011


In [54]:
# Attach every catalog entry that shares the section's course_id; sections
# without a match keep a missing cat_integ_id (left join).
df = df.merge(dfcat, how='left', on='course_id')

In [55]:
# The row count grows when a course_id matches more than one catalog row.
print(df.shape)
df.head()

(5812, 23)


Unnamed: 0,EVENT_ID,EVENT_SUB_TYPE,course_section_name,SECTION,credit_hours,maximum_enrollment_count,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,start_dt,...,REVISION_TIME,course_id,course_section_id,integration_id,term_id,AY,catalog_year,course_section_delivery,course_integration_id,cat_integ_id
0,ACC 101,LEC,Financial Accounting,1,3.0,35,2011,FALL,MAIN,2011-08-31,...,1900-01-01 12:26:41.477,ACC101,ACC 101.LEC.2011.Fall.01,ACC 101.LEC.2011.Fall.01,2011.Fall,2011,2011,1,ACC 101.LEC.2011,ACC 101.LEC.2010
1,ACC 101,LEC,Financial Accounting,2,3.0,36,2011,FALL,MAIN,2011-08-31,...,1900-01-01 13:59:40.743,ACC101,ACC 101.LEC.2011.Fall.02,ACC 101.LEC.2011.Fall.02,2011.Fall,2011,2011,1,ACC 101.LEC.2011,ACC 101.LEC.2010
2,ACC 101,LEC,Financial Accounting,3,3.0,30,2011,FALL,MAIN,2011-08-31,...,1900-01-01 17:22:31.393,ACC101,ACC 101.LEC.2011.Fall.03,ACC 101.LEC.2011.Fall.03,2011.Fall,2011,2011,1,ACC 101.LEC.2011,ACC 101.LEC.2010
3,ACC 201,LEC,Small Bus Acct,1,3.0,0,2011,FALL,MAIN,2011-08-31,...,1900-01-01 13:25:25.420,ACC201,ACC 201.LEC.2011.Fall.01,ACC 201.LEC.2011.Fall.01,2011.Fall,2011,2011,1,ACC 201.LEC.2011,
4,ACC 301,LEC,Small Business Acc,1,3.0,25,2011,FALL,MAIN,2011-08-31,...,1900-01-01 12:47:48.237,ACC301,ACC 301.LEC.2011.Fall.01,ACC 301.LEC.2011.Fall.01,2011.Fall,2011,2011,1,ACC 301.LEC.2011,ACC 301.LEC.2011


In [56]:
df = df.sort_values(by=['integration_id', 'course_integration_id'])

# Keep a (section, catalog entry) pair only when the section's own
# course_integration_id is >= the catalog entry's id.
# NOTE(review): rows with a missing cat_integ_id compare False and are removed
# here, so sections with no catalog match do not reach the output — confirm
# this is intended.
catalog_applies = df['course_integration_id'] >= df['cat_integ_id']
df = df[catalog_applies]

In [57]:
# Inspect the rows that survive the catalog filter.
print(df.shape)
df.head(40)

(4915, 23)


Unnamed: 0,EVENT_ID,EVENT_SUB_TYPE,course_section_name,SECTION,credit_hours,maximum_enrollment_count,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,start_dt,...,REVISION_TIME,course_id,course_section_id,integration_id,term_id,AY,catalog_year,course_section_delivery,course_integration_id,cat_integ_id
0,ACC 101,LEC,Financial Accounting,1,3.0,35,2011,FALL,MAIN,2011-08-31,...,1900-01-01 12:26:41.477,ACC101,ACC 101.LEC.2011.Fall.01,ACC 101.LEC.2011.Fall.01,2011.Fall,2011,2011,1,ACC 101.LEC.2011,ACC 101.LEC.2010
1,ACC 101,LEC,Financial Accounting,2,3.0,36,2011,FALL,MAIN,2011-08-31,...,1900-01-01 13:59:40.743,ACC101,ACC 101.LEC.2011.Fall.02,ACC 101.LEC.2011.Fall.02,2011.Fall,2011,2011,1,ACC 101.LEC.2011,ACC 101.LEC.2010
2,ACC 101,LEC,Financial Accounting,3,3.0,30,2011,FALL,MAIN,2011-08-31,...,1900-01-01 17:22:31.393,ACC101,ACC 101.LEC.2011.Fall.03,ACC 101.LEC.2011.Fall.03,2011.Fall,2011,2011,1,ACC 101.LEC.2011,ACC 101.LEC.2010
414,ACC 101,LEC,Financial Accounting,1,3.0,37,2011,SPRING,MAIN,2011-01-24,...,1900-01-01 16:19:39.223,ACC101,ACC 101.LEC.2011.Spring.01,ACC 101.LEC.2011.Spring.01,2011.Spring,2011,2010,1,ACC 101.LEC.2010,ACC 101.LEC.2010
415,ACC 101,LEC,Financial Accounting,2,3.0,40,2011,SPRING,MAIN,2011-01-24,...,1900-01-01 11:55:12.630,ACC101,ACC 101.LEC.2011.Spring.02,ACC 101.LEC.2011.Spring.02,2011.Spring,2011,2010,1,ACC 101.LEC.2010,ACC 101.LEC.2010
798,ACC 101,LEC,Financial Accounting,1,3.0,39,2012,FALL,MAIN,2012-08-29,...,1900-01-01 12:58:46.230,ACC101,ACC 101.LEC.2012.Fall.01,ACC 101.LEC.2012.Fall.01,2012.Fall,2012,2012,1,ACC 101.LEC.2012,ACC 101.LEC.2010
799,ACC 101,LEC,Financial Accounting,2,3.0,39,2012,FALL,MAIN,2012-08-29,...,1900-01-01 13:40:25.613,ACC101,ACC 101.LEC.2012.Fall.02,ACC 101.LEC.2012.Fall.02,2012.Fall,2012,2012,1,ACC 101.LEC.2012,ACC 101.LEC.2010
1207,ACC 101,LEC,Financial Accounting,1,3.0,35,2012,SPRING,MAIN,2012-01-18,...,1900-01-01 08:55:07.910,ACC101,ACC 101.LEC.2012.Spring.01,ACC 101.LEC.2012.Spring.01,2012.Spring,2012,2011,1,ACC 101.LEC.2011,ACC 101.LEC.2010
1208,ACC 101,LEC,Financial Accounting,2,3.0,35,2012,SPRING,MAIN,2012-01-18,...,1900-01-01 11:40:30.007,ACC101,ACC 101.LEC.2012.Spring.02,ACC 101.LEC.2012.Spring.02,2012.Spring,2012,2011,1,ACC 101.LEC.2011,ACC 101.LEC.2010
1626,ACC 101,LEC,Financial Accounting,1,3.0,37,2013,FALL,MAIN,2013-08-28,...,1900-01-01 08:41:54.153,ACC101,ACC 101.LEC.2013.Fall.01,ACC 101.LEC.2013.Fall.01,2013.Fall,2013,2013,1,ACC 101.LEC.2013,ACC 101.LEC.2010


In [58]:
# Order rows by section key, then by the section's course_integration_id.
df = df.sort_values(by=['course_section_id', 'course_integration_id'],
                    ascending=True)
# Uncomment to list sections that still appear more than once:
# df[df.duplicated(['integration_id'])]


In [59]:
# Inspect the frame after re-sorting by course_section_id.
print(df.shape)
df.head(40)

(4915, 23)


Unnamed: 0,EVENT_ID,EVENT_SUB_TYPE,course_section_name,SECTION,credit_hours,maximum_enrollment_count,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,start_dt,...,REVISION_TIME,course_id,course_section_id,integration_id,term_id,AY,catalog_year,course_section_delivery,course_integration_id,cat_integ_id
0,ACC 101,LEC,Financial Accounting,1,3.0,35,2011,FALL,MAIN,2011-08-31,...,1900-01-01 12:26:41.477,ACC101,ACC 101.LEC.2011.Fall.01,ACC 101.LEC.2011.Fall.01,2011.Fall,2011,2011,1,ACC 101.LEC.2011,ACC 101.LEC.2010
1,ACC 101,LEC,Financial Accounting,2,3.0,36,2011,FALL,MAIN,2011-08-31,...,1900-01-01 13:59:40.743,ACC101,ACC 101.LEC.2011.Fall.02,ACC 101.LEC.2011.Fall.02,2011.Fall,2011,2011,1,ACC 101.LEC.2011,ACC 101.LEC.2010
2,ACC 101,LEC,Financial Accounting,3,3.0,30,2011,FALL,MAIN,2011-08-31,...,1900-01-01 17:22:31.393,ACC101,ACC 101.LEC.2011.Fall.03,ACC 101.LEC.2011.Fall.03,2011.Fall,2011,2011,1,ACC 101.LEC.2011,ACC 101.LEC.2010
414,ACC 101,LEC,Financial Accounting,1,3.0,37,2011,SPRING,MAIN,2011-01-24,...,1900-01-01 16:19:39.223,ACC101,ACC 101.LEC.2011.Spring.01,ACC 101.LEC.2011.Spring.01,2011.Spring,2011,2010,1,ACC 101.LEC.2010,ACC 101.LEC.2010
415,ACC 101,LEC,Financial Accounting,2,3.0,40,2011,SPRING,MAIN,2011-01-24,...,1900-01-01 11:55:12.630,ACC101,ACC 101.LEC.2011.Spring.02,ACC 101.LEC.2011.Spring.02,2011.Spring,2011,2010,1,ACC 101.LEC.2010,ACC 101.LEC.2010
798,ACC 101,LEC,Financial Accounting,1,3.0,39,2012,FALL,MAIN,2012-08-29,...,1900-01-01 12:58:46.230,ACC101,ACC 101.LEC.2012.Fall.01,ACC 101.LEC.2012.Fall.01,2012.Fall,2012,2012,1,ACC 101.LEC.2012,ACC 101.LEC.2010
799,ACC 101,LEC,Financial Accounting,2,3.0,39,2012,FALL,MAIN,2012-08-29,...,1900-01-01 13:40:25.613,ACC101,ACC 101.LEC.2012.Fall.02,ACC 101.LEC.2012.Fall.02,2012.Fall,2012,2012,1,ACC 101.LEC.2012,ACC 101.LEC.2010
1207,ACC 101,LEC,Financial Accounting,1,3.0,35,2012,SPRING,MAIN,2012-01-18,...,1900-01-01 08:55:07.910,ACC101,ACC 101.LEC.2012.Spring.01,ACC 101.LEC.2012.Spring.01,2012.Spring,2012,2011,1,ACC 101.LEC.2011,ACC 101.LEC.2010
1208,ACC 101,LEC,Financial Accounting,2,3.0,35,2012,SPRING,MAIN,2012-01-18,...,1900-01-01 11:40:30.007,ACC101,ACC 101.LEC.2012.Spring.02,ACC 101.LEC.2012.Spring.02,2012.Spring,2012,2011,1,ACC 101.LEC.2011,ACC 101.LEC.2010
1626,ACC 101,LEC,Financial Accounting,1,3.0,37,2013,FALL,MAIN,2013-08-28,...,1900-01-01 08:41:54.153,ACC101,ACC 101.LEC.2013.Fall.01,ACC 101.LEC.2013.Fall.01,2013.Fall,2013,2013,1,ACC 101.LEC.2013,ACC 101.LEC.2010


In [60]:
# Collapse each section to a single row, printing the shape before and after.
print(df.shape)
# A section's duplicate rows come from the catalog merge and differ only in
# cat_integ_id, so that column must be the secondary sort key. Sorting on
# course_integration_id (identical across those duplicates) left the choice
# made by keep='last' dependent on incidental row order.
# With this ordering keep='last' retains the greatest cat_integ_id that
# remains for the section.
df = (df.sort_values(['course_section_id', 'cat_integ_id'],
                     ascending=[True, True])
      .drop_duplicates(['course_section_id'], keep='last')
      )
print(df.shape)

(4915, 23)
(4414, 23)


In [61]:
# Inspect the de-duplicated sections.
print(df.shape)
df.head(40)

(4414, 23)


Unnamed: 0,EVENT_ID,EVENT_SUB_TYPE,course_section_name,SECTION,credit_hours,maximum_enrollment_count,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,start_dt,...,REVISION_TIME,course_id,course_section_id,integration_id,term_id,AY,catalog_year,course_section_delivery,course_integration_id,cat_integ_id
0,ACC 101,LEC,Financial Accounting,1,3.0,35,2011,FALL,MAIN,2011-08-31,...,1900-01-01 12:26:41.477,ACC101,ACC 101.LEC.2011.Fall.01,ACC 101.LEC.2011.Fall.01,2011.Fall,2011,2011,1,ACC 101.LEC.2011,ACC 101.LEC.2010
1,ACC 101,LEC,Financial Accounting,2,3.0,36,2011,FALL,MAIN,2011-08-31,...,1900-01-01 13:59:40.743,ACC101,ACC 101.LEC.2011.Fall.02,ACC 101.LEC.2011.Fall.02,2011.Fall,2011,2011,1,ACC 101.LEC.2011,ACC 101.LEC.2010
2,ACC 101,LEC,Financial Accounting,3,3.0,30,2011,FALL,MAIN,2011-08-31,...,1900-01-01 17:22:31.393,ACC101,ACC 101.LEC.2011.Fall.03,ACC 101.LEC.2011.Fall.03,2011.Fall,2011,2011,1,ACC 101.LEC.2011,ACC 101.LEC.2010
414,ACC 101,LEC,Financial Accounting,1,3.0,37,2011,SPRING,MAIN,2011-01-24,...,1900-01-01 16:19:39.223,ACC101,ACC 101.LEC.2011.Spring.01,ACC 101.LEC.2011.Spring.01,2011.Spring,2011,2010,1,ACC 101.LEC.2010,ACC 101.LEC.2010
415,ACC 101,LEC,Financial Accounting,2,3.0,40,2011,SPRING,MAIN,2011-01-24,...,1900-01-01 11:55:12.630,ACC101,ACC 101.LEC.2011.Spring.02,ACC 101.LEC.2011.Spring.02,2011.Spring,2011,2010,1,ACC 101.LEC.2010,ACC 101.LEC.2010
798,ACC 101,LEC,Financial Accounting,1,3.0,39,2012,FALL,MAIN,2012-08-29,...,1900-01-01 12:58:46.230,ACC101,ACC 101.LEC.2012.Fall.01,ACC 101.LEC.2012.Fall.01,2012.Fall,2012,2012,1,ACC 101.LEC.2012,ACC 101.LEC.2010
799,ACC 101,LEC,Financial Accounting,2,3.0,39,2012,FALL,MAIN,2012-08-29,...,1900-01-01 13:40:25.613,ACC101,ACC 101.LEC.2012.Fall.02,ACC 101.LEC.2012.Fall.02,2012.Fall,2012,2012,1,ACC 101.LEC.2012,ACC 101.LEC.2010
1207,ACC 101,LEC,Financial Accounting,1,3.0,35,2012,SPRING,MAIN,2012-01-18,...,1900-01-01 08:55:07.910,ACC101,ACC 101.LEC.2012.Spring.01,ACC 101.LEC.2012.Spring.01,2012.Spring,2012,2011,1,ACC 101.LEC.2011,ACC 101.LEC.2010
1208,ACC 101,LEC,Financial Accounting,2,3.0,35,2012,SPRING,MAIN,2012-01-18,...,1900-01-01 11:40:30.007,ACC101,ACC 101.LEC.2012.Spring.02,ACC 101.LEC.2012.Spring.02,2012.Spring,2012,2011,1,ACC 101.LEC.2011,ACC 101.LEC.2010
1626,ACC 101,LEC,Financial Accounting,1,3.0,37,2013,FALL,MAIN,2013-08-28,...,1900-01-01 08:41:54.153,ACC101,ACC 101.LEC.2013.Fall.01,ACC 101.LEC.2013.Fall.01,2013.Fall,2013,2013,1,ACC 101.LEC.2013,ACC 101.LEC.2010


In [62]:
# Replace the derived course_integration_id with the matched catalog id.
df['course_integration_id'] = df['cat_integ_id']

In [63]:
# Verify course_integration_id now carries the catalog value.
print(df.shape)
df.head(40)

(4414, 23)


Unnamed: 0,EVENT_ID,EVENT_SUB_TYPE,course_section_name,SECTION,credit_hours,maximum_enrollment_count,ACADEMIC_YEAR,ACADEMIC_TERM,ACADEMIC_SESSION,start_dt,...,REVISION_TIME,course_id,course_section_id,integration_id,term_id,AY,catalog_year,course_section_delivery,course_integration_id,cat_integ_id
0,ACC 101,LEC,Financial Accounting,1,3.0,35,2011,FALL,MAIN,2011-08-31,...,1900-01-01 12:26:41.477,ACC101,ACC 101.LEC.2011.Fall.01,ACC 101.LEC.2011.Fall.01,2011.Fall,2011,2011,1,ACC 101.LEC.2010,ACC 101.LEC.2010
1,ACC 101,LEC,Financial Accounting,2,3.0,36,2011,FALL,MAIN,2011-08-31,...,1900-01-01 13:59:40.743,ACC101,ACC 101.LEC.2011.Fall.02,ACC 101.LEC.2011.Fall.02,2011.Fall,2011,2011,1,ACC 101.LEC.2010,ACC 101.LEC.2010
2,ACC 101,LEC,Financial Accounting,3,3.0,30,2011,FALL,MAIN,2011-08-31,...,1900-01-01 17:22:31.393,ACC101,ACC 101.LEC.2011.Fall.03,ACC 101.LEC.2011.Fall.03,2011.Fall,2011,2011,1,ACC 101.LEC.2010,ACC 101.LEC.2010
414,ACC 101,LEC,Financial Accounting,1,3.0,37,2011,SPRING,MAIN,2011-01-24,...,1900-01-01 16:19:39.223,ACC101,ACC 101.LEC.2011.Spring.01,ACC 101.LEC.2011.Spring.01,2011.Spring,2011,2010,1,ACC 101.LEC.2010,ACC 101.LEC.2010
415,ACC 101,LEC,Financial Accounting,2,3.0,40,2011,SPRING,MAIN,2011-01-24,...,1900-01-01 11:55:12.630,ACC101,ACC 101.LEC.2011.Spring.02,ACC 101.LEC.2011.Spring.02,2011.Spring,2011,2010,1,ACC 101.LEC.2010,ACC 101.LEC.2010
798,ACC 101,LEC,Financial Accounting,1,3.0,39,2012,FALL,MAIN,2012-08-29,...,1900-01-01 12:58:46.230,ACC101,ACC 101.LEC.2012.Fall.01,ACC 101.LEC.2012.Fall.01,2012.Fall,2012,2012,1,ACC 101.LEC.2010,ACC 101.LEC.2010
799,ACC 101,LEC,Financial Accounting,2,3.0,39,2012,FALL,MAIN,2012-08-29,...,1900-01-01 13:40:25.613,ACC101,ACC 101.LEC.2012.Fall.02,ACC 101.LEC.2012.Fall.02,2012.Fall,2012,2012,1,ACC 101.LEC.2010,ACC 101.LEC.2010
1207,ACC 101,LEC,Financial Accounting,1,3.0,35,2012,SPRING,MAIN,2012-01-18,...,1900-01-01 08:55:07.910,ACC101,ACC 101.LEC.2012.Spring.01,ACC 101.LEC.2012.Spring.01,2012.Spring,2012,2011,1,ACC 101.LEC.2010,ACC 101.LEC.2010
1208,ACC 101,LEC,Financial Accounting,2,3.0,35,2012,SPRING,MAIN,2012-01-18,...,1900-01-01 11:40:30.007,ACC101,ACC 101.LEC.2012.Spring.02,ACC 101.LEC.2012.Spring.02,2012.Spring,2012,2011,1,ACC 101.LEC.2010,ACC 101.LEC.2010
1626,ACC 101,LEC,Financial Accounting,1,3.0,37,2013,FALL,MAIN,2013-08-28,...,1900-01-01 08:41:54.153,ACC101,ACC 101.LEC.2013.Fall.01,ACC 101.LEC.2013.Fall.01,2013.Fall,2013,2013,1,ACC 101.LEC.2010,ACC 101.LEC.2010


In [64]:
# Columns written to the export file, in output order.
output_columns = [
    'integration_id', 'course_section_name', 'course_section_id',
    'start_dt', 'end_dt', 'term_id', 'course_integration_id',
    'course_section_delivery', 'maximum_enrollment_count',
    'credit_hours',
]
df = df[output_columns].sort_values(by='integration_id')


In [65]:
# Final shape and preview of the export columns.
print(df.shape)
df.head()

(4414, 10)


Unnamed: 0,integration_id,course_section_name,course_section_id,start_dt,end_dt,term_id,course_integration_id,course_section_delivery,maximum_enrollment_count,credit_hours
0,ACC 101.LEC.2011.Fall.01,Financial Accounting,ACC 101.LEC.2011.Fall.01,2011-08-31,2011-12-16,2011.Fall,ACC 101.LEC.2010,1,35,3.0
1,ACC 101.LEC.2011.Fall.02,Financial Accounting,ACC 101.LEC.2011.Fall.02,2011-08-31,2011-12-16,2011.Fall,ACC 101.LEC.2010,1,36,3.0
2,ACC 101.LEC.2011.Fall.03,Financial Accounting,ACC 101.LEC.2011.Fall.03,2011-08-31,2011-12-16,2011.Fall,ACC 101.LEC.2010,1,30,3.0
414,ACC 101.LEC.2011.Spring.01,Financial Accounting,ACC 101.LEC.2011.Spring.01,2011-01-24,2011-05-11,2011.Spring,ACC 101.LEC.2010,1,37,3.0
415,ACC 101.LEC.2011.Spring.02,Financial Accounting,ACC 101.LEC.2011.Spring.02,2011-01-24,2011-05-11,2011.Spring,ACC 101.LEC.2010,1,40,3.0


In [66]:
# Write the sections to '<YYYYMMDD>_sections.txt' in the working directory,
# stamped with the current local date and without the index column.
today = f'{datetime.now():%Y%m%d}'
fn_output = today + '_sections.txt'
df.to_csv(fn_output, index=False)
