In [2]:
# importing the required libraries
import os
import pandas as pd
import matplotlib.pyplot as plt

# Move three directories up so that the `Code.src...` imports and the relative
# "Data/..." paths used below resolve (presumably the notebook lives three
# levels below the project root -- confirm against the repo layout).
# Guarded: an unconditional relative chdir is not idempotent, so re-running this
# cell would climb three MORE directories and break every later relative path.
if not os.path.isdir(os.path.join("Code", "src")):
    os.chdir( os.path.join("..", "..", "..") )

# importing custom modules
from Code.src.modules.db_ops import *
from Code.src.modules.dataManager import DataManager
from Code.src.modules.eda import *
from Code.src.modules.db_ops import *

# initializing the DataManager (imported above from Code.src.modules.dataManager);
# DM.get_data(...) is called further down to obtain the enrollment database handle
DM = DataManager()

In [3]:
# Load the processed weights table; the path is relative to the current working directory.
weights_csv = os.path.join("Data", "02_processed", "weights.csv")
df = pd.read_csv(weights_csv)

In [11]:
# Cohort of interest: MS Data Analytics Engineering students admitted in Fall 2021 on an F1 visa.
cohort_filter = (
    "(stu_prog_desc == 'MS Data Analytics Engineering') & "
    "        (stu_admit_term_desc == 'Fall 2021') & "
    "            (stu_visa == 'F1 Visa')"
)
cohort_rows = df.query(cohort_filter)

# Sum the weights per (registration term, course) and flatten the group keys back into columns.
weight_by_term_course = cohort_rows.groupby(['reg_term_desc', 'crs']).agg({'weight': 'sum'})
temp_weights = weight_by_term_course.reset_index()

# Write the aggregated cohort weights next to the other processed files.
temp_weights_csv = os.path.join("Data", "02_processed", "temp_weights.csv")
temp_weights.to_csv(temp_weights_csv, index=False)

In [17]:
# Handle for the 'EnrollmentFinalStatus' data; the cells below call .runQuery(sql) on it.
# NOTE(review): the 'db' / 'processed' arguments presumably select the storage kind and the
# processing stage -- confirm against DataManager.get_data.
db_enrollment = DM.get_data('EnrollmentFinalStatus', 'db', 'processed')

In [40]:
# Distinct students per admit term, listed in admit-term-code order.
# The rows are grouped by stu_admit_term_desc only, so the sort key is wrapped in MIN():
# ordering by a bare, non-grouped column is rejected by strict SQL engines and, where it is
# tolerated, its value comes from an arbitrary row of each group.
new_enrollments = db_enrollment.runQuery("""--sql
    SELECT stu_admit_term_desc, COUNT(DISTINCT stu_id) AS num_students
    FROM EnrollmentFinalStatus
    GROUP BY stu_admit_term_desc
    ORDER BY MIN(stu_admit_term_code)
""")

# Persist the per-admit-term headcount alongside the other processed files.
new_enrollments.to_csv( os.path.join("Data", "02_processed", "new_enrollments.csv"), index=False )

In [29]:
# Distinct registration statuses present in the enrollment frame.
# NOTE(review): `df_enrollment` is not assigned in any cell visible here -- it presumably comes
# from an earlier/removed cell or a star import; confirm this survives Restart & Run All.
df_enrollment["reg_status"].unique()

['**Web Registered**', 'Wait Listed', 'Drop-Course Cancelled', '**Registered**', 'Drop/Delete', 'Web Drop (Liability)', 'Web Withdrawal', 'Withdrawal from Course', 'Selective Withdrawal Exception', 'Registered for Audit']
Categories (10, object): ['**Registered**', '**Web Registered**', 'Drop-Course Cancelled', 'Drop/Delete', ..., 'Wait Listed', 'Web Drop (Liability)', 'Web Withdrawal', 'Withdrawal from Course']

In [34]:
# Distinct-student counts per (course, registration term) for the
# MS Data Analytics Engineering / F1 Visa / Fall 2021 admit cohort, in long form.
admits_long = db_enrollment.runQuery("""--sql
    SELECT
        crs, reg_term_desc, COUNT(DISTINCT stu_id) AS num_students
    FROM EnrollmentFinalStatus
    WHERE
        stu_prog_desc = 'MS Data Analytics Engineering'
        AND stu_visa = 'F1 Visa'
        AND stu_admit_term_desc = 'Fall 2021'
    GROUP BY crs, reg_term_desc
    ORDER BY crs, reg_term_desc
""")

# Reshape to one row per course and one column per registration term.
actual_admits = admits_long.pivot(index='crs', columns='reg_term_desc', values='num_students')

# `crs` is the index after the pivot, so the index is written out as well.
actual_admits_csv = os.path.join("Data", "02_processed", "actual_admits.csv")
actual_admits.to_csv(actual_admits_csv, index=True)

In [30]:
# Per-course breakdown for the MS Data Analytics Engineering / F1 Visa / Fall 2021 cohort.
# NOTE(review): num_students counts DISTINCT stu_id, while the per-term columns and Total add 1
# per matching row, so the two can disagree (e.g. AIT 524 in the recorded output: 56 vs 62).
# Confirm that row counts, not distinct students, are what the term columns should show.
course_term_counts_sql = """--sql
    SELECT
        crs, COUNT(DISTINCT stu_id) AS num_students,
        SUM(CASE WHEN reg_term_desc = 'Fall 2021' THEN 1 ELSE 0 END) AS Fall_2021,
        SUM(CASE WHEN reg_term_desc = 'Spring 2022' THEN 1 ELSE 0 END) AS Spring_2022,
        SUM(CASE WHEN reg_term_desc = 'Fall 2022' THEN 1 ELSE 0 END) AS Fall_2022,
        SUM(CASE WHEN reg_term_desc = 'Spring 2023' THEN 1 ELSE 0 END) AS Spring_2023,
        SUM(CASE WHEN reg_term_desc IN ('Fall 2021', 'Spring 2022', 'Fall 2022', 'Spring 2023') THEN 1 ELSE 0 END) AS Total
    FROM EnrollmentFinalStatus
    WHERE
        stu_prog_desc = 'MS Data Analytics Engineering'
        AND stu_visa = 'F1 Visa'
        AND stu_admit_term_desc = 'Fall 2021'
    GROUP BY crs
    ORDER BY crs
"""

# Bare last expression so the notebook renders the resulting table.
db_enrollment.runQuery(course_term_counts_sql)

Unnamed: 0,crs,num_students,Fall_2021,Spring_2022,Fall_2022,Spring_2023,Total
0,AIT 502,1,0,0,1,0,1
1,AIT 512,1,0,0,1,0,1
2,AIT 524,56,12,24,24,2,62
3,AIT 526,45,1,0,44,1,46
4,AIT 580,135,131,21,1,0,153
...,...,...,...,...,...,...,...
73,SWE 796,1,0,1,0,0,1
74,SYST 530,1,0,1,0,0,1
75,SYST 538,1,0,0,1,0,1
76,SYST 568,17,3,10,5,0,18


In [42]:
# Distinct visa categories present in the weights table.
df["stu_visa"].unique()

array(['F1 Visa', 'Not Relevent'], dtype=object)

In [160]:
import random

# Inputs
# ======
# Either 4 or 8 terms of history. Deliberately unseeded: each run of the cell picks one of
# the two at random so both cases get exercised.
enr_hist = random.choice([4, 8])
max_code = 202310                                               # MAX(reg_term_code)
eat_code = 202310                                               # Expected stu_admit_term_code
term_list = [201810, 201870, 201910, 201970, 202010, 202070, 202110, 202170, 202210, 202270, 202310]
at_code, at_term, at_year = None, None, None


# Helpers
# =======
def gt(x):
    """Return the term name for a YYYYTT term code.

    A suffix of 70 maps to 'Fall'; every other suffix maps to 'Spring'.
    None is passed through as None.
    """
    if x is None:
        return None
    return 'Fall' if x % 100 == 70 else 'Spring'


def gy(x):
    """Return the year part (code // 100) of a YYYYTT term code; None is passed through."""
    return None if x is None else x // 100


def fmt_window(label, code, n_terms):
    """Format '<label>: <code> : <term> <year> to <term> <year>' for a window of n_terms.

    The end of the window is computed as code + 50 * n_terms. Term codes in term_list
    alternate between suffixes 10 and 70, so two terms add up to exactly 100; the
    "50 per term" shortcut therefore only yields a valid code for an even n_terms
    (4 and 8 here).
    """
    end_code = code + 50 * n_terms
    return f'{label}: {code} : {gt(code)} {gy(code)} to {gt(end_code)} {gy(end_code)}'


# LIMITS
# ======
at_code_min = 201810                                          # Earliest Data Point Available
at_code_max_1 = max_code - 50 * enr_hist                      # 50 per term, * [4 or 8] difference from latest reg_term_code
at_code_max_2 = eat_code - 100                                # At least 2 terms before expected stu_admit_term_code
# Latest admit term satisfying both upper limits, but never earlier than the first data point.
at_code = max(at_code_min, min(at_code_max_1, at_code_max_2))      # Actual stu_admit_term_code

print(f'Checking {enr_hist} terms of history:')
print(f'Latest Term: {max_code} : {gt(max_code)} {gy(max_code)}')
print(fmt_window('Expected', eat_code, enr_hist))

print('\n' + fmt_window('Min BAct', at_code_min, enr_hist))
# The first upper limit was previously printed under the copy-pasted label "Max BAct2".
print(fmt_window('Max BAct1', at_code_max_1, enr_hist))
print(fmt_window('Max BAct2', at_code_max_2, enr_hist))

print('\n' + fmt_window('Best Act', at_code, enr_hist))

Checking 8 terms of history:
Latest Term: 202310 : Spring 2023
Expected: 202310 : Spring 2023 to Spring 2027

Min BAct: 201810 : Spring 2018 to Spring 2022
Max BAct2: 201910 : Spring 2019 to Spring 2023
Max BAct2: 202210 : Spring 2022 to Spring 2026

Best Act: 201910 : Spring 2019 to Spring 2023


In [125]:
# Split the YYYYTT term code into its (year, term-suffix) parts.
divmod(max_code, 100)

(2023, 10)

In [63]:
# Distinct students per registration term, ordered by term code.
students_per_reg_term_sql = """--sql
    SELECT reg_term_desc, reg_term_code, COUNT(DISTINCT stu_id) AS num_students
    FROM EnrollmentFinalStatus
    GROUP BY reg_term_desc, reg_term_code
    ORDER BY reg_term_code
"""

# Bare last expression so the notebook renders the resulting table.
db_enrollment.runQuery(students_per_reg_term_sql)

Unnamed: 0,reg_term_desc,reg_term_code,num_students
0,Fall 2017,201770,1177
1,Spring 2018,201810,1183
2,Fall 2018,201870,1209
3,Spring 2019,201910,1268
4,Fall 2019,201970,1439
5,Spring 2020 - COVID-19,202010,1371
6,Fall 2020,202070,1335
7,Spring 2021,202110,1307
8,Fall 2021,202170,1593
9,Spring 2022,202210,1661


In [54]:
# Number of distinct registration terms seen per admit term, for students whose stu_visa is
# 'Not Relevent' (spelled exactly as the value appears in the data), excluding Summer admits.
# The rows are grouped by stu_admit_term_desc only, so the sort key is wrapped in MIN():
# ordering by a bare, non-grouped column is rejected by strict SQL engines and, where it is
# tolerated, its value comes from an arbitrary row of each group.
db_enrollment.runQuery("""--sql
    SELECT stu_admit_term_desc, COUNT(DISTINCT reg_term_desc) AS num_terms_of_enrollment
    FROM EnrollmentFinalStatus
    WHERE
        stu_visa = 'Not Relevent'
        AND stu_admit_term_name != 'Summer'
    GROUP BY stu_admit_term_desc
    ORDER BY MIN(stu_admit_term_code)
""").head(60)

Unnamed: 0,stu_admit_term_desc,num_terms_of_enrollment
0,Fall 2010,1
1,Fall 2011,1
2,Spring 2012,3
3,Fall 2012,5
4,Spring 2013,3
5,Fall 2013,5
6,Spring 2014,6
7,Fall 2014,6
8,Spring 2015,7
9,Fall 2015,8


In [62]:
# Distinct F1-visa students enrolling per (admit term, registration term), for non-Summer
# admit terms after 2017, ordered by admit-term code and then registration-term code.
# Both sort keys are wrapped in MIN() because neither code column is part of the GROUP BY:
# ordering by bare, non-grouped columns is rejected by strict SQL engines and, where it is
# tolerated, their values come from an arbitrary row of each group.
db_enrollment.runQuery("""--sql
    SELECT stu_admit_term_desc, reg_term_desc, COUNT(DISTINCT stu_id) AS num_stu_enrolling
    FROM EnrollmentFinalStatus
    WHERE
        stu_visa = 'F1 Visa'
        AND stu_admit_term_name != 'Summer'
        AND stu_admit_term_year > 2017
    GROUP BY stu_admit_term_desc, reg_term_desc
    ORDER BY MIN(stu_admit_term_code), MIN(reg_term_code)
""").head(60)

Unnamed: 0,stu_admit_term_desc,reg_term_desc,num_stu_enrolling
0,Spring 2018,Spring 2018,73
1,Spring 2018,Fall 2018,72
2,Spring 2018,Spring 2019,67
3,Spring 2018,Fall 2019,31
4,Spring 2018,Spring 2020 - COVID-19,5
5,Spring 2018,Fall 2020,2
6,Spring 2018,Spring 2021,1
7,Fall 2018,Fall 2018,141
8,Fall 2018,Spring 2019,156
9,Fall 2018,Fall 2019,162


In [38]:
# Distinct (admit term, program, visa) combinations present in the weights table.
cohort_keys = ['stu_admit_term_desc', 'stu_prog_desc', 'stu_visa']
df.loc[:, cohort_keys].drop_duplicates()

Unnamed: 0,stu_admit_term_desc,stu_prog_desc,stu_visa
0,Spring 2018,MENG GeoConStruct Engineering,F1 Visa
6,Spring 2018,MS Applied Info Technology,F1 Visa
7,Spring 2018,MS Applied Info Technology,Not Relevent
33,Spring 2018,MS Civil & Infrastructure Engr,F1 Visa
35,Spring 2018,MS Civil & Infrastructure Engr,Not Relevent
...,...,...,...
14090,Spring 2023,MS Software Engineering,Not Relevent
14111,Spring 2023,MS Systems Engineering,Not Relevent
14112,Spring 2023,MS Systems Engineering,F1 Visa
14117,Spring 2023,MS Telecommunications,F1 Visa
