In [1]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd

# Setup

In [2]:
# Read the graphing table CSV into the main dataframe
df = pd.read_csv(filepath_or_buffer="../../data/graphing_table.csv")

In [3]:
# Selection: the university and degree to examine
uni = 'NUS'
course = 'Arts (Hons)'

# School of the selected course at the selected university.
# .item() raises unless exactly one distinct value is found.
school = (
    df.loc[(df.uni == uni) & (df.course == course), 'school']
    .drop_duplicates()
    .item()
)

# Year-ordered working copy, so the loaded dataframe itself is left untouched
input_df = df.copy().sort_values(by='year')

In [4]:
# Build per-university summary rows (25th percentile, median, 75th percentile of
# each metric, per year) and append them to input_df, flagged with summary_row = 1.

# Drop summary rows left behind by a previous run of this cell, so re-running it
# neither duplicates them nor feeds them back into the quantiles below.
if 'summary_row' in input_df.columns:
    input_df = input_df.loc[input_df['summary_row'] != 1].copy()

# Years for which the selected course has records at the selected university
course_years = input_df.loc[(input_df.course == course)
                            & (input_df.uni == uni), 'year']
min_year, max_year = course_years.min().item(), course_years.max().item()

# Metrics to summarise (loop-invariant, so defined once)
metrics = ['Places', 'GPA', 'RP',  'employment_rate_overall','employment_rate_ft_perm','basic_monthly_mean','basic_monthly_median','gross_monthly_mean','gross_monthly_median','gross_mthly_25_percentile','gross_mthly_75_percentile']

# (course label suffix, quantile), in the order the rows are appended per university
summary_quantiles = [('Median', 0.5), ('25th Percentile', 0.25), ('75th Percentile', 0.75)]

# Every existing row is a course row; summary rows are flagged with 1 below
input_df['summary_row'] = 0

# Collect one summary table per university and concatenate once at the end,
# instead of growing input_df inside the loop.
summary_tables = []
# Do this for EVERY university
for uni_tmp in df.uni.unique().tolist():

    # Filter for that uni, for records within the year range
    tdf = input_df.loc[(input_df.uni == uni_tmp)
                       & (input_df.year.between(min_year, max_year))]

    # One row per year and quantile, labelled like '<uni> Median'
    by_year = tdf.groupby('year', as_index=False)[metrics]
    stats_table = pd.concat(
        [by_year.quantile(q).assign(uni=uni_tmp, course=f'{uni_tmp} {label}')
         for label, q in summary_quantiles],
        ignore_index=True,
    )
    stats_table['summary_row'] = 1
    summary_tables.append(stats_table)

# Join back to input_df: original rows first, then each university's summary rows
input_df = pd.concat([input_df, *summary_tables], ignore_index=True)

# track if a row is a course row or not (the inverse of summary_row)
input_df['course_row'] = 1 - input_df['summary_row']

# Admission (RP)

In [21]:
# Scratch check: enumerate yields (position, item) pairs
for idx, row in enumerate('abc'):
    print(idx, row)

0 a
1 b
2 c


In [49]:
# Plot the selected course's RP per year against the summary rows of its university.

# Rows for the selected course, and the summary rows of the same university
course_data = input_df[(input_df['course'] == course) & (input_df['uni'] == uni)]
agg_data = input_df[(input_df['summary_row'] == 1) & (input_df['uni'] == uni)]

# Creating the plot
fig = go.Figure()

# Solid line for the selected course
fig.add_trace(go.Scatter(
    x=course_data['year'], y=course_data['RP'],
    mode='lines', name=f"{course}",
    line=dict(color='#0F7A8F', width=3, shape='linear')
))

# Colour palette for the summary lines; cycles when there are more lines than colours
agg_col_map = ['#7c6354', '#DB3F3F', '#317A41', '#222323', '#011638']
# One dashed line per summary row label
for idx, agg_row in enumerate(agg_data['course'].unique()):
    agg_row_data = agg_data[agg_data['course'] == agg_row]
    # Modulo by the full palette length (not length - 1) so the last colour is reachable
    agg_colour = agg_col_map[idx % len(agg_col_map)]
    fig.add_trace(go.Scatter(
        x=agg_row_data['year'], y=agg_row_data['RP'],
        mode='lines', name=agg_row,
        line=dict(color=agg_colour, width=2, dash='dash'),
        opacity=1
    ))
    # Trace of the colour assigned to each summary line
    print(idx, agg_colour)

# Lower RP limit rounded down to the nearest 5.
# Series.min() skips NaN, whereas the builtin min() of two scalars can return NaN
# depending on argument order when one of the series has no RP values.
min_rp = pd.concat([course_data['RP'], agg_data['RP']]).min()
min_rp_limit = 5 * (min_rp // 5)

# Adding padding to the X-axis
padding = 0.5
x_min = course_data['year'].min() - padding
x_max = course_data['year'].max() + padding


# Updating layout for the presentation preferences.
# The y-axis upper bound is the maximum RP over all rows of input_df.
fig.update_layout(
    title = f"RP cutoff standing for {course} in {uni}", 
    xaxis_title = 'Year', 
    yaxis_title = 'Rank Points', 
    yaxis=dict(range=[min_rp_limit, input_df['RP'].max()]),
    legend=dict(orientation="h", yanchor="bottom", y=-0.3, xanchor="center", x=0.5),
    xaxis=dict(range = [x_min, x_max], tickmode='linear', tick0=course_data['year'].min(), dtick=1),
    margin=dict(l=50, r=50, t=50, b=100)
)

fig.update_xaxes(tickangle=45)
fig.show()


0 #7c6354
1 #DB3F3F
2 #317A41


In [14]:
# Display the lower y-axis bound (minimum RP floored to a multiple of 5) set in the RP plotting cell.
# NOTE(review): this cell's execution count (14) is lower than the plotting cell's (49) — confirm it still runs in top-to-bottom order.
min_rp_limit

np.float64(60.0)

In [29]:
# Scratch check of the modulo operator: 2 % 4 evaluates to 2
2 %  4

2