In [2]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd

# Setup

In [56]:
# Load the graphing table that the rest of the notebook works from
GRAPHING_TABLE_PATH = "../../data/graphing_table.csv"
df = pd.read_csv(GRAPHING_TABLE_PATH)

In [57]:
# Degree and university under examination
selected_course, selected_uni = 'Arts (Hons)', 'NUS'

# Rows of the raw table that belong to the selected course at the selected university
is_selected = (df['course'] == selected_course) & (df['uni'] == selected_uni)

# School of the selected course; .item() only succeeds when exactly one distinct
# school value is found for it
school = df.loc[is_selected, 'school'].drop_duplicates().item()

# Working copy of the main dataframe, ordered by ascending year
input_df = df.copy().sort_values(by='year')

In [58]:
# Drop summary rows left behind by an earlier run of this cell, so that re-running it
# neither aggregates over them as if they were courses nor appends duplicates
if 'summary_row' in input_df.columns:
    input_df = input_df.loc[input_df['summary_row'] != 1].copy()

# Years for which the selected course has records at the selected university
course_years = input_df.loc[(input_df['course'] == selected_course)
                            & (input_df['uni'] == selected_uni), 'year']
min_year, max_year = course_years.min().item(), course_years.max().item()

# Flag rows that are university-level summary stats (1) rather than courses (0)
input_df['summary_row'] = 0

# Metrics summarised per university and year
metrics = ['Places', 'GPA', 'RP', 'employment_rate_overall', 'employment_rate_ft_perm',
           'basic_monthly_mean', 'basic_monthly_median', 'gross_monthly_mean',
           'gross_monthly_median', 'gross_mthly_25_percentile', 'gross_mthly_75_percentile']

# (label, quantile) pairs, listed in the order their rows are appended for each university
summary_quantiles = [('Median', 0.5), ('25th Percentile', 0.25), ('75th Percentile', 0.75)]

# Only records inside the selected course's year range feed the summaries
in_year_range = input_df['year'].between(min_year, max_year)

# Build the summary rows for EVERY university, then append them with a single concat
summary_frames = []
for uni_tmp in df.uni.unique().tolist():
    # Records of this university within the year range, grouped by year
    tdf = input_df.loc[(input_df['uni'] == uni_tmp) & in_year_range]
    metrics_by_year = tdf.groupby('year')[metrics]

    for label, q in summary_quantiles:
        # One row per year holding the q-th quantile of every metric
        quantile_rows = metrics_by_year.quantile(q).reset_index()
        quantile_rows['uni'] = uni_tmp
        quantile_rows['course'] = f'{uni_tmp} {label}'
        quantile_rows['summary_row'] = 1
        summary_frames.append(quantile_rows)

# Join back to input_df
input_df = pd.concat([input_df, *summary_frames], ignore_index=True)

# track if a row is a course row or not (the complement of summary_row)
input_df['course_row'] = 1 - input_df['summary_row']

# Admission (RP)

In [59]:
# Extracting data for the selected course and the university's aggregation rows
course_data = input_df[(input_df['course'] == selected_course) & (input_df['uni'] == selected_uni)]
agg_data = input_df[(input_df['summary_row'] == 1) & (input_df['uni'] == selected_uni)]

# Creating the plot
fig = go.Figure()

# Adding line for the selected course
fig.add_trace(go.Scatter(
    x=course_data['year'], y=course_data['RP'],
    mode='lines', name=f"{selected_course}",
    line=dict(color='#0F7A8F', width=3, shape='linear')
))

# Colours for the aggregation lines; the palette is reused from the start when there
# are more aggregation series than colours
agg_col_map = ['#7c6354', '#DB3F3F', '#317A41', '#222323', '#011638']
# Adding one dashed line per aggregation row
for idx, agg_row in enumerate(agg_data['course'].unique()):
    agg_row_data = agg_data[agg_data['course'] == agg_row]
    fig.add_trace(go.Scatter(
        x=agg_row_data['year'], y=agg_row_data['RP'],
        mode='lines', name=agg_row,
        # Modulo the full palette length so that every colour, including the last, is reachable
        line=dict(color=agg_col_map[idx % len(agg_col_map)], width=2, dash='dash'),
        opacity=1
    ))

# Lower RP limit rounded down to the nearest 5. Series.min() skips NaN, whereas the
# built-in min(a, b) returns NaN whenever its first argument is NaN
min_rp = pd.concat([course_data['RP'], agg_data['RP']]).min()
min_rp_limit = 5 * (min_rp // 5)

# Adding padding to the X-axis so the first and last years are not drawn on the plot edge
padding = 0.5
x_min = course_data['year'].min() - padding
x_max = course_data['year'].max() + padding


# Updating layout for the presentation preferences
fig.update_layout(
    title = f"RP cutoff standing for {selected_course} in {selected_uni}", 
    xaxis_title = 'Year', 
    yaxis_title = 'Rank Points', 
    # Upper limit is the highest RP anywhere in input_df, not just among the plotted series
    yaxis=dict(range=[min_rp_limit, input_df['RP'].max()]),
    legend=dict(orientation="h", yanchor="bottom", y=-0.3, xanchor="center", x=0.5),
    # One tick per year, starting at the selected course's first year
    xaxis=dict(range = [x_min, x_max], tickmode='linear', tick0=course_data['year'].min(), dtick=1),
    margin=dict(l=50, r=50, t=50, b=100)
)

fig.update_xaxes(tickangle=45)
fig.show()


# RP vs Median Salary

In [66]:
# Filtering data for the latest year available for the selected course
latest_year = input_df.loc[(input_df['course'] == selected_course)
                           & (input_df['uni'] == selected_uni), 'year'].max()
# Keep course rows only (no university summary rows) for that year
filtered_df = input_df[(input_df['year'] == latest_year)
                       & (input_df['summary_row'] == 0)].copy()

# Divide gross_monthly_median by 1,000 for formatting
filtered_df['gross_monthly_median'] = filtered_df['gross_monthly_median'] / 1000


def _format_tooltip(row):
    """Return the hover text for one course row: course name, RP and median salary in $K."""
    return f"{row['course']}<br>RP: {row['RP']}<br>Median Salary: ${row['gross_monthly_median']:.2f}K"


# Add custom data for tooltips (course name, RP, and gross monthly median)
filtered_df['tooltip'] = filtered_df.apply(_format_tooltip, axis=1)
# NOTE(review): 'empty' is not read anywhere in this cell; kept in case a later cell uses it
filtered_df['empty'] = ""

# One colour per university, assigned in order of first appearance in filtered_df
# NOTE(review): the original comment expected the order [NUS, NTU, SMU]; confirm against the data
colors = ['#fe4a49', '#09814a', '#963484']
color_map = {uni: colors[i % len(colors)] for i, uni in enumerate(filtered_df['uni'].unique())}
# Store Graph Object traces
traces = []

# Highlighting the selected course
selected_data = filtered_df[(filtered_df['course'] == selected_course)
                            & (filtered_df['uni'] == selected_uni)]
# Fail with a clear message instead of an IndexError when there is nothing to highlight
if selected_data.empty:
    raise ValueError(f"No data found for {selected_course} ({selected_uni})")

# The selected course's hover text is baked in as literal values from its first row.
# Plotly still shows this trace's name in the secondary hover box.
selected_trace = go.Scatter(
    x=selected_data['RP'],
    y=selected_data['gross_monthly_median'],
    mode='markers',
    name=f"{selected_course} ({selected_uni})",
    marker=dict(color='#121619', size=14),
    hovertemplate='<b>{}</b><br>RP: {}<br>Median Salary: ${:.2f}K'.format(
        selected_course,
        selected_data['RP'].iloc[0],
        selected_data['gross_monthly_median'].iloc[0],
    )
)
traces.append(selected_trace)

# Other courses data: everything except the selected course at the selected university
other_courses_df = filtered_df.loc[~((filtered_df['course'] == selected_course)
                                     & (filtered_df['uni'] == selected_uni))]

# Iterate over all other uni courses by uni: one marker trace per university
for uni in other_courses_df['uni'].unique():
    uni_data = other_courses_df[other_courses_df['uni'] == uni]
    trace = go.Scatter(
        x=uni_data['RP'],
        y=uni_data['gross_monthly_median'],
        mode='markers',
        name=uni,
        marker=dict(color=color_map[uni], size=8, opacity=0.6),
        customdata=uni_data['tooltip'],
        # The empty <extra></extra> tag hides plotly's secondary hover box (the trace
        # name), so that only the custom tooltip is displayed
        hovertemplate='%{customdata}<extra></extra>'
    )
    # Add the trace
    traces.append(trace)


# Create the layout
layout = go.Layout(
    title=f"RP vs Gross Monthly Median Salary (Year: {latest_year})",
    xaxis=dict(title='RP'),
    yaxis=dict(title='Gross Monthly Median Salary (in thousands)'),
    legend=dict(title=None, orientation="h", yanchor="bottom", y=-0.25, xanchor="center", x=0.5),
    margin=dict(l=50, r=50, t=50, b=100)
)

# Create the figure
fig = go.Figure(data=traces, layout=layout)

fig.show()