In [2]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
from dotenv import load_dotenv
load_dotenv()
import chart_studio.plotly as py
import chart_studio.tools as tls
import chart_studio
import os


In [3]:
# To embed charts in online article
plotly_username = os.environ['plotly_username']
plotly_apikey = os.environ['plotly_apikey']

chart_studio.tools.set_credentials_file(username=plotly_username
                                        , api_key=plotly_apikey)

In [4]:
# Import data
orig_df = pd.read_csv("../../data/graphing_table.csv")

# Generate summary rows # 
# Identify rows that are summary stats at the university level
orig_df['summary_row'] = 0 

# University level summaries
for uni_tmp in orig_df.uni.unique().tolist():

    # Filter for that uni, for records within the year range
    tdf = orig_df.loc[(orig_df.uni == uni_tmp)]

    # Aggregate relevant metrics
    metrics = ['Places', 'GPA', 'RP',  'employment_rate_overall','employment_rate_ft_perm','basic_monthly_mean','basic_monthly_median','gross_monthly_mean','gross_monthly_median','gross_mthly_25_percentile','gross_mthly_75_percentile']
    # 25th Percentile rows
    uni_25th_pctile = tdf.groupby('year', as_index=False)[metrics].agg(lambda x: x.quantile(0.25))
    uni_25th_pctile['uni'] = uni_tmp
    uni_25th_pctile['course'] = f'{uni_tmp} 25th Percentile'
    # Median Rows
    uni_median = tdf.groupby('year', as_index=False)[metrics].agg(lambda x: x.quantile(0.5))
    uni_median['uni'] = uni_tmp
    uni_median['course'] = f'{uni_tmp} Median'
    # 75th Percentile Rows
    uni_75th_percentile = tdf.groupby('year', as_index=False)[metrics].agg(lambda x: x.quantile(0.75))
    uni_75th_percentile['uni'] = uni_tmp
    uni_75th_percentile['course'] = f'{uni_tmp} 75th Percentile'
    stats_table = pd.concat([uni_median, uni_25th_pctile, uni_75th_percentile], ignore_index=True)
    stats_table['summary_row'] = 1
    # Join back to orig_df
    orig_df = pd.concat([orig_df,stats_table], ignore_index=True)

# Summary at the overall level
uni_25th_pctile = orig_df.groupby('year', as_index=False)[metrics].agg(lambda x: x.quantile(0.25))
uni_25th_pctile['uni'] = np.nan
uni_25th_pctile['course'] = f'Overall 25th Percentile'
# Median Rows
uni_median = orig_df.groupby('year', as_index=False)[metrics].agg(lambda x: x.quantile(0.5))
uni_median['uni'] = np.nan
uni_median['course'] = f'Overall Median'
# 75th Percentile Rows
uni_75th_percentile = orig_df.groupby('year', as_index=False)[metrics].agg(lambda x: x.quantile(0.75))
uni_75th_percentile['uni'] = np.nan
uni_75th_percentile['course'] = f'Overall 75th Percentile'
stats_table = pd.concat([uni_median, uni_25th_pctile, uni_75th_percentile], ignore_index=True)
stats_table['summary_row'] = 1
# Join back to orig_df
orig_df = pd.concat([orig_df,stats_table], ignore_index=True)



# track if a row is a course row or not
orig_df['course_row'] = orig_df.summary_row.replace({0: 1, 1: 0 })

# RP Scores

Which degrees have become the most sought-after from 2017 compared to now? 

In [5]:
df = orig_df.copy()
df = df.loc[(df.summary_row==0)]



def calculate_RP_change(group):
    # Find the earliest year with RP from 2017 onwards, or the next available year
    year_orig = group['year'].min()
    orig_RP = group[group['year'] == year_orig]['RP'].values[0]
    
    # Find the latest available year with RP
    year_end = group['year'].max()
    end_RP = group[group['year'] == year_end]['RP'].values[0]
    
    # Calculate RP change
    RP_change = end_RP - orig_RP
    
    return pd.Series({
        'year_orig': year_orig,
        'orig_RP': orig_RP,
        'year_end': year_end,
        'end_RP': end_RP,
        'RP_change': RP_change
    })

# Apply the function to the groupby object
result_df = df.groupby(['uni', 'course'])[['uni','course', 'year', 'RP']].apply(calculate_RP_change).reset_index()

# Sort by RP_change in descending order
result_df = result_df.sort_values(by='RP_change', ascending=False)
# Add the within-uni ranking
result_df['uni_ranking']= result_df.groupby('uni')['RP_change'].rank(ascending=False, method='dense')



In [6]:
# How much has the RP changed for each university?
result_df.groupby('uni').agg(
    mean_change = pd.NamedAgg(column='RP_change', aggfunc='mean')
    ,median_change = pd.NamedAgg(column='RP_change', aggfunc='median')
).round(2)

Unnamed: 0_level_0,mean_change,median_change
uni,Unnamed: 1_level_1,Unnamed: 2_level_1
NTU,0.14,0.0
NUS,0.38,0.0
SMU,0.0,0.62


In [7]:
metric = 'uni_ranking'
result_df.loc[(result_df.uni_ranking<=5)].sort_values(['uni',metric], ascending=True)\
    .set_index(['uni',metric])

Unnamed: 0_level_0,Unnamed: 1_level_0,course,year_orig,orig_RP,year_end,end_RP,RP_change
uni,uni_ranking,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
NTU,1.0,Computer Science,2013.0,68.75,2023.0,80.0,11.25
NTU,2.0,"Art, Design and Media",2013.0,62.5,2023.0,72.5,10.0
NTU,3.0,Public Policy and Global Affairs,2013.0,73.75,2023.0,81.25,7.5
NTU,4.0,Computer Engineering,2013.0,68.75,2023.0,75.0,6.25
NTU,5.0,Biological Sciences,2013.0,73.75,2023.0,78.75,5.0
NUS,1.0,Computer Science,2013.0,75.0,2023.0,85.0,10.0
NUS,2.0,Computer Engineering,2013.0,71.25,2023.0,80.0,8.75
NUS,3.0,Science (Hons),2013.0,73.75,2023.0,78.75,5.0
NUS,4.0,Industrial Design,2013.0,75.0,2023.0,78.75,3.75
NUS,4.0,Business Analytics,2013.0,81.25,2023.0,85.0,3.75


In [8]:
metric = 'RP_change'
result_df.sort_values(['uni',metric], ascending=True)\
    .set_index(['uni',metric])

Unnamed: 0_level_0,Unnamed: 1_level_0,course,year_orig,orig_RP,year_end,end_RP,uni_ranking
uni,RP_change,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
NTU,-13.75,Aerospace Engineering,2013.0,81.25,2023.0,67.50,16.0
NTU,-11.25,Chemical and Biomolecular Engineering,2013.0,81.25,2023.0,70.00,15.0
NTU,-10.00,Accountancy,2013.0,82.50,2023.0,72.50,14.0
NTU,-6.25,Business,2013.0,80.00,2023.0,73.75,13.0
NTU,-6.25,Maritime Studies,2013.0,75.00,2023.0,68.75,13.0
...,...,...,...,...,...,...,...
SMU,-1.25,Business Management,2013.0,76.25,2023.0,75.00,5.0
SMU,0.00,Law,2013.0,85.00,2023.0,85.00,4.0
SMU,1.25,Economics,2013.0,72.50,2023.0,73.75,3.0
SMU,2.50,Social Sciences,2013.0,72.50,2023.0,75.00,2.0


A common pattern emerges. In all three universities, tech-related degrees are becoming increasingly popular. The course with the greatest rise in admissions criteria is tech related in all 3 universities. 

# Tech Salaries

The driving force behind this surge in popularity are the lucrative job prospects undergraduates hope await them upon graduation. 

In [9]:
tech_degrees= {
    'NUS': ['Data Science and Analytics', 'Business Analytics', 'Computer Science', 'Computer Engineering', 'Information Security', 'Information Systems']
    , 'NTU': ['Computer Engineering', 'Computer Science', 'Data Science & Artificial Intelligence']
    , 'SMU': ['Information Systems']
}

In [10]:
df = orig_df.copy()
# Filter the DataFrame for tech-related degrees
df['is_tech_degree'] = df.apply(lambda x: 1 if x['course'] in tech_degrees.get(x['uni'], []) else 0, axis=1)

# Adjust gross_monthly_median to thousands
df['gross_monthly_median'] = df['gross_monthly_median'] / 1000

tech_degree_color = '#FF6173'
non_tech_degree_color = 'grey'

# Create the figure
fig = go.Figure()

# Plot the line plot for the overall median
median_df = df[(df['course'] == 'Overall Median') & (df['summary_row'] == 1)]
fig.add_trace(go.Scatter(
    x=median_df['year'],
    y=median_df['gross_monthly_median'],
    mode='lines',
    line=dict(color='black', dash='dash'),
    name='Overall Median'
))

# Compute the median yearlysalaary for tech degrees
tdf = df.loc[(df['is_tech_degree']==1)
             & (df.summary_row==0)]
# Median Rows
tdf_median = tdf.groupby('year', as_index=False)[metrics].agg(lambda x: x.quantile(0.5))
tdf_median['uni'] = np.nan
tdf_median['course'] = f'Tech Degree Median'
tdf_median['summary_row']=1

fig.add_trace(go.Scatter(
    x=tdf_median['year'],
    y=tdf_median['gross_monthly_median'],
    mode='lines',
    line=dict(color='red', dash='dash'),
    name='Tech Degree Median'
))

# Plot scatter plots for each non-summary course
df = df.sort_values('is_tech_degree', ascending=False)
for uni in df['uni'].unique():
    uni_df = df[(df['uni'] == uni) & (df['course_row'] == 1)]
    for course in uni_df['course'].unique():
        course_df = uni_df[uni_df['course'] == course]
        color = tech_degree_color if course_df['is_tech_degree'].iloc[0] == 1 else non_tech_degree_color
        fig.add_trace(go.Scatter(
            x=course_df['year'],
            y=course_df['gross_monthly_median'],
            mode='markers',
            marker=dict(color=color, opacity=0.7),
            name='Tech Degree' if color == tech_degree_color else 'Non-Tech Degree',
            showlegend=not any(d['name'] == ('Tech Degree' if color == tech_degree_color else 'Non-Tech Degree') for d in fig.data),
            hovertemplate=f"{uni}<br>{course}<br>Salary: %{{y:.2f}}K"
        ))

# Customize the layout
fig.update_layout(
    xaxis_fixedrange=True, yaxis_fixedrange=True,
    # title="Yearly degree starting salaries",
    title = {
        'text': 'Degree starting salaries'
        , 'y' : 0.95
        , 'x': 0.1
        , 'xanchor': 'left'
        , 'yanchor': 'top'
    },
    xaxis_title="Year",
    yaxis_title="Gross Monthly Median (SGD Thousands)",
    # legend_title="Degree Type",
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="left",
        x=0
    ),
    xaxis=dict(
        tickmode='linear',
        dtick=1  # Yearly ticks
    ),
    yaxis=dict(
        tickmode='linear',
        dtick=0.5  # $500 increments
    )
    , template='ggplot2'
    , margin = dict(r=10)
)

# Show the figure
fig.show()


In [11]:
median_df.loc[(median_df.year.between(2022, 2023)), ['year', 'uni', 'course', 'gross_monthly_median']]

Unnamed: 0,year,uni,course,gross_monthly_median
875,2022,,Overall Median,4.2
876,2023,,Overall Median,4.2


In [12]:
tmp_23_df = df.loc[(df.year==2023), ['year', 'uni', 'course', 'gross_monthly_median']]\
        .sort_values('gross_monthly_median', ascending=False)
tmp_22_df = df.loc[(df.year==2022), ['year', 'uni', 'course', 'gross_monthly_median']]\
        .sort_values('gross_monthly_median', ascending=False)
tdf = pd.concat([tmp_23_df, tmp_22_df], ignore_index=True)
tdf.to_csv("tmp.csv", index=False)

In [13]:
df.loc[(df.year==2022), ['year', 'uni', 'course', 'gross_monthly_median']]\
        .sort_values('gross_monthly_median', ascending=False)\
        

Unnamed: 0,year,uni,course,gross_monthly_median
235,2022,NUS,Computer Science,6.600
151,2022,NUS,Law,6.400
765,2022,SMU,Law,6.375
75,2022,NUS,Computer Engineering,6.000
699,2022,NTU,Data Science & Artificial Intelligence,5.625
...,...,...,...,...
541,2022,NTU,Linguistics and Multilingual Studies,3.500
672,2022,NTU,Philosophy,3.450
618,2022,NTU,Sports Science and Management,3.350
486,2022,NTU,"Art, Design and Media",3.200


In [14]:
# Host on plotly
# py.plot(fig, filename= 'yearly_salaries', auto_open=True)

In [15]:
overall_median_df = df[(df['course'] == 'Overall Median') & (df['summary_row'] == 1)]
tech_degrees_df = df[(df['is_tech_degree'] == 1) & (df['course_row'] == 1)]

# Group by year and calculate the required statistics
comparison_df = overall_median_df[['year', 'gross_monthly_median']].rename(columns={'gross_monthly_median': 'overall_median'})
comparison_df['tech_degree_median'] = tech_degrees_df.groupby('year')['gross_monthly_median'].median().values
comparison_df['tech_degree_mean'] = tech_degrees_df.groupby('year')['gross_monthly_median'].mean().values

# Calculate the percentage difference
comparison_df['pct_median_over'] = ((comparison_df['tech_degree_median'] - comparison_df['overall_median']) / comparison_df['overall_median']) * 100

comparison_df.set_index('year', inplace=True)

In [16]:
comparison_df

Unnamed: 0_level_0,overall_median,tech_degree_median,tech_degree_mean,pct_median_over
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2013,3.1,3.1875,3.205,2.822581
2014,3.204,3.45,3.408333,7.677903
2015,3.3,3.525,3.591667,6.818182
2016,3.4,3.75,3.75,10.294118
2017,3.4345,4.0,3.947857,16.465279
2018,3.5,4.0,4.146857,14.285714
2019,3.6,4.228,4.389714,17.444444
2020,3.7,4.542,4.645556,22.756757
2021,3.79,5.0,5.144444,31.926121
2022,4.2,5.5,5.642,30.952381


# Dip in the last year

Dumbell plot of tech salaries for year 2022 - 2023. 

In 2022 there were several high-profile layoffs in shopee, google, meta and more. This spelled trouble for many a graduating student. Did the reality reflect the fear on everyone's minds at the time? 

In [17]:
df = orig_df.copy()
# Filter the DataFrame for tech-related degrees
df['is_tech_degree'] = df.apply(lambda x: 1 if x['course'] in tech_degrees.get(x['uni'], []) else 0, axis=1)

# Adjust gross_monthly_median to thousands
# df['gross_monthly_median'] = df['gross_monthly_median'] / 1000


# Provided tech_degrees dictionary
tech_degrees = {
    'NUS': ['Data Science and Analytics', 'Business Analytics', 'Computer Science', 'Information Security', 'Information Systems'],
    'NTU': ['Computer Engineering', 'Computer Science', 'Data Science & Artificial Intelligence'],
    'SMU': ['Information Systems']
}

# Filter the DataFrame for tech-related degrees in 2022 and 2023
tech_degrees_df = df[(df['is_tech_degree'] == 1) & (df['course_row'] == 1) & df['year'].isin([2022, 2023])]

# Compute the change in salary from 2022 to 2023
change_df = tech_degrees_df.pivot_table(
    index=['uni', 'course'], 
    columns='year', 
    values='gross_monthly_median'
).reset_index()

change_df['change'] = change_df[2023] - change_df[2022]
change_df['course_uni'] = change_df.apply(lambda x: f"{x['course']} ({x['uni']})", axis=1)

# Sort the DataFrame by the 'change' column
change_df.sort_values(by='change', ascending=False, inplace=True)

# Create the bar plot with flipped axes
fig = go.Figure()

fig.add_trace(go.Bar(
    y=change_df['course_uni'],
    x=change_df['change'],
    orientation='h',
    marker_color=['red' if x < 0 else 'green' for x in change_df['change']],
    text=change_df['change'],
    textposition='outside'
))

# Add a vertical dashed line at x=0
fig.add_shape(
    type="line",
    x0=0, y0=-0.5, x1=0, y1=len(change_df) - 0.5,
    line=dict(color="black", width=2, dash="dash")
)

# Customize the layout
fig.update_layout(
    xaxis_fixedrange=True, yaxis_fixedrange=True,
    # title = {
    #         'text': "Change in Median Salaries for Tech Degrees (2022 to 2023)"
    #         , 'y' : 0.95
    #         , 'x': 0.1
    #         , 'xanchor': 'left'
    #         , 'yanchor': 'top'
    #     },
    title = "Change in Median Salaries for Tech Degrees (2022 to 2023)",
    xaxis_title="Change in Gross Monthly Median Salary",
    # yaxis_title="Degree (University)",
    xaxis=dict(
        tickformat="$,.0f",
        zeroline=False,  # Hide the zero line (since we added our own dashed line)
        range=[min(change_df['change']) - 100, max(change_df['change']) + 100]  # Adjust range to center 0
    ),
    yaxis=dict(autorange="reversed")  # To keep the degrees ordered as before
    , margin = dict(r=10, l=15)
)

# Show the figure
fig.show()

In [18]:
# Host on plotly
# py.plot(fig, filename= 'salary_change', auto_open=True)

In [19]:
df = orig_df.copy()
# Filter the DataFrame for tech-related degrees
df['is_tech_degree'] = df.apply(lambda x: 1 if x['course'] in tech_degrees.get(x['uni'], []) else 0, axis=1)

# Compute the overall average change in salary across all courses from 2022 to 2023
all_courses_df = df[(df['course_row'] == 1) & df['year'].isin([2022, 2023])]
overall_change = all_courses_df.pivot_table(index=['uni', 'course'], columns='year', values='gross_monthly_median')
print((overall_change[2023] - overall_change[2022]).median())
print((overall_change[2023] - overall_change[2022]).mean())

50.0
65.53731343283582


In [20]:
overall_change

Unnamed: 0_level_0,year,2022,2023
uni,course,Unnamed: 2_level_1,Unnamed: 3_level_1
NTU,Accountancy,3600.0,3600.0
NTU,Aerospace Engineering,5000.0,4763.0
NTU,"Art, Design and Media",3200.0,3498.0
NTU,Arts (Education),4129.0,4129.0
NTU,Bioengineering,4200.0,4043.0
...,...,...,...
SMU,Business Management,4400.0,4436.0
SMU,Economics,4500.0,4500.0
SMU,Information Systems,5400.0,5400.0
SMU,Law,6375.0,7000.0


In [21]:
df = orig_df.copy()
# Filter the DataFrame for tech-related degrees
df['is_tech_degree'] = df.apply(lambda x: 1 if x['course'] in tech_degrees.get(x['uni'], []) else 0, axis=1)

# Adjust gross_monthly_median to thousands
# df['gross_monthly_median'] = df['gross_monthly_median'] / 1000


# Provided tech_degrees dictionary
tech_degrees = {
    'NUS': ['Data Science and Analytics', 'Business Analytics', 'Computer Science', 'Information Security', 'Information Systems'],
    'NTU': ['Computer Engineering', 'Computer Science', 'Data Science & Artificial Intelligence'],
    'SMU': ['Information Systems']
}

# Filter the DataFrame for tech-related degrees in 2022 and 2023
tech_degrees_df = df[(df['is_tech_degree'] == 1) & (df['course_row'] == 1) & df['year'].isin([2022, 2023])]

# Compute the change in salary from 2022 to 2023 for each course
change_df = tech_degrees_df.pivot_table(
    index=['uni', 'course'], 
    columns='year', 
    values='gross_monthly_median'
).reset_index()

change_df['change'] = change_df[2023] - change_df[2022]
change_df['course_uni'] = change_df.apply(lambda x: f"{x['course']} ({x['uni']})", axis=1)

# Sort the DataFrame by the 'change' column
change_df.sort_values(by='change', ascending=False, inplace=True)

# Compute the overall average change in salary across all courses from 2022 to 2023
all_courses_df = df[(df['course_row'] == 1) & df['year'].isin([2022, 2023])]
overall_change = all_courses_df.pivot_table(index=['uni', 'course'], columns='year', values='gross_monthly_median')
overall_change = (overall_change[2023] - overall_change[2022]).mean()

# Create the bar plot with flipped axes
fig = go.Figure()

fig.add_trace(go.Bar(
    y=change_df['course_uni'],
    x=change_df['change'],
    orientation='h',
    marker_color=['red' if x < 0 else 'green' for x in change_df['change']],
    text=change_df['change'],
    textposition='outside'
))

# Add a vertical dashed line at x=0 (No Change)
fig.add_shape(
    type="line",
    x0=0, y0=-0.5, x1=0, y1=len(change_df) - 0.5,
    line=dict(color="black", width=2, dash="dash"),
    name='No Change'
)

# Add a vertical dashed line for overall average change
fig.add_shape(
    type="line",
    x0=overall_change, y0=-0.5, x1=overall_change, y1=len(change_df) - 0.5,
    line=dict(color="blue", width=2, dash="dash"),
    name='Overall Average Change'
)

# Add overall average change to legend
fig.add_trace(go.Scatter(
    x=[overall_change],
    y=[None],  # Invisible point to show the legend
    mode='lines',
    line=dict(color='blue', dash='dash'),
    name='Overall Average Change'
))

# Customize the layout
fig.update_layout(
    
    title="Change in Median Salaries for Tech Degrees (2022 to 2023)",
    xaxis_title="Change in Gross Monthly Median Salary",
    yaxis_title="",
    yaxis=dict(autorange="reversed"),  # To keep the degrees ordered as before
    xaxis=dict(
        tickformat="$,.0f",
        zeroline=False,  # Hide the zero line (since we added our own dashed line)
        tickmode='linear',
        dtick=250  # $250 increments
    ),
    showlegend=True
)

# Show the figure
fig.show()


# Employment Rates


In [22]:
orig_df.columns

Index(['uni', 'school', 'course', 'year', 'Places', 'GPA', 'RP',
       'employment_rate_overall', 'employment_rate_ft_perm',
       'basic_monthly_mean', 'basic_monthly_median', 'gross_monthly_mean',
       'gross_monthly_median', 'gross_mthly_25_percentile',
       'gross_mthly_75_percentile', 'university', 'summary_row', 'course_row'],
      dtype='object')

In [23]:
# Generate the table of employment rates between 2022 and 2023 for tech-related degrees
df = orig_df.copy()
df['is_tech_degree'] = df.apply(lambda x: 1 if x['course'] in tech_degrees.get(x['uni'], []) else 0, axis=1)


tech_employment = df.loc[(df.is_tech_degree==1)
                         & (df.summary_row==0)
                         & (df.year.between(2022, 2023))
                         , ['year', 'uni', 'course', 'employment_rate_ft_perm']]
# Keep the overall median too
overall_rows = df.loc[(df.year.between(2022, 2023))
                      & (df.course=='Overall Median')
                      , ['year', 'uni', 'course', 'employment_rate_ft_perm']]
# Append
tech_employment = pd.concat([tech_employment, overall_rows], ignore_index=True)
tech_employment = tech_employment.pivot(index=['uni', 'course'], columns=['year'], values=['employment_rate_ft_perm'])
# collapse column index
tech_employment.columns = tech_employment.columns.get_level_values(1)
# Delete column index name
tech_employment.columns.name=None
tech_employment=tech_employment.reset_index()

In [24]:
tech_employment.sort_values('uni')

Unnamed: 0,uni,course,2022,2023
1,NTU,Computer Engineering,85.2,87.5
2,NTU,Computer Science,91.7,86.5
3,NTU,Data Science & Artificial Intelligence,82.4,83.3
4,NUS,Business Analytics,95.2,93.0
5,NUS,Computer Science,96.1,91.9
6,NUS,Data Science and Analytics,96.8,80.4
7,NUS,Information Security,91.1,89.5
8,NUS,Information Systems,93.0,91.4
9,SMU,Information Systems,93.4,88.4
0,,Overall Median,90.9,84.55


# RP vs Salary

Even with this recent dip, the demand for these courses is unlikely to fall as their median salaries are still substantially greater than the alternatives. 

Even accounting for their competitiveness, these degrees are still the ideal choice for prospective students whose priority is maximising their earning power. 

<Insert table of median salary / RP>
<Insert plot and link to live dashboard>

# Computer Science Example

In [49]:
selected_course='Computer Engineering'
selected_uni='NUS'
input_df = orig_df.copy()

latest_year = input_df.loc[(input_df['course'] == selected_course)
                        & (input_df['uni']==selected_uni), 'year'].max()
filtered_df = input_df[(input_df['year'] == latest_year)
                    & (input_df.summary_row==0)].copy()

# Divide gross_monthly_median by 1,000 for formatting
filtered_df['gross_monthly_median'] = filtered_df['gross_monthly_median'] / 1000

# Add custom data for tooltips (course name, RP, and gross monthly median)
filtered_df['tooltip'] = filtered_df.apply(lambda row: f"{row['course']}<br>RP: {row['RP']}<br>Median Salary: ${row['gross_monthly_median']:.2f}K", axis=1)
filtered_df['empty'] = ""

# Creating the scatter plot [NUS, NTU, SMU]
colors = ['#fe4a49', '#09814a', '#1438CA']
color_map = {uni: colors[i % len(colors)] for i, uni in enumerate(filtered_df['uni'].unique())}
# Store Graph Object traces
traces = []

# Highlighting the selected course
selected_data = filtered_df[(filtered_df['course'] == selected_course)
                            & (filtered_df.uni==selected_uni)]
selected_trace= go.Scatter(
            x=selected_data['RP'], 
            y=selected_data['gross_monthly_median'], 
            mode='markers', 
            name=f"{selected_course} ({selected_uni})", 
            marker=dict(color='#F587F4', size=16),
            hovertemplate='<b>{}</b><br>RP: {}<br>Median Salary: ${:.2f}K'.format(selected_course, selected_data['RP'].values[0], selected_data['gross_monthly_median'].values[0])
        )
traces.append(selected_trace)

# Other courses data
other_courses_df = filtered_df.loc[~((filtered_df.course == selected_course)
                                & (filtered_df.uni==selected_uni))]

# Iterate over all other uni courses by uni
for uni in other_courses_df['uni'].unique():
    uni_data = other_courses_df[other_courses_df['uni'] == uni]
    trace = go.Scatter(
        x=uni_data['RP'],
        y=uni_data['gross_monthly_median'],
        mode='markers',
        name=uni,
        marker=dict(color=color_map[uni], size=8, opacity=0.4),
        customdata=uni_data['tooltip'],
        hovertemplate='%{customdata}'  # This ensures only the custom tooltip is displayed
    )
    # Add the trace
    traces.append(trace)

# Create the layout
# layout = go.Layout(
#     # title=f"RP vs Gross Monthly Median Salary (Year: {latest_year})",
#     title="RP VS Median Salary for 2023",
#     xaxis=dict(title='Rank Points'),
#     yaxis=dict(title='Median Gross Salary (thousands)'),
#     # For legend at top
#     legend=dict(orientation="h",yanchor="bottom", y=1.08,xanchor="center", x=0.5, traceorder="normal"),  
#     # legend=dict(title=None, orientation="h", yanchor="bottom", y=-0.25, xanchor="center", x=0.5),
#     margin=dict(l=50, r=50, t=15, b=100),
#     # height=300

# )

layout = go.Layout(
    xaxis_fixedrange=True, yaxis_fixedrange=True,
    # title=dict(
    #         text=f"RP vs Gross Monthly Median Salary (Year: {latest_year})",
    #         subtitle=dict(
    #             text="Each point is a degree from a uni, denoted by colour.",
    #             font=dict(color="gray", size=13),
    #         ),
    title=f"RP vs Gross Monthly Median Salary (Year: {latest_year})<br><sup>Each point is a degree from a university, denoted by colour. Hover over the point for more details.</sup>",
    xaxis=dict(title='Rank Points'),
    yaxis=dict(title='Median Monthly Salary (Thousands)'),
    legend=dict(title=None, orientation="h", yanchor="bottom", y=-0.25, xanchor="center", x=0.5),
    margin=dict(l=50, r=10, t=50, b=100)
 
)

# Create the figure
fig = go.Figure(data=traces, layout=layout)

fig.show()

In [50]:
# py.plot(fig, filename= 'comSci_vs_RP', auto_open=True)

'https://plotly.com/~carel/11/'