In [3]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
from dotenv import load_dotenv
load_dotenv()
import chart_studio.plotly as py
import chart_studio.tools as tls
import chart_studio
import os

In [4]:
# To embed charts in online article
plotly_username = os.environ['plotly_username']
plotly_apikey = os.environ['plotly_apikey']

chart_studio.tools.set_credentials_file(username=plotly_username
                                        , api_key=plotly_apikey)

# Introduction

To generate relevant graphs / tables for **GES 2024** article analysis. 

In [5]:
# To track "tech-related" degrees
tech_degrees= {
    'NUS': ['Data Science and Analytics', 'Business Analytics', 'Computer Science', 'Computer Engineering', 'Information Security', 'Information Systems']
    , 'NTU': ['Computer Engineering', 'Computer Science', 'Data Science & Artificial Intelligence']
    , 'SMU': ['Information Systems']
}

In [6]:
# Data Preparation

# Data has been updated to include '24 GES + IGP data
orig_df = pd.read_csv("../../data/graphing_table.csv")
# Drop columns that are entirely missing
orig_df.dropna(axis=1, how='all', inplace=True)

# Generate summary rows # 
# Identify rows that are summary stats at the university level
orig_df['summary_row'] = 0 

# University level summaries
for uni_tmp in orig_df.uni.unique().tolist():

    # Filter for that uni, for records within the year range
    tdf = orig_df.loc[(orig_df.uni == uni_tmp)]

    # Aggregate relevant metrics
    metrics = ['Places', 'GPA', 'RP',  'employment_rate_overall','employment_rate_ft_perm','basic_monthly_mean','basic_monthly_median','gross_monthly_mean','gross_monthly_median','gross_mthly_25_percentile','gross_mthly_75_percentile']
    # 25th Percentile rows
    uni_25th_pctile = tdf.groupby('year', as_index=False)[metrics].agg(lambda x: x.quantile(0.25))
    uni_25th_pctile['uni'] = uni_tmp
    uni_25th_pctile['course'] = f'{uni_tmp} 25th Percentile'
    # Median Rows
    uni_median = tdf.groupby('year', as_index=False)[metrics].agg(lambda x: x.quantile(0.5))
    uni_median['uni'] = uni_tmp
    uni_median['course'] = f'{uni_tmp} Median'
    # 75th Percentile Rows
    uni_75th_percentile = tdf.groupby('year', as_index=False)[metrics].agg(lambda x: x.quantile(0.75))
    uni_75th_percentile['uni'] = uni_tmp
    uni_75th_percentile['course'] = f'{uni_tmp} 75th Percentile'
    stats_table = pd.concat([uni_median, uni_25th_pctile, uni_75th_percentile], ignore_index=True)
    stats_table['summary_row'] = 1
    # Join back to orig_df
    orig_df = pd.concat([orig_df,stats_table], ignore_index=True)

# Summary at the overall level
uni_25th_pctile = orig_df.groupby('year', as_index=False)[metrics].agg(lambda x: x.quantile(0.25))
uni_25th_pctile['uni'] = np.nan
uni_25th_pctile['course'] = f'Overall 25th Percentile'
# Median Rows
uni_median = orig_df.groupby('year', as_index=False)[metrics].agg(lambda x: x.quantile(0.5))
uni_median['uni'] = np.nan
uni_median['course'] = f'Overall Median'
# 75th Percentile Rows
uni_75th_percentile = orig_df.groupby('year', as_index=False)[metrics].agg(lambda x: x.quantile(0.75))
uni_75th_percentile['uni'] = np.nan
uni_75th_percentile['course'] = f'Overall 75th Percentile'
stats_table = pd.concat([uni_median, uni_25th_pctile, uni_75th_percentile], ignore_index=True)
stats_table['summary_row'] = 1
# Join back to orig_df
orig_df = pd.concat([orig_df,stats_table], ignore_index=True)

# track if a row is a course row or not
orig_df['course_row'] = orig_df.summary_row.replace({0: 1, 1: 0 })

In [None]:
# orig_df.to_csv("../../data/2024_graphing_aggrowsIncluded.csv", index=False)

# Ideas

Points to investigate for this year's article: 

- Last year it was established that tech degrees experienced the greatest surge in popularity (2013 -> 2023), has that changed this year for these degrees? Can produce a table / chart of RP changes for tech degrees from '23 to '24 + see which non-tech degrees experienced a rise in RP(?)

- Recreate the yearly Degree Starting Salaries Scatterplot including '24. Further drill down into tech-degree median salaries only? 
    - Could relate this to the "Change in Median Salaries for Tech Degrees" plot, this time comparing '23 to '24.

- Try coming up with a visualization for the higher variance in starting salaries for tech degrees in '24

- Deeper analysis of employment trends for '24. Maybe some plot of salaries vs employment? 


# Admission Criteria Changes

Charting how the admissions critera (RP) changed from '23 to '24 for tech degrees. 

Also look for any unexpected rises in popularity

In [26]:
# Duplicate Original Table
df = orig_df.copy()

# Filter for tech degrees and years 2023-2024
tech_df = pd.DataFrame()

for uni, courses in tech_degrees.items():
    for course in courses:
        temp_df = df[(df['uni'] == uni) & 
                     (df['course'] == course) & 
                     (df['year'].isin([2023, 2024])) &
                     (df['summary_row'] != 1)]  # Exclude summary rows
        tech_df = pd.concat([tech_df, temp_df])

# Create a flat list of all tech degrees tuples (uni, course)
all_tech_degrees = []
for uni, courses in tech_degrees.items():
    for course in courses:
        all_tech_degrees.append((uni, course))

# Now add non-tech degrees to the analysis
# Filter for non-tech degrees and years 2023-2024
non_tech_df = df[(df['year'].isin([2023, 2024])) & (df['summary_row'] != 1)]

# Exclude tech degrees from the dataset
non_tech_rows = []
for _, row in non_tech_df.iterrows():
    if (row['uni'], row['course']) not in all_tech_degrees:
        non_tech_rows.append(row)

non_tech_df = pd.DataFrame(non_tech_rows)

# Combine tech and non-tech dataframes
combined_df = pd.concat([tech_df, non_tech_df])

# Add a column to indicate tech degrees
combined_df['tech_degree'] = combined_df.apply(
    lambda row: 1 if (row['uni'], row['course']) in all_tech_degrees else 0, 
    axis=1
)

# Create a pivot table to compare RP scores for all degrees
rp_comparison = combined_df.pivot_table(
    index=['uni', 'course', 'tech_degree'],
    columns='year',
    values='RP',
    aggfunc='first'  # Take the first value in case of duplicates
)

# Calculate the change in RP from 2023 to 2024
rp_comparison['Change'] = rp_comparison[2024] - rp_comparison[2023]
rp_comparison['Change_abs'] = abs(rp_comparison['Change'])

# Reset index for better display
rp_comparison = rp_comparison.reset_index()

# Sort by tech_degree first (tech degrees on top) and then by absolute change
rp_comparison = rp_comparison.sort_values(['tech_degree', 'Change_abs'], ascending=[False, False])

# Format the final output
result_table = rp_comparison.copy()
result_table['2023 RP'] = result_table[2023]
result_table['2024 RP'] = result_table[2024]
result_table['Change'] = result_table['Change']
result_table['Change Direction'] = result_table['Change'].apply(
    lambda x: '↑' if x > 0 else ('↓' if x < 0 else '−')
)
result_table = result_table[['uni', 'course', 'tech_degree', '2023 RP', '2024 RP', 'Change', 'Change Direction']]\
                .reset_index(drop=True)
result_table.columns.name=None

# Display the combined table
print("RP Changes for Degrees (2023-2024)")

# # Alternatively, you can limit to top changes if the table is too large
# # Uncomment the following lines to show only tech degrees and top 15 non-tech degrees

# tech_result = result_table[result_table['tech_degree'] == 1]
# non_tech_result = result_table[result_table['tech_degree'] == 0].head(15)
# final_result = pd.concat([tech_result, non_tech_result])
# print(final_result.to_string(index=False))


RP Changes for Degrees (2023-2024)


In [43]:
result_table.to_csv("./tmp.csv", index=False)

In [28]:
result_table.loc[(result_table.tech_degree==1)]

Unnamed: 0,uni,course,tech_degree,2023 RP,2024 RP,Change,Change Direction
0,NUS,Computer Engineering,1,80.0,85.0,5.0,↑
1,NUS,Information Security,1,82.5,85.0,2.5,↑
2,NTU,Computer Science,1,80.0,78.75,-1.25,↓
3,NTU,Data Science & Artificial Intelligence,1,82.5,81.25,-1.25,↓
4,NTU,Computer Engineering,1,75.0,75.0,0.0,−
5,NUS,Business Analytics,1,85.0,85.0,0.0,−
6,NUS,Computer Science,1,85.0,85.0,0.0,−
7,NUS,Data Science and Analytics,1,78.75,78.75,0.0,−
8,NUS,Information Systems,1,85.0,85.0,0.0,−
9,SMU,Information Systems,1,75.0,75.0,0.0,−


Concerning tech-degrees, not much change. Still as popular as they ever were. 

In [27]:
result_table.loc[(result_table.tech_degree==0)].head(10)

Unnamed: 0,uni,course,tech_degree,2023 RP,2024 RP,Change,Change Direction
10,NUS,Real Estate,0,73.75,82.5,8.75,↑
11,NTU,Physics / Applied Physics,0,62.5,67.5,5.0,↑
12,NTU,Environmental Engineering,0,68.75,65.0,-3.75,↓
13,NTU,Mathematical Sciences,0,68.75,72.5,3.75,↑
14,NTU,Science (Education),0,76.25,80.0,3.75,↑
15,NTU,Accountancy,0,72.5,75.0,2.5,↑
16,NTU,"Art, Design and Media",0,72.5,70.0,-2.5,↓
17,NTU,Arts (Education),0,70.0,72.5,2.5,↑
18,NTU,Chemistry and Biological Chemistry,0,72.5,70.0,-2.5,↓
19,NTU,Civil Engineering,0,63.75,61.25,-2.5,↓


For non-tech degrees, there's a *seemingly* large jump from 2023 to 2024. However this is <mark>unlikely to be because the course itself became more popular</mark>. 

From 2024 onwards the admissions for Real Estate, Business Administration, and Accountancy were merged into a single intake. The higher RP admission scores are likely from students applying for the other two courses, not for Real Estate. 

# Degree Starting Salaries

In [29]:
df = orig_df.copy()
# Filter the DataFrame for tech-related degrees
df['is_tech_degree'] = df.apply(lambda x: 1 if x['course'] in tech_degrees.get(x['uni'], []) else 0, axis=1)

# Adjust gross_monthly_median to thousands
df['gross_monthly_median'] = df['gross_monthly_median'] / 1000

tech_degree_color = '#FF6173'
non_tech_degree_color = 'grey'

# Create the figure
fig = go.Figure()

# Plot the line plot for the overall median
median_df = df[(df['course'] == 'Overall Median') & (df['summary_row'] == 1)]
fig.add_trace(go.Scatter(
    x=median_df['year'],
    y=median_df['gross_monthly_median'],
    mode='lines',
    line=dict(color='black', dash='dash'),
    name='Overall Median'
))

# Compute the median yearlysalaary for tech degrees
tdf = df.loc[(df['is_tech_degree']==1)
             & (df.summary_row==0)]
# Median Rows
tdf_median = tdf.groupby('year', as_index=False)[metrics].agg(lambda x: x.quantile(0.5))
tdf_median['uni'] = np.nan
tdf_median['course'] = f'Tech Degree Median'
tdf_median['summary_row']=1

fig.add_trace(go.Scatter(
    x=tdf_median['year'],
    y=tdf_median['gross_monthly_median'],
    mode='lines',
    line=dict(color='red', dash='dash'),
    name='Tech Degree Median'
))

# Plot scatter plots for each non-summary course
df = df.sort_values('is_tech_degree', ascending=False)
for uni in df['uni'].unique():
    uni_df = df[(df['uni'] == uni) & (df['course_row'] == 1)]
    for course in uni_df['course'].unique():
        course_df = uni_df[uni_df['course'] == course]
        color = tech_degree_color if course_df['is_tech_degree'].iloc[0] == 1 else non_tech_degree_color
        fig.add_trace(go.Scatter(
            x=course_df['year'],
            y=course_df['gross_monthly_median'],
            mode='markers',
            marker=dict(color=color, opacity=0.7),
            name='Tech Degree' if color == tech_degree_color else 'Non-Tech Degree',
            showlegend=not any(d['name'] == ('Tech Degree' if color == tech_degree_color else 'Non-Tech Degree') for d in fig.data),
            hovertemplate=f"{uni}<br>{course}<br>Salary: %{{y:.2f}}K"
        ))

# Customize the layout
fig.update_layout(
    xaxis_fixedrange=True, yaxis_fixedrange=True,
    # title="Yearly degree starting salaries",
    title = {
        'text': 'Degree starting salaries'
        , 'y' : 0.95
        , 'x': 0.1
        , 'xanchor': 'left'
        , 'yanchor': 'top'
    },
    xaxis_title="Year",
    yaxis_title="Gross Monthly Median (SGD Thousands)",
    # legend_title="Degree Type",
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="left",
        x=0
    ),
    xaxis=dict(
        tickmode='linear',
        dtick=1  # Yearly ticks
    ),
    yaxis=dict(
        tickmode='linear',
        dtick=0.5  # $500 increments
    )
    , template='ggplot2'
    , margin = dict(r=10)
)

# Show the figure
fig.show()


The gap between the starting salaries of tech-related degrees vs non tech-related degrees continues to widen. 

In [None]:
# Host on plotly
# py.plot(fig, filename= 'yearly_salaries_2024', auto_open=True)

'https://chart-studio.plotly.com/~carel/16/'

## Aggregate statistics for tech vs non-tech degrees

In [7]:
# Load the data
df=orig_df.copy()

# Filter for 2022, 2023 and 2024 data
df_recent = df[(df['year'] >= 2022) & (df['year'] <= 2024)]

# Filter out summary rows (rows where summary_row is 1)
df_courses = df_recent[df_recent['summary_row'] != 1]

# Define tech degrees (will use the tech_degrees variable that's available in your environment)
# This uses the tech_degrees dictionary available in your environment

# Create a function to determine if a row is a tech degree
def is_tech_degree(row):
    uni = row['uni']
    course = row['course']
    return uni in tech_degrees and course in tech_degrees[uni]

# Apply the function to create a new column
df_courses['is_tech'] = df_courses.apply(is_tech_degree, axis=1)

# Group by year and tech status, then calculate statistics
stats = df_courses.groupby(['year', 'is_tech'])[
    ['gross_mthly_25_percentile', 'gross_monthly_median', 'gross_mthly_75_percentile']
].median()

# Create a comparison table
rows = [
    'Tech 2022', 'Tech 2023', 'Tech 2024', 
    'Tech % Change 2022-2023', 'Tech % Change 2023-2024',
    'Non-Tech 2022', 'Non-Tech 2023', 'Non-Tech 2024', 
    'Non-Tech % Change 2022-2023', 'Non-Tech % Change 2023-2024',
    'Tech vs Non-Tech % Diff 2022', 'Tech vs Non-Tech % Diff 2023', 'Tech vs Non-Tech % Diff 2024'
]

comparison = pd.DataFrame(index=rows, 
                          columns=['25th Percentile', 'Median', '75th Percentile'])

# Mapping of column names
col_mapping = {
    'gross_mthly_25_percentile': '25th Percentile',
    'gross_monthly_median': 'Median',
    'gross_mthly_75_percentile': '75th Percentile'
}

# Fill in the data
for metric, col_name in col_mapping.items():
    # Tech values
    tech_2022 = stats.loc[(2022, True), metric]
    tech_2023 = stats.loc[(2023, True), metric]
    tech_2024 = stats.loc[(2024, True), metric]
    tech_change_22_23 = ((tech_2023 / tech_2022) - 1) * 100
    tech_change_23_24 = ((tech_2024 / tech_2023) - 1) * 100
    
    # Non-tech values
    non_tech_2022 = stats.loc[(2022, False), metric]
    non_tech_2023 = stats.loc[(2023, False), metric]
    non_tech_2024 = stats.loc[(2024, False), metric]
    non_tech_change_22_23 = ((non_tech_2023 / non_tech_2022) - 1) * 100
    non_tech_change_23_24 = ((non_tech_2024 / non_tech_2023) - 1) * 100
    
    # Calculate differences
    diff_2022 = ((tech_2022 / non_tech_2022) - 1) * 100
    diff_2023 = ((tech_2023 / non_tech_2023) - 1) * 100
    diff_2024 = ((tech_2024 / non_tech_2024) - 1) * 100
    
    # Populate table with raw values (no special formatting)
    comparison.loc['Tech 2022', col_name] = tech_2022
    comparison.loc['Tech 2023', col_name] = tech_2023
    comparison.loc['Tech 2024', col_name] = tech_2024
    comparison.loc['Tech % Change 2022-2023', col_name] = tech_change_22_23
    comparison.loc['Tech % Change 2023-2024', col_name] = tech_change_23_24
    
    comparison.loc['Non-Tech 2022', col_name] = non_tech_2022
    comparison.loc['Non-Tech 2023', col_name] = non_tech_2023
    comparison.loc['Non-Tech 2024', col_name] = non_tech_2024
    comparison.loc['Non-Tech % Change 2022-2023', col_name] = non_tech_change_22_23
    comparison.loc['Non-Tech % Change 2023-2024', col_name] = non_tech_change_23_24
    
    comparison.loc['Tech vs Non-Tech % Diff 2022', col_name] = diff_2022
    comparison.loc['Tech vs Non-Tech % Diff 2023', col_name] = diff_2023
    comparison.loc['Tech vs Non-Tech % Diff 2024', col_name] = diff_2024

# Let's also see how many tech vs non-tech courses we're comparing
# Check that these are fairly consistent every year
tech_counts = df_courses.groupby(['year', 'is_tech']).size()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_courses['is_tech'] = df_courses.apply(is_tech_degree, axis=1)


In [8]:
comparison

Unnamed: 0,25th Percentile,Median,75th Percentile
Tech 2022,4850.0,5500.0,6447.5
Tech 2023,4850.0,5450.0,6230.0
Tech 2024,4950.0,5500.0,6550.0
Tech % Change 2022-2023,0.0,-0.909091,-3.373401
Tech % Change 2023-2024,2.061856,0.917431,5.136437
Non-Tech 2022,3625.0,4000.0,4562.5
Non-Tech 2023,3800.0,4129.0,4550.0
Non-Tech 2024,4000.0,4342.0,4917.5
Non-Tech % Change 2022-2023,4.827586,3.225,-0.273973
Non-Tech % Change 2023-2024,5.263158,5.158634,8.076923


From '23 to '24, the salary gap between tech vs non-tech holding steady / dropping very slightly. 

But it's not all good, need to look at employment rates. 

# Change in Tech Salaries by year

In [16]:
# Initial years for comparison
start_year, end_year = 2023, 2024
df = orig_df.copy()

# Filter the DataFrame for tech-related degrees
df['is_tech_degree'] = df.apply(lambda x: 1 if x['course'] in tech_degrees.get(x['uni'], []) else 0, axis=1)

# Filter the DataFrame for tech-related degrees in specified years
tech_degrees_df = df[(df['is_tech_degree'] == 1) & (df['course_row'] == 1) & df['year'].isin([start_year, end_year])]

# Compute the change in salary
change_df = tech_degrees_df.pivot_table(
    index=['uni', 'course'], 
    columns='year', 
    values='gross_monthly_median'
).reset_index()
change_df['change'] = change_df[end_year] - change_df[start_year]
change_df['course_uni'] = change_df.apply(lambda x: f"{x['course']} ({x['uni']})", axis=1)

# Sort the DataFrame by the 'change' column
change_df.sort_values(by='change', ascending=False, inplace=True)

# Create the bar plot with flipped axes
fig = go.Figure()
fig.add_trace(go.Bar(
    y=change_df['course_uni'],
    x=change_df['change'],
    orientation='h',
    marker_color=['red' if x < 0 else 'green' for x in change_df['change']],
    # Modified: Add "+" before positive values
    text=[f"+{int(x)}" if x > 0 else f"{int(x)}" for x in change_df['change']],
    textposition='outside'
))

# Add a vertical dashed line at x=0
fig.add_shape(
    type="line",
    x0=0, y0=-0.5, x1=0, y1=len(change_df) - 0.5,
    line=dict(color="black", width=2, dash="dash")
)

# Customize the layout
fig.update_layout(
    xaxis_fixedrange=True, yaxis_fixedrange=True,
    title=f"Change in Median Salaries for Tech Degrees ({start_year} to {end_year})",
    xaxis_title="Change in Gross Monthly Median Salary",
    xaxis=dict(
        tickformat="$,.0f",
        zeroline=False,
        range=[min(change_df['change']) - 100, max(change_df['change']) + 100]
    ),
    yaxis=dict(autorange="reversed"),
    margin=dict(r=10, l=15)
)

# Show the figure for Task 1
fig.show()

In [18]:
# Visualization to compare changes across three years
# Years to include
years = [2022, 2023, 2024]

# Filter tech degrees for all three years
tech_three_years = df[(df['is_tech_degree'] == 1) & (df['course_row'] == 1) & df['year'].isin(years)]

# Pivot the data to get median salaries for each year
pivot_df = tech_three_years.pivot_table(
    index=['uni', 'course'], 
    columns='year', 
    values='gross_monthly_median'
).reset_index()

# Calculate changes between years and net change
pivot_df['change_22_23'] = pivot_df[2023] - pivot_df[2022]
pivot_df['change_23_24'] = pivot_df[2024] - pivot_df[2023]
pivot_df['change_22_24'] = pivot_df[2024] - pivot_df[2022]  # Net change

# Create course_uni label
pivot_df['course_uni'] = pivot_df.apply(lambda x: f"{x['course']} ({x['uni']})", axis=1)

# Sort by net change
pivot_df.sort_values(by='change_22_24', ascending=False, inplace=True)

# Create the grouped bar chart
fig2 = go.Figure()

# Add bars for each type of change
fig2.add_trace(go.Bar(
    y=pivot_df['course_uni'],
    x=pivot_df['change_22_23'],
    name='2022 to 2023',
    orientation='h',
    marker_color='blue',
    # Round to nearest integer
    text=[f"+{int(round(x))}" if x > 0 else f"{int(round(x))}" for x in pivot_df['change_22_23']],
    textposition='outside'
))

fig2.add_trace(go.Bar(
    y=pivot_df['course_uni'],
    x=pivot_df['change_23_24'],
    name='2023 to 2024',
    orientation='h',
    marker_color='green',
    # Round to nearest integer
    text=[f"+{int(round(x))}" if x > 0 else f"{int(round(x))}" for x in pivot_df['change_23_24']],
    textposition='outside'
))

fig2.add_trace(go.Bar(
    y=pivot_df['course_uni'],
    x=pivot_df['change_22_24'],
    name='Net (2022 to 2024)',
    orientation='h',
    # Changed color from purple to pink
    marker_color='pink',
    # Round to nearest integer
    text=[f"+{int(round(x))}" if x > 0 else f"{int(round(x))}" for x in pivot_df['change_22_24']],
    textposition='outside'
))

# Add a vertical dashed line at x=0
fig2.add_shape(
    type="line",
    x0=0, y0=-0.5, x1=0, y1=len(pivot_df) - 0.5,
    line=dict(color="black", width=2, dash="dash")
)

# Customize the layout
fig2.update_layout(
    title="Changes in Median Salaries for Tech Degrees (2022-2024)",
    xaxis_title="Change in Gross Monthly Median Salary",
    xaxis=dict(
        tickformat="$,.0f",
        zeroline=False,
        # Widen the x-axis range to give annotations more space
        range=[min(pivot_df['change_22_23'].min(), pivot_df['change_23_24'].min(), 
               pivot_df['change_22_24'].min()) - 100, 
               max(pivot_df['change_22_23'].max(), pivot_df['change_23_24'].max(), 
               pivot_df['change_22_24'].max()) + 100]
    ),
    yaxis=dict(
        autorange="reversed",
    ),
    # Add spacing between bar groups
    bargap=0.3,
    bargroupgap=0.1,
    barmode='group',
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ),
    # Increase the height of the plot to prevent crowding
    height=600,
    margin=dict(r=30, l=30, t=80, b=30)
)

# Show the figure for Task 2
fig2.show()