In [1]:
import pandas as pd
import plotly.express as px 
import plotly.graph_objects as go
import numpy as np
# Location of the query export (relative path) that feeds this notebook
TOP_PAYING_CSV = "../queried_csv/2_top_paying_opportunities.csv"
df = pd.read_csv(TOP_PAYING_CSV)

In [2]:
# Transform data
# Turn the numeric degree flag into a readable label. Missing flags are
# filled with 0 first, so they end up labelled 'Degree required'.
degree_labels = {
    1: 'No degree required',
    0: 'Degree required',
}
df['job_no_degree_mention'] = (
    df['job_no_degree_mention']
    .fillna(0)
    .replace(degree_labels)
)
df

Unnamed: 0,job_title,salary_hour_avg,job_posted_date,job_country,company_name,job_no_degree_mention
0,Data Engineer,117.5,2023-09-28 15:53:54,Sudan,Upwork,No degree required
1,Data Engineer,105.0,2023-10-04 21:18:25,Sudan,SSI People,No degree required
2,Data Engineer,100.0,2023-07-21 14:10:48,United States,Wright Technical Services,Degree required
3,Data Engineer,100.0,2023-04-06 18:05:08,United States,BayOne Solutions,Degree required
4,Data Engineer,100.0,2023-09-15 19:09:03,United States,Upwork,No degree required
5,Data Engineer,100.0,2023-01-31 01:09:50,United States,Atlantic Partners Corporation,Degree required
6,Data Engineer,97.5,2023-01-30 16:05:19,United States,Brooksource,No degree required
7,Data Engineer,96.5,2023-10-13 18:27:18,United States,Motion Recruitment,No degree required
8,Data Engineer,95.0,2023-02-21 09:27:41,United States,Alium,Degree required
9,Data Engineer,95.0,2023-10-12 23:28:34,United States,Motion Recruitment,Degree required


In [3]:
# Scaling function to make differences more visible
def scale_values(values, min_size=15, max_size=80):
    """Log-transform ``values`` and rescale them linearly into [min_size, max_size].

    The input is passed through ``np.log1p`` and then min-max normalised, so
    the smallest input maps to ``min_size`` and the largest to ``max_size``.

    Args:
        values: Numeric array-like (for example a pandas Series or a NumPy array).
        min_size: Output value assigned to the smallest input.
        max_size: Output value assigned to the largest input.

    Returns:
        The scaled values, in whatever container ``np.log1p(values)`` produces
        (a Series for Series input, otherwise an ndarray). Empty input is
        returned empty. If every input is identical, every output is
        ``min_size``.
    """
    scaled = np.log1p(values)

    # Nothing to scale; also avoids min()/max() on an empty array, which raises.
    if np.size(scaled) == 0:
        return scaled

    lowest = scaled.min()
    span = scaled.max() - lowest

    if span == 0:
        # All inputs are equal: normalising would divide by zero and turn every
        # size into NaN. Emit a constant instead, keeping the container type.
        return scaled * 0 + min_size

    normalised = (scaled - lowest) / span
    return normalised * (max_size - min_size) + min_size
# Plot-only frame: a copy of df carrying an extra column with the scaled
# hourly rate, so the loaded frame itself gains no plotting columns.
df_plot = df.assign(
    scaled_salary=lambda frame: scale_values(frame['salary_hour_avg'])
)

# Define a color map for the categorical variable
job_types = df['job_no_degree_mention'].unique()
color_map = dict([
    ("No degree required", "#FF8A8A"),  # Soft red
    ("Degree required", "#85C7E3"),     # Soft blue
])

# Build the treemap
# One fill colour per row, looked up from the row's degree-requirement label.
tile_colors = [color_map[label] for label in df['job_no_degree_mention']]

# Per-tile payload read by both templates below:
# [0] hourly rate, [1] company name, [2] degree-requirement label.
tile_payload = np.stack(
    [df['salary_hour_avg'], df['company_name'], df['job_no_degree_mention']],
    axis=-1,
)

# Text drawn inside each tile.
tile_template = (
    "<b>Hourly Rate:</b> %{customdata[0]:,.2f}<br>"
    "<b>Company:</b> %{customdata[1]}<br>"
    "%{customdata[2]}"
)

# Text shown on hover; the empty <extra> tag hides Plotly's secondary hover box.
hover_template = (
    "<b>Hourly Rate:</b> %{customdata[0]:,.2f}<br>"
    "<b>Company:</b> %{customdata[1]}<br>"
    "<b>Job:</b> %{customdata[2]}<extra></extra>"
)

treemap_trace = go.Treemap(
    labels=df.index,
    # Every tile has an empty parent, i.e. a flat, single-level treemap.
    parents=["" for _ in range(len(df))],
    # Tile area follows the scaled rate rather than the raw rate.
    values=df_plot['scaled_salary'],
    marker={
        "colors": tile_colors,
        "line": {"color": 'white', "width": 1},
    },
    textinfo="none",
    texttemplate=tile_template,
    customdata=tile_payload,
    textfont={"family": "verdana", "size": 16, "color": "white"},
    hovertemplate=hover_template,
)

fig = go.Figure(treemap_trace)

# Figure-level styling: centred bold title, fixed canvas size, and white
# verdana text on a dark navy background.
layout_options = {
    "title": {
        "text": "<b>Treemap of Salary per Instance with Job Type Colors</b>",
        "x": 0.5,
        "y": 0.90,
    },
    "width": 1000,
    "height": 800,
    "font": {"family": "verdana", "size": 20, "color": "white"},
    "paper_bgcolor": '#08152A',
}
fig.update_layout(**layout_options)

fig.show()