In [None]:
import pandas as pd
import numpy as np
import plotly.express as px 
import plotly.graph_objects as go
from scipy import stats
# Load the pay-distribution CSV; the path is relative, so it resolves against
# the kernel's current working directory.
df = pd.read_csv("../queried_csv/1_pay_distribution.csv")

In [16]:
# Print a summary of the frame: columns, dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 631 entries, 0 to 630
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   company_name     631 non-null    object 
 1   salary_hour_avg  631 non-null    float64
dtypes: float64(1), object(1)
memory usage: 10.0+ KB


In [62]:
# Histogram of hourly pay with a kernel-density curve overlaid on the same
# "Count" axis.

# Use one NaN-free series everywhere so the bars, the KDE fit and the sample
# size used for scaling all describe exactly the same observations.
salaries = df['salary_hour_avg'].dropna()
salary_min = float(salaries.min())
salary_max = float(salaries.max())

N_BINS = 50        # number of equal-width histogram bins across the data range
KDE_POINTS = 100   # resolution of the density curve

# Fix the bin width explicitly. `nbinsx` is only an upper bound that plotly
# rounds to a "nice" size, so the real width would be unknown and the KDE
# could not be scaled to match the bars.
bin_size = (salary_max - salary_min) / N_BINS

fig = go.Figure()

fig.add_trace(go.Histogram(
    x=salaries,
    name='Salary Distribution',
    xbins=dict(
        start=salary_min,
        # Pad the end by half a bin so the maximum value is not left outside
        # the last bin when a bin edge lands exactly on it.
        end=salary_max + bin_size / 2,
        size=bin_size
    ),
    marker=dict(
        color='rgba(100, 200, 255, 0.8)',
        line=dict(color='rgba(255, 255, 255, 0.8)', width=1)
    ),
    hovertemplate="<b>Count:</b> %{y}<br><b>Hourly Pay Rate:</b> %{x:.1f}<extra></extra>"
))

# Gaussian KDE evaluated over the observed salary range.
kde = stats.gaussian_kde(salaries)
x_range = np.linspace(salary_min, salary_max, KDE_POINTS)
y_kde = kde(x_range)

# A density integrates to 1; expected count per bin is density * n * bin width.
# Using the histogram's own bin width puts the curve on the same scale as the bars.
scaling_factor = len(salaries) * bin_size
y_kde_scaled = y_kde * scaling_factor

fig.add_trace(go.Scatter(
    x=x_range,
    y=y_kde_scaled,
    name='Distribution Line',
    line=dict(color='white', width=3),
    hoverinfo='skip'  # the curve is a visual guide only; hover stays on the bars
))

fig.update_layout(
    title="<b>Distribution of Hourly Pay Rate of Data Engineers</b>",
    title_x=0.5,
    title_y=0.95,
    width=1000,
    height=600,
    title_font=dict(family="verdana", size=20, color="white"),
    paper_bgcolor='#08152A',
    plot_bgcolor='#08152A',
    xaxis_title='Hourly Salary (bin)',
    yaxis_title='Count',
    bargap=0.05,
    showlegend=False,
    font=dict(
        color='white',
        size=14
    ),
    xaxis=dict(
        title_font=dict(family="verdana", size=18, color='white'),
        tickfont=dict(family="verdana", size=14, color='white'),
        gridcolor='rgba(255, 255, 255, 0.1)',
        zerolinecolor='white'
    ),
    yaxis=dict(
        title_font=dict(family="verdana", size=18, color='white'),
        tickfont=dict(family="verdana", size=14, color='white'),
        gridcolor='rgba(255, 255, 255, 0.1)',
        zerolinecolor='white'
    )
)

fig.show()
