In [57]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display

# Styling
#sns.set(style="whitegrid")
#plt.rcParams['figure.figsize'] = (10, 6)

# Load the merged jobs CSV once; every chart and filter below reads from this frame.
# The path is relative, so it resolves against the notebook's working directory.
# Columns referenced later in this notebook: 'Industry', 'Company Name', 'Country',
# 'State', 'City' and 'Experience Category'.
jobs = pd.read_csv('../data/merged/merged_data_jobs.csv')


#experience_slider = widgets.IntSlider(value=5, min=0, max=20, step=1, description='Experience ≤', layout=widgets.Layout(width='60%'))

# ---------- Plot Helpers ----------
def plot_pie(data, labels_col, values_col, title='', colors='pastel', figsize=(7, 7)):
    """Draw and show a pie chart built from two columns of ``data``.

    Parameters
    ----------
    data : object indexable by column name (the callers pass a DataFrame)
    labels_col : name of the column supplying the wedge labels
    values_col : name of the column supplying the wedge sizes
    title : text placed above the chart
    colors : palette name handed to ``sns.color_palette``
    figsize : figure size forwarded to Matplotlib

    Returns
    -------
    None. The chart is rendered through ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=figsize)
    ax.pie(
        data[values_col],
        labels=data[labels_col],
        autopct='%1.1f%%',  # percentage with one decimal on every wedge
        colors=sns.color_palette(colors),
        startangle=140,
    )
    ax.set_title(title)
    fig.tight_layout()
    plt.show()

def plot_bar(data, x, y, title='', xlabel='', ylabel='', rotation=90, palette='pastel', figsize=(10, 5)):
    """Draw and show a seaborn bar chart of column ``y`` against column ``x``.

    Parameters
    ----------
    data : frame passed to ``sns.barplot``
    x, y : column names for the two axes
    title : text placed above the chart
    xlabel, ylabel : axis captions; an empty value falls back to the column name
    rotation : rotation applied to the x tick labels
    palette : palette name forwarded to ``sns.barplot``
    figsize : figure size forwarded to Matplotlib

    Returns
    -------
    None. The chart is rendered through ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=figsize)
    sns.barplot(data=data, x=x, y=y, palette=palette, ax=ax)
    ax.set_title(title)
    ax.set_xlabel(xlabel if xlabel else x)
    ax.set_ylabel(ylabel if ylabel else y)
    plt.xticks(rotation=rotation)
    fig.tight_layout()
    plt.show()


In [58]:
# ---------- Analytics ----------
def jobs_vs_industry(df, vertical = True, top_n=10):
    """Bar chart of the ``top_n`` most frequent values of ``df['Industry']``.

    Parameters
    ----------
    df : frame with an 'Industry' column
    vertical : when truthy the industries go on the x axis; otherwise the
        axes are swapped so the counts go on the x axis
    top_n : number of industries kept (also interpolated into the title)
    """
    label_col, count_col = 'Industry', 'Number of Jobs'
    counts = df['Industry'].value_counts().head(top_n).reset_index()
    counts.columns = [label_col, count_col]
    chart_title = f'Top {top_n} Industries by Number of Jobs'

    if vertical:
        plot_bar(counts, label_col, count_col, chart_title)
    else:
        # Same data with the column order and the axis roles swapped.
        plot_bar(counts[[count_col, label_col]], count_col, label_col, chart_title)

def jobs_vs_country(df, top_n=10):
    """Pie chart of the ``top_n`` most frequent values of ``df['Country']``.

    Parameters
    ----------
    df : frame with a 'Country' column
    top_n : number of countries kept (also interpolated into the title)
    """
    top_countries = df['Country'].value_counts().head(top_n).reset_index()
    top_countries.columns = ['Country', 'Number of Jobs']

    # plot_pie is fed a frame with generic 'Category' / 'Count' columns.
    pie_frame = pd.DataFrame({
        'Category': top_countries['Country'],
        'Count': top_countries['Number of Jobs'],
    })
    plot_pie(
        pie_frame,
        labels_col='Category',
        values_col='Count',
        title=f'Top {top_n} Countries by Number of Jobs',
    )

def plot_stacked_city_state(df, top_n=10):
    """Stacked bar chart: one bar per state, one coloured segment per city.

    Only rows whose 'State' is among the ``top_n`` most frequent states are
    counted.

    Parameters
    ----------
    df : frame with 'State' and 'City' columns
    top_n : number of states kept (also interpolated into the title)
    """
    leading_states = df['State'].value_counts().nlargest(top_n).index
    in_leading_state = df['State'].isin(leading_states)

    # Rows = states, columns = cities, cells = job counts (0 where a city
    # does not occur in a state).
    state_by_city = (
        df[in_leading_state]
        .groupby(['State', 'City'])
        .size()
        .reset_index(name='Job Count')
        .pivot(index='State', columns='City', values='Job Count')
        .fillna(0)
    )

    ax = state_by_city.plot(kind='bar', stacked=True, figsize=(14, 7), colormap='tab20')
    ax.set_title(f"City-wise Job Count within Top {top_n} States")
    ax.set_xlabel("State")
    ax.set_ylabel("Number of Jobs")
    # Anchor the legend outside the axes, to the right of the plot area.
    ax.legend(title='City', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.show()

def jobs_vs_city(df, top_n_states=5, top_n_cities=10):
    """Bar chart of the busiest cities inside the busiest states.

    Rows are first restricted to the ``top_n_states`` most frequent states.
    Jobs are then counted per (State, City) pair and the ``top_n_cities``
    largest pairs are plotted, labelled as ``"City (State)"``.

    Parameters
    ----------
    df : frame with 'State' and 'City' columns
    top_n_states : number of states kept before cities are ranked
    top_n_cities : maximum number of bars drawn
    """
    top_states = df['State'].value_counts().nlargest(top_n_states).index
    city_counts = (
        df[df['State'].isin(top_states)]
        .groupby(['State', 'City'])
        .size()
        .reset_index(name='Job Count')
    )

    # Rank (State, City) pairs rather than bare city names. Ranking by name
    # alone would merge same-named cities from different states and could
    # yield more than ``top_n_cities`` bars. ``nlargest`` already returns the
    # rows in descending order, and ``assign`` builds the label on a new frame
    # instead of writing into a filtered slice (no SettingWithCopyWarning).
    top_rows = (
        city_counts
        .nlargest(top_n_cities, 'Job Count')
        .assign(City_State=lambda rows: rows['City'] + " (" + rows['State'] + ")")
    )
    plot_bar(top_rows, 'City_State', 'Job Count', f"Top {top_n_cities} Cities in Top {top_n_states} States")

def show_industry_percentage(df, selected_industry):
    """Pie chart comparing one industry's row count with all other rows.

    Parameters
    ----------
    df : frame with an 'Industry' column
    selected_industry : value of 'Industry' to single out; every other row
        is counted under 'Others'
    """
    matches = df['Industry'] == selected_industry
    n_selected = len(df[matches])
    n_others = len(df) - n_selected

    share = pd.DataFrame({
        'Category': [selected_industry, 'Others'],
        'Count': [n_selected, n_others],
    })
    plot_pie(share, 'Category', 'Count', f"Share of '{selected_industry}' Industry")

def jobs_vs_exp(df, vertical = True):
    """Bar chart of how many rows fall in each 'Experience Category'.

    Parameters
    ----------
    df : frame with an 'Experience Category' column
    vertical : when truthy the categories go on the x axis; otherwise the
        axes are swapped so the counts go on the x axis
    """
    label_col, count_col = 'Experience Category', 'Number of Jobs'
    level_counts = df['Experience Category'].value_counts().reset_index()
    level_counts.columns = [label_col, count_col]
    chart_title = 'Experience Level by Number of Jobs'

    if vertical:
        plot_bar(level_counts, label_col, count_col, chart_title)
    else:
        # Same data with the column order and the axis roles swapped.
        plot_bar(level_counts[[count_col, label_col]], count_col, label_col, chart_title)

In [59]:
#def apply_filters(df, country, state, city = 'All', industry, exp_category, exp, ignore_column=''):
def apply_filters(df, country, state, industry, exp_category, exp = 0, ignore_column=''):
    """Narrow ``df`` to the rows matching the selected dropdown values.

    Parameters
    ----------
    df : frame with 'Country', 'State', 'Industry' and 'Experience Category'
    country, state, industry, exp_category : value to keep for the matching
        column; the string 'All' disables that filter
    exp : accepted for interface compatibility; not used by any active filter
    ignore_column : one of 'country', 'state', 'industry', 'exp_category' or
        'exp' to skip that filter ('exp' and 'exp_category' both skip the
        experience-category filter)

    Returns
    -------
    The filtered frame, or a brand-new empty ``pd.DataFrame()`` (after printing
    a warning) when no row survives.
    """
    # (keys that disable the rule, column to test, value to keep)
    # City and numeric-experience filtering are not active in this function.
    rules = (
        (('country',), 'Country', country),
        (('state',), 'State', state),
        (('industry',), 'Industry', industry),
        (('exp_category', 'exp'), 'Experience Category', exp_category),
    )

    for skip_keys, column, wanted in rules:
        if ignore_column in skip_keys or wanted == 'All':
            continue
        df = df[df[column] == wanted]

    if not df.empty:
        return df

    print("⚠️ No data available for selected filters.")
    return pd.DataFrame()


In [60]:
def plot_analytics(top_n, country, state, industry, exp_category, exp = 0):
    """Render the filtered charts for the current widget selection.

    Parameters
    ----------
    top_n : number of entries shown in the industry and country charts
    country, state, industry, exp_category : selected dropdown values; the
        string 'All' disables the corresponding filter
    exp : accepted for interface compatibility; currently unused

    Charts drawn, in order:
      1. top industries (when ``industry == 'All'``), otherwise the share of
         the selected industry among rows matching the *other* filters;
      2. top cities in the top states;
      3. top countries;
      4. stacked city-by-state chart (only when ``state == 'All'``).

    Nothing is drawn when ``top_n`` is below 1 or when no rows match.
    """
    # The Top N box accepts any integer; zero or negative values would make
    # ``head(top_n)`` return an empty or truncated selection.
    if top_n < 1:
        print("⚠️ Top N must be at least 1.")
        return

    # Apply all filters (no columns ignored)
    filtered_df = apply_filters(jobs, country, state, industry, exp_category)
    if filtered_df is None or filtered_df.empty:
        return

    if industry == 'All':
        # Pass top_n by keyword: the second positional parameter of
        # jobs_vs_industry is ``vertical``, not ``top_n``.
        jobs_vs_industry(filtered_df, top_n=top_n)
    else:
        # Share of the selected industry vs. the others needs the rows of every
        # industry, so re-filter with the industry criterion ignored. Computed
        # only here so an empty selection is reported once, not twice.
        without_industry_df = apply_filters(
            jobs, country, state, industry, exp_category, ignore_column='industry'
        )
        show_industry_percentage(without_industry_df, industry)

    jobs_vs_city(filtered_df)
    jobs_vs_country(filtered_df, top_n)

    if state == 'All':
        plot_stacked_city_state(filtered_df)
    # When a single state is selected no state-level chart is drawn; the
    # original left a "Pie chart" placeholder for this branch.

In [61]:
# ---------- Widgets ----------
top_n_input = widgets.IntText(value=10, description='Top N:', layout=widgets.Layout(width='30%'))

# One Layout instance shared by every dropdown.
dropdown_style = widgets.Layout(width='40%')


def _filter_dropdown(column, label, as_text=False):
    """Return a dropdown offering 'All' plus the sorted distinct non-null values of ``jobs[column]``.

    ``as_text`` casts the values to ``str`` before de-duplicating and sorting.
    """
    present = jobs[column].dropna()
    if as_text:
        present = present.astype(str)
    return widgets.Dropdown(
        options=['All'] + sorted(present.unique()),
        description=label,
        layout=dropdown_style,
    )


industry_dd = _filter_dropdown('Industry', 'Industry:')
company_dd = _filter_dropdown('Company Name', 'Company:')
country_dd = _filter_dropdown('Country', 'Country:', as_text=True)
state_dd = _filter_dropdown('State', 'State:', as_text=True)
city_dd = _filter_dropdown('City', 'City:', as_text=True)
exp_cat_dd = _filter_dropdown('Experience Category', 'Exp. Level:')

# One Output widget per overview chart; each chart is rendered into its widget
# once, from the unfiltered ``jobs`` frame.
out_industry, out_country, out_experience, out_city = (widgets.Output() for _ in range(4))

overview_renderers = [
    (out_industry, lambda: jobs_vs_industry(jobs, vertical = False, top_n=jobs['Industry'].nunique())),
    (out_country, lambda: jobs_vs_country(jobs, top_n=jobs['Country'].nunique())),
    (out_experience, lambda: jobs_vs_exp(jobs)),
    (out_city, lambda: plot_stacked_city_state(jobs, top_n=jobs['State'].nunique())),
]

for target_output, render_chart in overview_renderers:
    # Anything shown inside the ``with`` block is captured by that widget.
    with target_output:
        render_chart()


In [96]:
from datetime import datetime
# Create save button
# NOTE(review): the tooltip promises a PNG of the dashboard, but every line that
# would display this button in the visible cells is commented out, so the
# button is currently never shown.
save_button = widgets.Button(
    description="Save Report",
    button_style='success',
    tooltip='Click to save the dashboard as PNG',
    icon='download'
)

# Define save function
def save_report_callback(b):
    """Button handler: save the notebook-level figure ``fig`` as a timestamped PNG.

    Parameters
    ----------
    b : the object passed by the button's click event; not used

    The file is written to the current working directory as
    ``visualization_report_<YYYYmmdd_HHMMSS>.png`` at 300 dpi. Outcomes are
    reported with ``print``; the handler never raises.
    """
    # Look the figure up explicitly. No cell visible in this notebook assigns
    # ``fig``, and a bare reference would only surface as a generic NameError
    # text from the handler below.
    figure = globals().get('fig')
    if figure is None:
        print("⚠️ No figure to save: assign the Matplotlib figure to a global named 'fig' first.")
        return

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f"visualization_report_{timestamp}.png"
    try:
        figure.savefig(filename, dpi=300)
        print(f"✅ Report saved as '{filename}'")
    except Exception as e:
        # Best-effort by design: a failed save must not break the widget callback.
        print(f'Error saving report: {e}')

# Register the click handler; it is invoked with one argument (received as ``b``).
save_button.on_click(save_report_callback)

# Display the button (currently disabled)
#display(save_button)

In [100]:
import ipywidgets as widgets
from IPython.display import display

# Left-hand column: a caption followed by the filter controls, in display order.
filter_controls = [top_n_input, industry_dd, country_dd, state_dd, exp_cat_dd]
filters = widgets.VBox(
    children=[widgets.HTML("<b>Select the filters below:</b>"), *filter_controls],
    layout=widgets.Layout(width='100%', padding='5px'),
)

# Bind each plot_analytics argument to the widget that supplies its value.
spec_views = widgets.interactive_output(
    plot_analytics,
    dict(
        top_n=top_n_input,
        country=country_dd,
        state=state_dd,
        industry=industry_dd,
        exp_category=exp_cat_dd,
    ),
)
spec_views.layout = widgets.Layout(width='100%', padding='10px')

# Right-hand column: heading plus the interactive plot output.
plots_column = widgets.VBox([
    widgets.HTML("<h4>Customised Views for Analysis</h4>"),
    spec_views,
])

# Full interactive section: title on top, filters and plots side by side below.
customised_display = widgets.VBox([
    widgets.HTML("<h2 style='color:#333;'>Interactive visualizations</h2>"),
    widgets.HBox([filters, plots_column], layout=widgets.Layout(width='100%')),
])

def _titled_panel(heading, body):
    """Stack an ``<h4>`` heading above ``body`` in a VBox."""
    return widgets.VBox([widgets.HTML(f"<h4>{heading}</h4>"), body])


# Static overview: a title followed by two rows of two charts each.
gen_view = widgets.VBox([
    widgets.HTML("<h2 style='color:#333;'>General Overview</h2>"),
    widgets.HBox([
        _titled_panel("Top Industries by number of jobs", out_industry),
        _titled_panel("Experience Level by Number of Jobs", out_experience),
    ]),
    widgets.HBox([
        _titled_panel("Top Countries by Job Count", out_country),
        _titled_panel("Top Cities in Top States", out_city),
    ]),
])


In [None]:
display(gen_view)   # Show the general overview section on its own

In [None]:
display(customised_display)    # Show the filter panel and the plots it drives on their own

In [101]:
# Full page: title, static overview, rule, interactive section, rule.
# The save button row is intentionally not part of the layout.
dashboard = widgets.VBox(
    children=[
        widgets.HTML("<h1 style='color:#2c3e50;'>Job Analytics Dashboard</h1>"),
        gen_view,
        widgets.HTML("<hr>"),
        customised_display,
        widgets.HTML("<hr>"),
    ]
)

# Render the assembled dashboard.
display(dashboard)

VBox(children=(HTML(value="<h1 style='color:#2c3e50;'>Job Analytics Dashboard</h1>"), VBox(children=(HTML(valu…