### Dash 
Link: https://dash.plotly.com/tutorial
To install: pip install dash
Can Run: app.py file in jupyter notebook


In [1]:
# import library
import pandas as pd
import numpy as np
import dash
import plotly.express as px
from jupyter_dash import JupyterDash
from dash import Dash, dcc, html, Input, Output
import plotly.graph_objects as go
from wordcloud import WordCloud
import io
import base64
import matplotlib.pyplot as plt
from collections import Counter
import dash_bootstrap_components as dbc
from io import BytesIO
from collections import Counter
from PIL import Image
from datetime import datetime, timedelta
import json

# load data
path = "Cleaned_2021_2023_Data.csv"
df = pd.read_csv(path)
print(df.shape)
df.info()

(84590, 17)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84590 entries, 0 to 84589
Data columns (total 17 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Unnamed: 0       84590 non-null  int64 
 1   Job_Title        84590 non-null  object
 2   Company_Name     84590 non-null  object
 3   Position_Level   84590 non-null  object
 4   Year_Experience  84590 non-null  int64 
 5   Hiring           84590 non-null  int64 
 6   Salary           84590 non-null  object
 7   Gender           84590 non-null  object
 8   Age              84590 non-null  int64 
 9   Job_Type         84590 non-null  object
 10  Job_Category     84590 non-null  object
 11  Industry         84590 non-null  object
 12  Qualification    84590 non-null  object
 13  Language         84590 non-null  object
 14  Location         84583 non-null  object
 15  Publish_Date     84590 non-null  object
 16  Closing_Date     84590 non-null  object
dtypes: int64(4), object

In [2]:
print(df['Salary'].value_counts())

print(df['Job_Category'].value_counts())

Negotiable     69065
$200-$500       8200
$500-$999       3937
$1000-$2000     1950
<$200            994
>$2000           370
>$6000            74
Name: Salary, dtype: int64
Sales                                   11494
IT                                       7130
Accounting                               6583
Bank/Insurance                           5861
Marketing                                5641
Architecture/Engineering                 4749
Education/Training                       3537
Administration                           3201
Management                               3089
Human Resource                           3024
Others                                   2316
Logistics/Shipping/Deliver/Warehouse     2253
Operation/Production                     2223
Design                                   2086
Customer Service                         2062
Technician/Maintenance                   1948
Finance                                  1725
Assistant/Secretary                      153

In [3]:
# Separate d/m/date/y from transaction date
df['Publish_Date'] = pd.to_datetime(df['Publish_Date'])
df['MonthYear'] = df['Publish_Date'].dt.strftime('%b %Y')
df['MonthYear'] = df['MonthYear'].str.strip()
df.head()

Unnamed: 0.1,Unnamed: 0,Job_Title,Company_Name,Position_Level,Year_Experience,Hiring,Salary,Gender,Age,Job_Type,Job_Category,Industry,Qualification,Language,Location,Publish_Date,Closing_Date,MonthYear
0,0,Technician,S&V Cambodia Jobs,Middle,2,1,Negotiable,Male,24,Full-Time,Technician/Maintenance,Recruiting Services,Bachelor's Degree,English,Phnom Penh,2023-02-17,3/19/2023,Feb 2023
1,2,Executive Secretary The Vice President Academi...,ក្រុមហ៊ុន ម៉េងលី ជេ. គួច អេឌ្យូខេសិន (Mengly J...,Middle,2,1,Negotiable,Male/Female,18,Full-Time,Education/Training,Education,Bachelor's Degree,English,Phnom Penh,2023-02-17,3/19/2023,Feb 2023
2,3,Event Planner,ក្រុមហ៊ុន ម៉េងលី ជេ. គួច អេឌ្យូខេសិន (Mengly J...,Entry,1,1,Negotiable,Male/Female,18,Full-Time,Administration,Education,Bachelor's Degree,English,Siem Reap,2023-02-17,3/19/2023,Feb 2023
3,4,Digital Marketing Officer,ក្រុមហ៊ុន ម៉េងលី ជេ. គួច អេឌ្យូខេសិន (Mengly J...,Entry,2,1,Negotiable,Male/Female,18,Full-Time,Marketing,Education,Bachelor's Degree,English,Phnom Penh,2023-02-17,3/19/2023,Feb 2023
4,5,Deputy Director,ក្រុមហ៊ុន ម៉េងលី ជេ. គួច អេឌ្យូខេសិន (Mengly J...,Senior,1,3,Negotiable,Male/Female,18,Full-Time,Education/Training,Education,Master's Degree,English,Takeo,2023-02-17,3/19/2023,Feb 2023


In [4]:
lag = df['Language'].value_counts()
lag

English                                             69912
Others                                               6989
Chinese-Mandarin/English                             2886
English/Khmer                                        1632
Chinese-Mandarin                                      961
                                                    ...  
Chinese-Mandarin/Japanese/Korean                        1
English/Khmer/Others                                    1
Chinese-Cantonese/Chinese-Mandarin/English/Khmer        1
Chinese-Mandarin/Japanese                               1
Chinese-Mandarin/Khmer/Korean                           1
Name: Language, Length: 62, dtype: int64

In [5]:
df_lag = pd.DataFrame(df['Language'])
df_lag

Unnamed: 0,Language
0,English
1,English
2,English
3,English
4,English
...,...
84585,English
84586,English
84587,English
84588,English


In [6]:
# Extract individual words from combined language strings and count occurrences
lag_counts = Counter()
for lang in df['Language']:
    words = lang.split('/')
    unique_words = set(words)  # Keep only unique words in the language string
    lag_counts.update(unique_words)
    
# Create a DataFrame from the word counts
lag_counts_df = pd.DataFrame(lag_counts.items(), columns=['laguage', 'Frequency'])
lag_counts_df

Unnamed: 0,laguage,Frequency
0,English,75489
1,Khmer,2607
2,Chinese-Mandarin,4158
3,Others,6994
4,Japanese,283
5,Vietnamese,94
6,Chinese-Cantonese,126
7,Korean,344
8,French,161
9,Thai,196


In [7]:
df.to_csv('dataset.csv', index=False)

### Visulization

We use Potly Dash to create 3 container

In [31]:
# set up the dash app
app = Dash(__name__)

# Set up style
title_style = {'color':'#ffffff', 'font-weight': 'bold',
               'font-family': "Fira Sans, sans-serif",'fontSize': '20px',
               'margin-top':'20px','margin-bottom': '20px'}
graph_dropdown_style = {'background-color':'#ffffff',
        'width': '100%',
        'color': '#0037FF',
        'text-align': 'None',
        'font-family': "Fira Sans, sans-serif",
        'font-size': '14px',
        'width': "18rem",
        'padding':'None',
        'margin':'None',
        'margin-top':'10px',
        'margin-bottom': '2px',
        'border': '3px #f7e99a',
        'border-radius': '2px',
        'background-image': 'None',
        #'box-shadow': '2px 2px 5px #999999',
        'box-shadow': 'rgba(0, 0, 0, 0.02) 0px 1px 3px 0px, rgba(27, 31, 35, 0.15) 0px 0px 0px 1px',
        'cursor': 'True',
        'font-style': 'None',
        'font-weight': 'normal'}
dropdown_style = {'background-color':'#12284C',
                                     'width': '100%',
                                     'color': '#0b0854',
                                     'text-align': 'None',
                                     'font-family': "Fira Sans, sans-serif",
                                     'font-size': '14px',
                                     'padding':'None',
                                     'margin':'None',
                                     'margin-top':'10px',
                                     'margin-bottom': '10px',
                                     'border': '3px lightblue',
                                     'border-radius': '10px',
                                     'background-image': 'None',
                                     #'box-shadow': '2px 2px 5px #999999',
                                     'cursor': 'True',
                                     'font-style': 'None',
                                     'font-weight': 'normal'}
checklist_style = {'background-color':'#12284C',
                                     'width': '100',
                                     'display': 'flex',
                                     'flex-direction': 'column',
                                     'color': '#CCCCCC',
                                     'font-family': "Fira Sans, sans-serif",
                                     'font-size': '14px',
                                     'margin-left': '20px',
                                     'margin-bottom': '10px',
                                     'margin-top':'10px',
                                     'font-weight':'550',
                                     'border': '3px #f7e99a',
                                     'border-radius': '10px'}
                                     #'box-shadow': '2px 2px 5px #999999'}
card_style = { "width": "18rem",
            "backgroundColor": "#ffffff",  # Background color
            "color": "#12284C",  # Text color
            #"border": "2px solid #007bff",  # Border with blue color
            "borderRadius": "20px",  # Rounded corners
            "boxShadow": "0 5px 10px rgba(0, 0, 0, 0.1)",  # Shadow effect
            "padding": "10px",  # Spacing inside the card},
            "verticalAlign": "center",  # Align cards at the top
            "fontSize": "14px",  # Font size of the card text
            "textAlign": "center",  # Center-align the card text
            "font-family": "Fira Sans, sans-serif",
            "height": "100px", } # Set the height of the card to 200 pixels


# Convert 'MonthYear' column to datetime type
df['MonthYear'] = pd.to_datetime(df['MonthYear'])
num_positions = len(df['Job_Title'])
formatted_num_positions = "{:,}".format(num_positions)
num_hiring = sum(df['Hiring'])
formatted_num_hiring = "{:,}".format(num_hiring)

# Create a card to display the number of positions
card_position = dbc.Card(
        dbc.CardBody(
            [
                html.H4("Total Positions", className="card-title"),
                html.H1(formatted_num_positions, className="card-text"),
            ]
        ),

    color="#b2e6c6",
    inverse=True,
    style=card_style
)

card_hiring = dbc.Card(
    [
        dbc.CardBody(
            [
                html.H4("Total of Hiring", className="card-title"),
                html.H1(formatted_num_hiring, className="card-text"),
            ]
        )
    ],
    color="primary",
    inverse=True,
    style=card_style
)

card_categories = dbc.Card(
    [
        dbc.CardBody(
            [
                html.H4("Total Job Category", className="card-title"),
                html.H1(len(df['Job_Category'].unique()), className="card-text"),
            ]
        )
    ],
    color="primary",
    inverse=True,
    style=card_style
)

card_industry = dbc.Card(
    [
        dbc.CardBody(
            [
                html.H4("Total Industry", className="card-title"),
                html.H1(len(df['Industry'].unique()), className="card-text"),
            ]
        )
    ],
    color="primary",
    inverse=True,
    style=card_style
)

# Set up app layout
app.layout = html.Div(
    className = "container",
    style = {'display': 'flex', 'flex-direction': 'column','justify-content': 'space-between','background-color': '#12284C'},
    children = [
        html.Div(
            className = "header",
            style = {'flex':'0','padding': '0px'},
            children = [
                # html.Img(
                #     src= 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRgJVILdD1h8PlbdJ_61yXjth8_8ATpkDmnZb1UqXuw&s',  # Replace with actual path
                #     style={'height': '500px', 'width':'00px'}  # Adjust the height as needed
                # ),​​​​​​
                html.H1('Cambodia Job Visualization', 
                        style={'text-align': 'center','color':'#FF4438',
                               'font-family': "Fira Sans, sans-serif",
                                'fontSize': '50px',
                                }),
            ]
        ),
        
        html.Div(
            className = "content",
            style = {'display': 'flex', 'flex-direction': 'row'},
            children = [
                html.Div(
                    className = "filters",
                    style = {'flex':'0.5','padding': '10px','justify-content':'flex-start','width':'100','background-color': '#12284C'},
                    children = [
                        html.Label(children = ['Job Category'], style = title_style),
                        dcc.Dropdown(
                            id = 'job-category-dropdown',
                            options = [{'label': category, 'value': category} for category in df['Job_Category'].unique()],
                            value = [],
                            multi = True,
                            searchable = True,
                            clearable = True,
                            placeholder = "Select job category",
                            style = dropdown_style
                        ),
                        html.Label(children = ['Industry'], style = title_style),
                        dcc.Dropdown(
                            id = 'industry-dropdown',
                            options = [{'label': industry, 'value': industry} for industry in df['Industry'].unique()],
                            value = [],
                            multi = True,
                            searchable = True,
                            clearable = True,
                            placeholder = "Select industry",
                            style = dropdown_style
                        ),
                        html.Label(children = ['Qualification'], style = title_style),
                        dcc.Dropdown(
                            id = 'qualification-dropdown',
                            options = [{'label': qualification, 'value': qualification} for qualification in df['Qualification'].unique()],
                            value = [],
                            multi = True,
                            searchable = True,
                            clearable = True,
                            placeholder = "Select qualification",
                            style = dropdown_style
                        ),
                        html.Label(children = ['Language'], style = title_style),
                        dcc.Dropdown(
                            id = 'language-dropdown',
                            options = [{'label': language, 'value': language} for language in df['Language'].unique()],
                            value = [],
                            multi = True,
                            searchable = True,
                            clearable = True,
                            placeholder = "Select language",
                            style = dropdown_style
                        ),
                        html.Label(children = ['Gender'], style = title_style),
                        dcc.Checklist(
                            id = 'gender-checkboxes',
                            options = [{'label': gender, 'value': gender} for gender in df['Gender'].unique()],
                            value = [],
                            inline = True,
                            style = checklist_style,
                            labelStyle={'padding': '5px'}
                        ),
                        html.Label(children = ['Job Type'], style = title_style),
                        dcc.Checklist(
                            id = 'job-type-checkboxes',
                            options = [{'label': job_type, 'value': job_type} for job_type in df['Job_Type'].unique()],
                            value = [],
                            inline = False,
                            style = checklist_style,
                            labelStyle={'padding': '5px'}
                        ),
                        html.Label(children = ['Position Level'], style = title_style),
                        dcc.Checklist(
                            id = 'position-level-checkboxes',
                            options = [{'label': position_level, 'value': position_level} for position_level in df['Position_Level'].unique()],
                            value = [],
                            inline = False,
                            style = checklist_style,
                            labelStyle={'padding': '5px'}
                        ),
                    ],
                ),
                
                html.Div(
                    className = "figure",
                    style = {'flex': '2','padding': '10px',
                             'justify-content': 'flex-end',
                             'width':'500',
                             'background-color': '#CCCCCC'},
                    children = [
                    html.Div(
                        [
                            card_position,
                            card_hiring,

                        ],
                        style={"display": "flex",
                               "justify-content": "space-around",
                               "margin": "20px"},
                    ),  

                    #dcc.Graph(id = "pie-graph"),
                    #html.Label(children = ['Select Total Number of Hirng base on Y-axis Variable: '], style = title_style),
                    dcc.Dropdown(
                        id="y-axis-dropdown",
                        options=[
                            {"label": html.Span(['Total Number of Hiring by Salary']),
                                                #style={'color': '#000000'}),
                                                "value": "Salary",
                                                "search": "Total Number of Hiring by Salary"},
                            {"label": html.Span(['Total Number of Hiring by Gender']),
                                                #style={'color': '#000000'}),
                                                "value": "Gender",
                                                "search": "Total Number of Hiring by Gender"},
                            {"label": html.Span(['Total Number of Hiring by Job Type']),
                                                #style={'color': '#000000'}),
                                                "value": "Job_Type",
                                                "search": "Total Number of Hiring by Job Type"},
                            {"label": html.Span(['Total Number of Hiring by Position Level']),
                                                #style={'color': '#000000'}),
                                                "value": "Position_Level",
                                                "search": "Total Number of Hiring by Position Level"},
                            {"label": html.Span(['Total Number of Hiring by Job Category']),
                                                #style={'color': '#000000'}),
                                                "value": "Job_Category",
                                                "search": "Total Number of Hiring by Job Category"},
                            {"label": html.Span(['Total Number of Hiring by Industry']),
                                                #style={'color': '#000000'}),
                                                "value": "Industry",
                                                "search": "Total Number of Hiring by Industry"},
                            {"label": html.Span(['Total Number of Hiring by Qualification']),
                                                #style={'color': '#000000'}),
                                                "value": "Qualification",
                                                "search": "Total Number of Hiring by Qualification"},
                        ],
                        value = "Salary",
                        placeholder = "Select here to see more",
                        style = graph_dropdown_style
                    ),
                    
                    dcc.Graph(id = "bar-graph"),
                    #dcc.Graph(id="word-cloud-lag", figure={}),
                    #dcc.Graph(id="word-cloud-loc", figure={}),
                    

                    dcc.Dropdown(
                    id='word-cloud-selector',
                    options=[
                        {"label": html.Span(['Frequency of language needed']),
                                            #style={'color': '#000000'}),
                                            "value": "cloud1",
                                            "search": "Frequency of language needed"},
                        {"label": html.Span(['Frequency of location needed']),
                                            #style={'color': '#000000'}),
                                            "value": "cloud2",
                                            "search": "Frequency of location needed"},
                        {"label": html.Span(['Frequency of Job title needed']),
                                            #style={'color': '#000000'}),
                                            "value": "cloud3",
                                            "search": "Frequency of Job title needed"},
                    ],
                    value= "cloud1", # Default value 
                    placeholder = "Select here to see more",
                    style = graph_dropdown_style
                    ),
                    dcc.Graph(id='word-cloud-output',figure={}),

                    html.Label(children = ['Year Experience'], 
                                style = {'color':'#12284C', 'font-weight': 'bold',
                                        'font-family': "Fira Sans, sans-serif",'fontSize': '20px',
                                        'margin-top':'10px','margin-bottom': '10px'}),
                    
                    dcc.RangeSlider(
                        id='range-slider',
                        min=df['Year_Experience'].min(),
                        max=df['Year_Experience'].max(),
                        step=2,
                        marks={str(year): str(year) for year in range(df['Year_Experience'].min(),df['Year_Experience'].max()+1,2)},
                        value=[df['Year_Experience'].min(), df['Year_Experience'].max()],
                        vertical=False,
                        disabled=False,
                        dots=True,
                        included=True,
                        pushable=True,
                        tooltip={'placement': 'bottom'},
                        updatemode='mouseup',
                        # Adjust the margin-top value to control the spacing
                    ),
                
                    ],
                ),
             
                html.Div(
                    className = "figure",
                    style = {'flex': '2', 'padding': '10px','justify-content': 'flex-end','width':'500','background-color': '#CCCCCC'},
                    children = [
                                            
                    html.Div(
                        [
                            card_categories,
                            card_industry,
                        ],
                        style={"display": "flex", "justify-content": "space-around", "margin": "20px"},
                    ), 
                        
                    #dcc.Graph(id = "line-graph"),
                    #dcc.Graph(id = "scatter-graph"),
                    dcc.Dropdown(
                    id='graph-selector',
                    options=[
                            {"label": html.Span(['Number of hiring each month']),
                                                #style={'color': '#000000'}),
                                                "value": "line",
                                                "search": "Number of hiring each month"},
                            {"label": html.Span(['Number of hiring by average age']),
                                                #style={'color': '#000000'}),
                                                "value": "scatter",
                                                "search": "Number of hiring by average age"},
                    ],
                    value='line', # Default value 
                    placeholder = "Select here to see more",
                    style = graph_dropdown_style
                    ),
                    dcc.Graph(id='graph-output'),
                
                
                    html.Div(
                        [
                            dcc.Dropdown(
                                id='top-n-dropdown',
                                options = [{'label': f'Top {i}', 'value': i} for i in range(5, 101, 5)],  # Options from 1 to 10    
                                value= 5,  # Default value
                                placeholder="Select Top N",
                                style = graph_dropdown_style
                            ),
                            dcc.Dropdown(
                                id="y-axis-dropdown-treemap",
                                options=[
                                    {"label": html.Span(['Top Hiring Company']),
                                                        #style={'color': '#000000'}),
                                                        "value": "Company",
                                                        "search": "Top hiring company"},
                                    {"label": html.Span(['Top Job Title Frequency']),
                                                        #style={'color': '#000000'}),
                                                        "value": "Job",
                                                        "search": "Top job title frequency"},
                                ],
                                value = "Company",
                                placeholder = "Select here to see more",
                                style = graph_dropdown_style
                            ),
                        ],
                        style={"display": "flex", "justify-content": "space-around"},
                    ), 
                    
                    dcc.Graph(id = "treemap",figure={}),   
                    ],
                ),
            ],
        ),
    ],
)

# Connect the Plotly graphs with Dash Components
@app.callback(
    #Output("line-graph", "figure"),
    #Output("scatter-graph", "figure"),
    #Output("pie-graph", "figure"),
    Output("graph-output", "figure"),
    Output("bar-graph", "figure"),
    Output("treemap", "figure"),
    Output("word-cloud-output", "figure"),
    
    #Output("word-cloud-lag", "figure"),
    #Output("word-cloud-loc", "figure"),
    Input("range-slider", "value"),
    Input("job-category-dropdown", "value"),
    Input("industry-dropdown", "value"),
    Input("qualification-dropdown", "value"),
    Input("language-dropdown", "value"),
    Input("gender-checkboxes", "value"),
    Input("job-type-checkboxes", "value"),
    Input("position-level-checkboxes", "value"),
    Input("y-axis-dropdown", "value"),
    Input("y-axis-dropdown-treemap", "value"), 
    Input("graph-selector", "value"),
    Input("top-n-dropdown", "value"),
    Input("word-cloud-selector", "value"),
    # Input("location-y-axis-dropdown", "value")
)

# make graph update
def update_charts(slider_range, job_categories, industry, qualifications, language, gender, job_types, position_levels, y_axis_variable, y_axis_treemap,selected_graph,top_n, graph_wordcloud):
    low, high = slider_range

    # Apply filters for Year Experience, Job Category, Qualification, Language, Job Type, Position Level
    mask = (
        (df['Year_Experience'] > low) & 
        (df['Year_Experience'] < high) &
        (df['Job_Category'].isin(job_categories) if job_categories else True) &
        (df['Industry'].isin(industry) if industry else True) &
        (df['Qualification'].isin(qualifications) if qualifications else True) &
        (df['Language'].isin(language) if language else True) &
        (df['Gender'].isin(gender) if gender else True) &
        (df['Job_Type'].isin(job_types) if job_types else True) &
        (df['Position_Level'].isin(position_levels) if position_levels else True) 
    )


    # # Count Numer of Gender
    # grouped_df_pie = df[mask].groupby('Gender').size().reset_index(name='Count')

    # pie_fig = px.pie(
    #     grouped_df_pie, values='Count', names='Gender'
        
    # )

    # pie_fig.update_layout(
    #     height=500,
    #     #width=300,
    #     margin=dict(l=40, r=40, t=50, b=10),
    #     title=dict(text='Gender Distribution', x=0.5),
    #     legend=dict(orientation="h",yanchor="bottom",xanchor="center",x=0.5) 
    # )
    
    # Bar graph for total of hiring
    # Create the figure layout

    bar_fig = go.Figure()

    # Number of hiring by Salary
    grouped_df_salary = df[mask].groupby(['Salary'])['Hiring'].sum().reset_index()
    grouped_df_salary = grouped_df_salary.sort_values('Hiring', ascending=True)

    # Number of hiring by gender
    grouped_df_gender = df[mask].groupby(['Gender'])['Hiring'].sum().reset_index()
    grouped_df_gender = grouped_df_gender.sort_values('Hiring', ascending=True)

    # Number of hiring by Job Type
    grouped_df_jobtype = df[mask].groupby(['Job_Type'])['Hiring'].sum().reset_index()
    grouped_df_jobtype = grouped_df_jobtype.sort_values('Hiring', ascending=True)
    
    # Number of hiring by Position Level
    grouped_df_positionlevel = df[mask].groupby(['Position_Level'])['Hiring'].sum().reset_index()
    grouped_df_positionlevel = grouped_df_positionlevel.sort_values('Hiring', ascending=True)
    
    # Number of hiring by Job Category
    grouped_df_jobcategory = df[mask].groupby(['Job_Category'])['Hiring'].sum().reset_index()
    grouped_df_jobcategory = grouped_df_jobcategory.sort_values('Hiring', ascending=True)
    
    # Number of hiring by Industry
    grouped_df_industry = df[mask].groupby(['Industry'])['Hiring'].sum().reset_index()
    grouped_df_industry = grouped_df_industry.sort_values('Hiring', ascending=True)
    
    # Number of hiring by Qualification
    grouped_df_qualification = df[mask].groupby(['Qualification'])['Hiring'].sum().reset_index()
    grouped_df_qualification = grouped_df_qualification.sort_values('Hiring', ascending=True)
    

    # Set the Y-axis variable based on the dropdown value
    if y_axis_variable == 'Salary':
        bar_data = grouped_df_salary
        title = "<b>Total Number of Hiring by Salary<b>"
        x_variable = "Hiring"
        y_variable = "Salary"
        color = '#ecc17b'
    elif y_axis_variable == 'Gender':
        bar_data = grouped_df_gender
        title = "<b>Total Number of Hiring by Gender<b>"
        x_variable = "Hiring"
        y_variable = "Gender"
        xaxis_type = 'linear'
        color = '#00aeff'
    elif y_axis_variable == 'Job_Type':
        bar_data = grouped_df_jobtype
        title = "<b>Total Number of Hiring by Job Type<b>"
        x_variable = "Hiring"
        y_variable = "Job_Type"
        color = '#37bcb6'
    elif y_axis_variable == 'Position_Level':
        bar_data = grouped_df_positionlevel
        title = "<b>Total Number of Hiring by Position Level<b>"
        x_variable = "Hiring"
        y_variable = "Position_Level"
        color = '#a7c5f9'
    elif y_axis_variable == 'Job_Category':
        bar_data = grouped_df_jobcategory
        title = "<b>Total Number of Hiring by Job Category<b>"
        x_variable = "Hiring"
        y_variable = "Job_Category"
        color = '#7582e4'
    elif y_axis_variable == 'Industry':
        bar_data = grouped_df_industry
        title = "<b>Total Number of Hiring by Industry<b>"
        x_variable = "Hiring"
        y_variable = "Industry"
        color = '#1d79cb'
    else:
        bar_data = grouped_df_qualification
        title = "<b>Total Number of Hiring by Qualification<b>"
        x_variable = "Hiring"
        y_variable = "Qualification"
        color = '#ecc17b'

    xaxis_type = 'log'
    orientation =   'h'
   
    # Create the bar trace
    bar_fig.add_trace(go.Bar(
        x = bar_data[x_variable],
        y = bar_data[y_variable],
        name = title,
        marker_color=color,
        orientation = orientation,
        textposition='inside',
    ))

    # Update the figure layout
    bar_fig.update_layout(
        height=500,
        margin=dict(l=40, r=40, t=50, b=30),
        title=title,
        xaxis_type=xaxis_type,
        xaxis_title=x_variable,
        yaxis_title="Total Hiring",
        title_font_family = "Fira Sans, sans-serif",
        title_x = 0.5,
        font_color = "blue",
        title_font = dict(size=20, family="Fira Sans, sans-serif", color='#674ea7'),
        plot_bgcolor='#ffffff',# Set the background color of the plot area
        paper_bgcolor='#ffffff',  # Set the background color outside the plot
        xaxis_showgrid=False,  # Remove x-axis gridlines
        yaxis_showgrid=False,  # Remove y-axis gridlines
    )
    
    # Set up wordcloud for lang, location, job title
    cloud_fig = go.Figure()

    if graph_wordcloud == 'cloud1':
        # Generate the word cloud image for language
        lag_counts = Counter()
        for lang in df[mask]['Language']:
            words = lang.split('/')
            unique_words = set(words)  # Keep only unique words in the language string
            lag_counts.update(unique_words)

        # Create a DataFrame from the word counts
        lag_counts_df = pd.DataFrame(lag_counts.items(), columns=['language', 'Frequency'])

        # Generate a word cloud from the filtered languages
        wordcloud = WordCloud(width=1200, height=600, 
                              background_color='#e9f3fb',
                              #font_path='Calibri',
                              font_step=2).generate_from_frequencies(lag_counts_df.set_index('language')['Frequency'])

        # Save the word cloud image to a BytesIO object
        img_buffer = BytesIO()
        wordcloud.to_image().save(img_buffer, format="PNG")

        # Encode the image data as base64
        img_data = base64.b64encode(img_buffer.getvalue())

        # Create a Plotly figure with an image trace for language
        word_cloud_lag = {
            "data": [
                {
                    "type": "image",
                    "source": f"data:image/png;base64,{img_data.decode()}",
                    "x": 2,
                    "y": 2,
                    "xref": "paper",
                    "yref": "paper",
                    "sizex": 100,
                    "sizey": 10,
                    "sizing": "stretch",
                }
            ],
            "layout": {
                "xaxis": {"visible": False},
                "yaxis": {"visible": False},
                "title": {
                    "text": "<b>Frequency of Languages Needed<b>",
                    "x": 0.5,
                    "font": {"color": "#674ea7","family": "Fira Sans, sans-serif","size":20},
                },
                "plot_bgcolor": "#ffffff",  # Set the background color of the plot area
                "paper_bgcolor": "#ffffff",  # Set the background color outside the plot
            },
        }
        cloud_fig = word_cloud_lag

    elif graph_wordcloud == 'cloud2':
        # Generate the word cloud image for location
        lag_counts = Counter()
        for loc in df[mask]['Location']:
            words = loc.split('/')
            unique_words = set(words)  # Keep only unique words in the location string
            lag_counts.update(unique_words)

        # Create a DataFrame from the word counts
        lag_counts_df = pd.DataFrame(lag_counts.items(), columns=['location', 'Frequency'])

        # Generate a word cloud from the filtered locations
        wordcloud = WordCloud(width=1200, height=600, font_step=2, background_color='#e9f3fb').generate_from_frequencies(lag_counts_df.set_index('location')['Frequency'])

        # Save the word cloud image to a BytesIO object
        img_buffer = BytesIO()
        wordcloud.to_image().save(img_buffer, format="PNG")

        # Encode the image data as base64
        img_data = base64.b64encode(img_buffer.getvalue())

        # Create a Plotly figure with an image trace for location
        word_cloud_loc = {
            "data": [
                {
                    "type": "image",
                    "source": f"data:image/png;base64,{img_data.decode()}",
                    "x": 2,
                    "y": 2,
                    "xref": "paper",
                    "yref": "paper",
                    "sizex": 100,
                    "sizey": 10,
                    "sizing": "stretch",
                }
            ],
            "layout": {
                "xaxis": {"visible": False},
                "yaxis": {"visible": False},
                "title": {
                    "text": "<b>Frequency of Locations Needed<b>",
                    "x": 0.5,
                    "font": {"color": "#674ea7","family": "Fira Sans, sans-serif","size":20},
                },
                "plot_bgcolor": "#ffffff",  # Set the background color of the plot area
                "paper_bgcolor": "#ffffff",  # Set the background color outside the plot
            },
        }
        cloud_fig = word_cloud_loc
        
    else :
        # Generate the word cloud image for location
        job_title_counts = Counter()
        for loc in df[mask]['Job_Title']:
            words = loc.split(',')
            unique_words = set([word.strip().lower() for word in words])   # Convert to lowercase and keep only unique words
            job_title_counts.update(unique_words)

        # Create a DataFrame from the word counts
        job_title_counts_df = pd.DataFrame(job_title_counts.items(), columns=['job_title', 'Frequency'])

        # Generate a word cloud from the filtered locations
        wordcloud = WordCloud(width=1200, height=600, font_step=2, background_color='#e9f3fb').generate_from_frequencies(job_title_counts_df.set_index('job_title')['Frequency'])

        # Save the word cloud image to a BytesIO object
        img_buffer = BytesIO()
        wordcloud.to_image().save(img_buffer, format="PNG")

        # Encode the image data as base64
        img_data = base64.b64encode(img_buffer.getvalue())

        # Create a Plotly figure with an image trace for location
        word_cloud_loc = {
            "data": [
                {
                    "type": "image",
                    "source": f"data:image/png;base64,{img_data.decode()}",
                    "x": 2,
                    "y": 2,
                    "xref": "paper",
                    "yref": "paper",
                    "sizex": 100,
                    "sizey": 10,
                    "sizing": "stretch",
                }
            ],
            "layout": {
                "xaxis": {"visible": False},
                "yaxis": {"visible": False},
                "title": {
                    "text": "<b>Frequency of job title Needed<b>",
                    "x": 0.5,
                    "font": {"color": "#674ea7","family": "Fira Sans, sans-serif","size":20},
                },
                "plot_bgcolor": "#ffffff",  # Set the background color of the plot area
                "paper_bgcolor": "#ffffff",  # Set the background color outside the plot
            },
        }
        cloud_fig = word_cloud_loc
    
    # line graph and scatter plot for total number of hiring
    graph_fig = go.Figure()
        
    if selected_graph =='line':
        # Group by MonthYear and calculate the sum of hiring
        grouped_df_line = df[mask].groupby(['MonthYear'])['Hiring'].sum().reset_index()
        
        # Sort the DataFrame by MonthYear
        grouped_df_line = grouped_df_line.sort_values('MonthYear')
        graph_fig  = px.line(
            grouped_df_line, x="MonthYear", y="Hiring",
            color_discrete_sequence=['#ee9a00'],
            markers=False,
            labels={"MonthYear": "Month-Year", "Hiring": "Total Hiring"},
            title="<b>Total Number of Hiring Each Month</b>",
                
        )
        
        graph_fig .update_layout(
            height = 500,
            #font_family = "Arial, sans-serif",
            title_font_family = "Fira Sans, sans-serif",
            title_x = 0.5,
            font_color = "blue",
            title_font = dict(size=20, family="Fira Sans, sans-serif", color='#674ea7'),
            legend_title_font_color = "black",
            margin=dict(l=50, r=50, t=50, b=50),
            plot_bgcolor='#ffffff',# Set the background color of the plot area
            paper_bgcolor='#ffffff',  # Set the background color outside the plot
            xaxis_showgrid=False,  # Remove x-axis gridlines
            yaxis_showgrid=False,  # Remove y-axis gridlinest
            
        )
    
    elif selected_graph == 'scatter':
        # Group by Average Age and calculate the sum of hiring
        grouped_df_scatter = df[mask].groupby(['Age','Gender'])['Hiring'].sum().reset_index()
        
        
        graph_fig  = px.scatter(
            grouped_df_scatter, x="Hiring", y="Age",
            color = "Gender", 
            size='Age',
            #animation_frame="Average_Age",
            #animation_group="Gender",
            log_x=True, size_max=20, range_y=[15,55],
            labels={"Average_Age": "Average Age", "Hiring": "Total Hiring"},
            title="<b>Total Hiring by Average Age<b>"
        )

        graph_fig.update_layout(
            height=500,
            margin=dict(l=50, r=50, t=50, b=50),
            title_font_family = "Fira Sans, sans-serif",
            title_x = 0.5,
            font_color = "blue",
            title_font = dict(size=20, family="Fira Sans, sans-serif", color='#674ea7'),
            legend_title_font_color = "black",
            legend=dict(orientation="h",yanchor="top",xanchor="center",x=0.5,y=-0.2),
            plot_bgcolor='#ffffff',# Set the background color of the plot area
            paper_bgcolor='#ffffff',  # Set the background color outside the plot
            xaxis_showgrid=False,  # Remove x-axis gridlines
            yaxis_showgrid=False,  # Remove y-axis gridlines
        )
        
    # Treemap job title frequency count

    if y_axis_treemap == 'Job':
        # Job title frequency count
        job_title_counts = Counter()
        for title in df[mask]['Job_Title']:
            words = title.split(',')
            unique_words = set([word.strip().lower() for word in words])
            job_title_counts.update(unique_words)

        # Create a DataFrame from the word counts
        job_title_counts_df = pd.DataFrame(job_title_counts.items(), columns=['job_title', 'Frequency'])

        # Get the top N most common job titles
        top_job_titles = job_title_counts.most_common(top_n)

        # Prepare data for Plotly treemap
        labels = [title[0] for title in top_job_titles]
        values = [title[1] for title in top_job_titles]

        # Create a treemap figure using Plotly
        fig_treemap = go.Figure(go.Treemap(
            labels=labels,
            parents=[""] * len(labels),  # All labels are top-level categories
            values=values,
            text=values
        ))
    
        # Set layout for the treemap figure
        fig_treemap.update_layout(
            title="<b>Top {} Job Title Frequencies<b>".format(top_n),
            height=450,
            title_font_family="Fira Sans, sans-serif",
            title_x=0.5,
            font_color="red",
            title_font=dict(size=20, family="Fira Sans, sans-serif", color='#674ea7'),
            margin=dict(t=60, b=30, l=0, r=0),
            plot_bgcolor='#ffffff',  # Set the background color of the plot area
            paper_bgcolor='#ffffff',  # Set the background color outside the plot
        )
    elif y_axis_treemap == 'Company':
        # Job title frequency count
        unique_companies = df['Company_Name'].value_counts()

        # Get the top N most common job titles
        top_companies = unique_companies.head(top_n)
        labels = top_companies.index
        values = top_companies.values

        # Create a treemap figure using Plotly
        fig_treemap = go.Figure(go.Treemap(
            labels=labels,
            parents=[""] * len(top_companies.index),  # All labels are top-level categories
            values=values,
            text=values
        ))
    
        # Set layout for the treemap figure
        fig_treemap.update_layout(
            title="<b>Top {} Hiring Companys<b>".format(top_n),
            height=450,
            title_font_family="Fira Sans, sans-serif",
            title_x=0.5,
            font_color="red",
            title_font=dict(size=20, family="Fira Sans, sans-serif", color='#674ea7'),
            margin=dict(t=60, b=30, l=0, r=0),
            plot_bgcolor='#ffffff',  # Set the background color of the plot area
            paper_bgcolor='#ffffff',  # Set the background color outside the plot
        )
        
    return graph_fig, bar_fig, fig_treemap, cloud_fig


if __name__ == '__main__':
    app.run(jupyter_mode="external",debug=True) # defualt hosting

Dash app running on http://127.0.0.1:8050/


[1;31m---------------------------------------------------------------------------[0m
[1;31mAttributeError[0m                            Traceback (most recent call last)
[1;31mAttributeError[0m: 'float' object has no attribute 'split'



In [9]:
# if __name__ == '__main__':
#     app.run(jupyter_mode="external",debug=True) # defualt hosting
#     #app.run(jupyter_server_url="http://172.23.32.100:8050")
#     #app.run_server(host='172.23.32.100', port='8050',debug=True)