In [8]:
pip install kagglehub


Note: you may need to restart the kernel to use updated packages.


In [9]:
import kagglehub

# Fetch the latest version of the dataset; the call returns the local path of the files.
dataset_handle = "uom190346a/ai-powered-job-market-insights"
path = kagglehub.dataset_download(dataset_handle)

print(f"Path to dataset files: {path}")

Path to dataset files: C:\Users\Augus\.cache\kagglehub\datasets\uom190346a\ai-powered-job-market-insights\versions\1


In [10]:
import os
# Show what the downloaded dataset directory contains.
files = os.listdir(path)
print(f"Files in dataset folder: {files}")

Files in dataset folder: ['ai_job_market_insights.csv']


In [11]:
import pandas as pd
# Locate the CSV inside the downloaded dataset directory and load it.
csv_file = os.path.join(path, 'ai_job_market_insights.csv')
job_market_data = pd.read_csv(csv_file)

# Text preview of the first rows (head() defaults to five).
first_rows = job_market_data.head()
print(first_rows)

               Job_Title       Industry Company_Size   Location  \
0  Cybersecurity Analyst  Entertainment        Small      Dubai   
1   Marketing Specialist     Technology        Large  Singapore   
2          AI Researcher     Technology        Large  Singapore   
3          Sales Manager         Retail        Small     Berlin   
4  Cybersecurity Analyst  Entertainment        Small      Tokyo   

  AI_Adoption_Level Automation_Risk     Required_Skills     Salary_USD  \
0            Medium            High        UX/UI Design  111392.165243   
1            Medium            High           Marketing   93792.562466   
2            Medium            High        UX/UI Design  107170.263069   
3               Low            High  Project Management   93027.953758   
4               Low             Low          JavaScript   87752.922171   

  Remote_Friendly Job_Growth_Projection  
0             Yes                Growth  
1              No               Decline  
2             Yes         

In [12]:
import pandas as pd
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import ipywidgets as widgets
from IPython.display import display
import numpy as np

csv_file = os.path.join(path, 'ai_job_market_insights.csv')
df = pd.read_csv(csv_file)

# Encode the target as ordinal codes: Low=0, Medium=1, High=2.
risk_mapping = {'Low': 0, 'Medium': 1, 'High': 2}
df['Automation_Risk_Encoded'] = df['Automation_Risk'].map(risk_mapping)

# Series.map yields NaN for any label missing from risk_mapping. Fail here with a
# clear message instead of handing NaN targets to the classifier later on.
unmapped_risk = df.loc[df['Automation_Risk_Encoded'].isna(), 'Automation_Risk'].unique()
if len(unmapped_risk) > 0:
    raise ValueError(f"Unexpected Automation_Risk labels: {list(unmapped_risk)}")

# Define salary ranges in $25,000 increments. Both bounds are floored (not rounded)
# to a multiple of $1,000. The last bucket is the one containing the highest observed
# salary, so no range is offered that lies entirely above the data.
SALARY_STEP = 25000
min_salary = int(df['Salary_USD'].min() // 1000 * 1000)
top_salary_floor = int(df['Salary_USD'].max() // 1000 * 1000)
# Same padded value this name held before; it is no longer used to size the buckets.
max_salary = top_salary_floor + SALARY_STEP
salary_ranges = {
    f"${start:,}-${start + SALARY_STEP - 1:,}": (start, start + SALARY_STEP - 1)
    for start in range(min_salary, top_salary_floor + 1, SALARY_STEP)
}
# Midpoint of each bucket, used as the numeric salary fed to the model.
representative_salaries = {label: (low + high) / 2 for label, (low, high) in salary_ranges.items()}

features = ['Job_Title', 'Industry', 'Company_Size', 'Location', 'AI_Adoption_Level', 'Salary_USD']
target = 'Automation_Risk_Encoded'

# Replace each categorical feature column with integer label codes.
# One LabelEncoder instance is refit for every column, so once this finishes
# `encoder` only holds the classes of the last column (AI_Adoption_Level).
categorical_columns = ['Job_Title', 'Industry', 'Company_Size', 'Location', 'AI_Adoption_Level']
encoder = LabelEncoder()
encoded_df = df.assign(
    **{name: encoder.fit_transform(df[name]) for name in categorical_columns}
)

# Fit a 100-tree random forest on the full dataset; the fixed seed keeps it reproducible.
rfc = RandomForestClassifier(n_estimators=100, random_state=42)
X, y = encoded_df[features], encoded_df[target]
rfc.fit(X, y)

# Choices for each dropdown are the distinct values found in the matching column.
job_titles, industries, company_sizes, locations = (
    df[name].unique() for name in ('Job_Title', 'Industry', 'Company_Size', 'Location')
)

# One dropdown per user-selectable input; salary is picked as a labelled range.
job_title_dropdown = widgets.Dropdown(description="Job Title:", options=job_titles)
industry_dropdown = widgets.Dropdown(description="Industry:", options=industries)
company_size_dropdown = widgets.Dropdown(description="Company Size:", options=company_sizes)
location_dropdown = widgets.Dropdown(description="Location:", options=locations)
salary_range_dropdown = widgets.Dropdown(description="Salary Range:", options=salary_ranges.keys())

input_widgets = (
    job_title_dropdown,
    industry_dropdown,
    company_size_dropdown,
    location_dropdown,
    salary_range_dropdown,
)
display(*input_widgets)

# Define prediction function
def predict_automation_risk(job_title, industry, company_size, location, salary_range,
                            ai_adoption_level='Medium'):
    """Predict the automation-risk label for a single job profile.

    Parameters
    ----------
    job_title, industry, company_size, location : str
        Category values; each must occur in the matching column of ``df``.
    salary_range : str
        A key of ``representative_salaries``; the bucket midpoint is used as salary.
    ai_adoption_level : str, default 'Medium'
        Value for the ``AI_Adoption_Level`` feature. The default keeps the value
        that was previously hard-coded.

    Returns
    -------
    str
        ``"The likelihood of job automation is: <label>"`` where the label is
        Low, Medium, High, or Unknown for an unexpected class code.

    Raises
    ------
    KeyError
        If ``salary_range`` is not a known range label.
    ValueError
        If a category value does not occur in the training data.
    """
    # Map selected salary range to a representative salary value
    salary = representative_salaries[salary_range]

    categorical_values = {
        'Job_Title': job_title,
        'Industry': industry,
        'Company_Size': company_size,
        'Location': location,
        'AI_Adoption_Level': ai_adoption_level,
    }

    encoded_row = {'Salary_USD': salary}
    for column, value in categorical_values.items():
        # LabelEncoder assigns codes by sorted unique value, so a fresh encoder fitted
        # on the training column alone reproduces the codes used for training. Fitting
        # on the training data only (not training + input) means an unseen value raises
        # instead of silently shifting the codes of other categories, and the shared
        # module-level encoder is left untouched.
        column_encoder = LabelEncoder().fit(df[column])
        try:
            encoded_row[column] = column_encoder.transform([value])[0]
        except ValueError as exc:
            raise ValueError(
                f"{value!r} is not a known {column} value in the training data"
            ) from exc

    # Column order must match the order the model was trained with.
    input_data = pd.DataFrame([encoded_row], columns=features)

    # Predict using the model
    prediction_encoded = rfc.predict(input_data)[0]
    prediction = {0: 'Low', 1: 'Medium', 2: 'High'}.get(prediction_encoded, "Unknown")

    return f"The likelihood of job automation is: {prediction}"

# Button that triggers a prediction, plus the Output widget the click handler prints into
predict_button = widgets.Button(description="Predict Automation Risk")
output = widgets.Output()

# Button click event
def on_button_click(b):
    """Read the current dropdown selections and show the prediction in `output`.

    `b` is the clicked button passed in by ipywidgets; it is not used.
    """
    selections = (
        job_title_dropdown.value,
        industry_dropdown.value,
        company_size_dropdown.value,
        location_dropdown.value,
        salary_range_dropdown.value,
    )

    # Everything printed inside this context is rendered in the output widget.
    with output:
        output.clear_output()  # drop the previous result first
        print(predict_automation_risk(*selections))

# Register the handler so that clicking the button runs on_button_click
predict_button.on_click(on_button_click)

# Render the button followed by the output area that receives the prediction text
display(predict_button, output)


Dropdown(description='Job Title:', options=('Cybersecurity Analyst', 'Marketing Specialist', 'AI Researcher', …

Dropdown(description='Industry:', options=('Entertainment', 'Technology', 'Retail', 'Education', 'Finance', 'T…

Dropdown(description='Company Size:', options=('Small', 'Large', 'Medium'), value='Small')

Dropdown(description='Location:', options=('Dubai', 'Singapore', 'Berlin', 'Tokyo', 'San Francisco', 'London',…

Dropdown(description='Salary Range:', options=('$31,000-$55,999', '$56,000-$80,999', '$81,000-$105,999', '$106…

Button(description='Predict Automation Risk', style=ButtonStyle())

Output()