In [None]:
import os
import kagglehub
import plotly.express as px
import polars as pl
from pathlib import Path

# Let polars render up to 20 columns so all 19 columns of the dataset are shown.
pl.Config.set_tbl_cols(20)

# Fetch the dataset through kagglehub and build the path to its CSV file.
dataset_dir = Path(
    kagglehub.dataset_download(
        "bismasajjad/global-ai-job-market-and-salary-trends-2025"
    )
)
csv_path = dataset_dir / "ai_job_dataset.csv"

[Dataset in Kaggle](https://www.kaggle.com/datasets/bismasajjad/global-ai-job-market-and-salary-trends-2025/data)

In [None]:
# @title Preprocessing

# Human-readable work-style label for each numeric remote_ratio code,
# listed from least to most remote (this order becomes the Enum order).
remote_labels = {0: "No remote", 50: "Hybrid", 100: "Fully remote"}

raw_df = pl.read_csv(csv_path)
df = raw_df.with_columns(
    # Both date columns are cast to Date values.
    pl.col("posting_date", "application_deadline").cast(pl.Date),
    # Split the comma-separated skills into a list and trim each entry.
    pl.col("required_skills")
    .str.split(",")
    .list.eval(pl.element().str.strip_chars()),
    # Recode the numeric remote_ratio into an Enum of work-style labels.
    pl.col("remote_ratio")
    .replace_strict(remote_labels)
    .cast(pl.Enum(list(remote_labels.values()))),
).sort("salary_usd", descending=True)


### EDA

In [3]:
# Summary statistics (count, null count, mean, std, min, quartiles, max) per column.
df.describe()

statistic,job_id,job_title,salary_usd,salary_currency,experience_level,employment_type,company_location,company_size,employee_residence,remote_ratio,required_skills,education_required,years_experience,industry,posting_date,application_deadline,job_description_length,benefits_score,company_name
str,str,str,f64,str,str,str,str,str,str,str,f64,str,f64,str,str,str,f64,f64,str
"""count""","""15000""","""15000""",15000.0,"""15000""","""15000""","""15000""","""15000""","""15000""","""15000""","""15000""",15000.0,"""15000""",15000.0,"""15000""","""15000""","""15000""",15000.0,15000.0,"""15000"""
"""null_count""","""0""","""0""",0.0,"""0""","""0""","""0""","""0""","""0""","""0""","""0""",0.0,"""0""",0.0,"""0""","""0""","""0""",0.0,0.0,"""0"""
"""mean""",,,115348.965133,,,,,,,,,,6.2532,,"""2024-08-29 08:48:51.840000""","""2024-10-11 21:55:23.519000""",1503.314733,7.504273,
"""std""",,,60260.940438,,,,,,,,,,5.545768,,,,576.127083,1.45087,
"""min""","""AI00001""","""AI Architect""",32519.0,"""EUR""","""EN""","""CT""","""Australia""","""L""","""Australia""",,,"""Associate""",0.0,"""Automotive""","""2024-01-01""","""2024-01-16""",500.0,5.0,"""AI Innovations"""
"""25%""",,,70180.0,,,,,,,,,,2.0,,"""2024-04-29""","""2024-06-13""",1004.0,6.2,
"""50%""",,,99724.0,,,,,,,,,,5.0,,"""2024-08-28""","""2024-10-12""",1512.0,7.5,
"""75%""",,,146407.0,,,,,,,,,,10.0,,"""2024-12-29""","""2025-02-10""",2000.0,8.8,
"""max""","""AI15000""","""Robotics Engineer""",399095.0,"""USD""","""SE""","""PT""","""United States""","""S""","""United States""",,,"""PhD""",19.0,"""Transportation""","""2025-04-30""","""2025-07-11""",2499.0,10.0,"""TechCorp Inc"""


In [4]:
# Compact per-column preview: dtype followed by the first few values of each column.
df.glimpse()

Rows: 15000
Columns: 19
$ job_id                       <str> 'AI09892', 'AI02310', 'AI12500', 'AI12804', 'AI00318', 'AI05484', 'AI10469', 'AI09638', 'AI08378', 'AI14432'
$ job_title                    <str> 'Machine Learning Researcher', 'AI Architect', 'AI Research Scientist', 'AI Specialist', 'Data Scientist', 'AI Architect', 'AI Product Manager', 'AI Software Engineer', 'AI Research Scientist', 'AI Research Scientist'
$ salary_usd                   <i64> 399095, 398084, 394917, 390292, 388754, 383142, 381575, 379418, 372206, 371087
$ salary_currency              <str> 'USD', 'USD', 'USD', 'USD', 'USD', 'USD', 'USD', 'USD', 'USD', 'USD'
$ experience_level             <str> 'EX', 'EX', 'EX', 'EX', 'EX', 'EX', 'EX', 'EX', 'EX', 'EX'
$ employment_type              <str> 'PT', 'CT', 'CT', 'FT', 'CT', 'PT', 'PT', 'FL', 'FT', 'CT'
$ company_location             <str> 'Switzerland', 'Switzerland', 'Switzerland', 'Switzerland', 'Switzerland', 'Switzerland', 'Switzerland', 'Denmark', 'Switzer

In [5]:
# Cardinality check: number of distinct values in every column.
print("Count of the unique values:")
df.select(pl.all().n_unique())

Count of the unique values:


job_id,job_title,salary_usd,salary_currency,experience_level,employment_type,company_location,company_size,employee_residence,remote_ratio,required_skills,education_required,years_experience,industry,posting_date,application_deadline,job_description_length,benefits_score,company_name
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
15000,20,14315,3,4,4,20,3,20,3,13663,4,20,15,486,543,2000,51,16


> ### 1)  کشورهایی با بالاترین میانگین حقوق در مشاغل حوزهٔ هوش مصنوعی کدام‌اند؟


In [None]:
# One row per country holding the list of its salaries and their mean cast to
# an integer, ordered from the highest mean downwards so both charts below
# share the same country order.
country_groups = df.group_by("company_location")
salary_by_country = country_groups.agg(
    pl.col("salary_usd"),
    mean_salary_usd=pl.col("salary_usd").mean().cast(int),
).sort("mean_salary_usd", descending=True)

# Back to one row per posting so the box plot sees the individual salaries.
country_salaries_long = salary_by_country.explode("salary_usd")
boxplot_salary_by_country = px.box(
    country_salaries_long,
    title="Box Plot of Salary by Country",
    x="company_location",
    y="salary_usd",
    color="company_location",
)
boxplot_salary_by_country.show()

# Bar chart of the per-country mean, coloured by the mean itself.
barplot_mean_salary_by_country = px.bar(
    salary_by_country,
    title="Mean Salary by Country",
    x="company_location",
    y="mean_salary_usd",
    color="mean_salary_usd",
    text_auto=True,
)
barplot_mean_salary_by_country.show()


> ### 2)  رایج‌ترین مهارت‌ها یا شرایط موردنیاز در آگهی‌های شغلی مربوط به هوش مصنوعی چیست؟

In [None]:
# Flatten the per-posting skill lists into one long series, then count how
# often each skill occurs (most frequent first).
all_listed_skills = df["required_skills"].explode()
skill_counts = all_listed_skills.value_counts(sort=True)

# Express each count as a percentage of the number of postings (rows of df).
required_skills = skill_counts.with_columns(
    required_skills_percentage=(pl.col("count") * 100 / df.height).round(1)
)

barplot_required_skills_percentage = px.bar(
    required_skills,
    title="Required Skills",
    x="required_skills",
    y="required_skills_percentage",
    color="required_skills_percentage",
    text_auto=True,
)
barplot_required_skills_percentage.show()

> ### 3)  چه رابطه‌ای میان سطح تجربه و متوسط حقوق وجود دارد؟

In [None]:
# Order in which the experience-level codes should appear on the x-axis.
experience_order = ["EN", "MI", "SE", "EX"]
experience_enum = pl.Enum(experience_order)

# Collect the salaries of each experience level, then sort the levels by the
# Enum order above instead of alphabetically.
salaries_per_level = df.group_by("experience_level").agg(pl.col("salary_usd"))
salary_by_experience = salaries_per_level.with_columns(
    pl.col("experience_level").cast(experience_enum)
).sort("experience_level")

# Violin (with an embedded box) of the individual salaries per level.
violin_plot_salary_by_experience = px.violin(
    salary_by_experience.explode("salary_usd"),
    title="Violin Plot of Salary Distribution by Experience Level",
    x="experience_level",
    y="salary_usd",
    color="experience_level",
    box=True,
)

violin_plot_salary_by_experience.show()

> ### 4)  چه تفاوتی میان نسبت مشاغل دورکار در انواع مختلف استخدام (پاره‌وقت، تمام‌وقت، قراردادی، فریلنسری) دیده می‌شود؟

In [None]:
# group_by does not guarantee any group order, so each grouped frame is sorted
# by its key; otherwise the bar and legend order could change between runs.
remote_ratio_by_type = (
    df.group_by("employment_type").agg(pl.col("remote_ratio")).sort("employment_type")
)
type_by_remote_ratio = (
    df.group_by("remote_ratio").agg(pl.col("employment_type")).sort("remote_ratio")
)

# Pin axis and legend order for both charts: work styles follow the sorted
# remote_ratio column, employment types are alphabetical.
category_orders = {
    "remote_ratio": type_by_remote_ratio["remote_ratio"].to_list(),
    "employment_type": remote_ratio_by_type["employment_type"].to_list(),
}

# Share of each employment type within every work style (bars normalised to 100%).
px.histogram(
    type_by_remote_ratio.explode("employment_type"),
    x="remote_ratio",
    color="employment_type",
    barnorm="percent",
    category_orders=category_orders,
    title="Employment Type Composition by Work Style",
    labels={"remote_ratio": "Work Style", "employment_type": "Employment Type"},
).show()


# Share of each work style within every employment type (bars normalised to 100%).
px.histogram(
    remote_ratio_by_type.explode("remote_ratio"),
    x="employment_type",
    color="remote_ratio",
    barnorm="percent",
    category_orders=category_orders,
    title="Work Style Composition by Employment Type",
    labels={"employment_type": "Employment Type", "remote_ratio": "Work Style"},
).show()

> ### 5)  آیا بین تعداد سال‌های تجربه و حقوق دلاری رابطهٔ معناداری وجود دارد؟ نموداری که به نظرتان بهتر این موضوع را تحلیل می‌کند، رسم نمایید.

In [None]:
# Collect the salaries for each distinct years_experience value and order the
# groups ascending so the x-axis reads from least to most experienced.
salary_by_years_experience = (
    df.group_by("years_experience").agg(pl.col("salary_usd")).sort("years_experience")
)

boxplot_salary_by_years_experience = px.box(
    salary_by_years_experience.explode("salary_usd"),
    x="years_experience",
    y="salary_usd",
    # The title used to read "Violin Plot ... by Experience Level", copied from
    # another cell; this figure is a box plot over years of experience.
    title="Box Plot of Salary Distribution by Years of Experience",
    color="years_experience",
)

boxplot_salary_by_years_experience.show()

> ### 6)  توزیع حقوق بر اساس سطح تحصیلات چگونه است؟ نموداری که به نظرتان بهتر این موضوع را تحلیل می‌کند، رسم نمایید.

In [None]:
# Order in which the education levels should appear on the x-axis.
education_order = ["Associate", "Bachelor", "Master", "PhD"]

# Collect the salaries for each education level and sort the levels by the
# Enum order above instead of alphabetically.
salary_by_education = (
    df.group_by("education_required")
    .agg([pl.col("salary_usd")])
    .with_columns(
        pl.col("education_required").cast(pl.Enum(categories=education_order))
    )
    .sort("education_required")
)

boxplot_salary_by_education = px.box(
    salary_by_education.explode("salary_usd"),
    x="education_required",
    y="salary_usd",
    # The title used to read "Violin Plot ... by Experience Level", copied from
    # another cell; this figure is a box plot over required education.
    title="Box Plot of Salary Distribution by Education Level",
    color="education_required",
)
boxplot_salary_by_education.show()

> ### 7)  میانگین حقوق بر اساس کشور محل شرکت چگونه تفاوت می‌کند؟ نموداری که به نظرتان بهتر این موضوع را تحلیل می‌کند، رسم نمایید.

In [None]:
# Mean salary per company location, cast to an integer, highest first.
mean_salary_expr = pl.col("salary_usd").mean().cast(int)
mean_salary_by_country = (
    df.group_by("company_location")
    .agg(mean_salary_usd=mean_salary_expr)
    .sort("mean_salary_usd", descending=True)
)

# World map shaded by mean salary; locations are matched by country name.
choropleth_map_mean_salary_by_country = px.choropleth(
    mean_salary_by_country,
    title="Choropleth Map of Mean Salary by Country (USD)",
    locations="company_location",
    locationmode="country names",
    hover_name="company_location",
    color="mean_salary_usd",
    color_continuous_scale=px.colors.sequential.Emrld,
)

choropleth_map_mean_salary_by_country.show()

> ### 8)  آیا طول توضیحات شغلی در آگهی‌ها طی زمان تغییر کرده است؟

In [None]:
# For every posting date keep both the individual description lengths (for the
# box plot) and their mean cast to an integer (for the line chart), in
# chronological order.
desc_length_by_date = (
    df.group_by("posting_date")
    .agg(
        pl.col("job_description_length"),
        pl.col("job_description_length")
        .mean()
        .cast(int)
        .alias("mean_job_description_length"),
    )
    .sort("posting_date")
)


# Spread of description lengths on each posting date.
boxplot_desc_length_by_date = px.box(
    desc_length_by_date.explode("job_description_length"),
    x="posting_date",
    y="job_description_length",
    title="Job Description Length by Date",
)
boxplot_desc_length_by_date.show()

# Per-date mean as a line; titled like every other figure in the notebook so
# the chart is understandable on its own.
lineplot_desc_length_by_date = px.line(
    desc_length_by_date,
    x="posting_date",
    y="mean_job_description_length",
    title="Mean Job Description Length by Date",
)
lineplot_desc_length_by_date.show()

> ### 9)  توزیع شغل‌ها در صنایع مختلف به چه صورت است؟ به صورت نمودار دایره‌ای نیز تحلیل گردد.

In [None]:
# Number of postings per industry (column "len"), largest industry first.
postings_per_industry = df.group_by("industry").len()
jobs_by_industry = postings_per_industry.sort("len", descending=True)

# Pie chart where each slice is an industry sized by its posting count.
pie_chart_jobs_by_industry = px.pie(
    jobs_by_industry,
    title="Jobs by Industry",
    names="industry",
    values="len",
)
pie_chart_jobs_by_industry.show()

> ### 10)  برای افراد با تجربه‌های شغلی مختلف، چگونه تغییر حقوق میان سطوح مشاهده می‌شود؟

In [None]:
# Order in which the experience-level codes should appear on the x-axis.
experience_order = ["EN", "MI", "SE", "EX"]
experience_enum = pl.Enum(experience_order)

# Collect the salaries of each experience level, then sort the levels by the
# Enum order above instead of alphabetically.
salaries_per_level = df.group_by("experience_level").agg(pl.col("salary_usd"))
salary_by_experience = salaries_per_level.with_columns(
    pl.col("experience_level").cast(experience_enum)
).sort("experience_level")

# Box plot of the individual salaries, one box per experience level.
boxplot_salary_by_experience = px.box(
    salary_by_experience.explode("salary_usd"),
    title="Box Plot of Salary Distribution by Experience Level",
    x="experience_level",
    y="salary_usd",
    color="experience_level",
)

boxplot_salary_by_experience.show()

> ### 11)  ده مهارت یا الزام شغلی رایج در بازار کار هوش مصنوعی کدام‌اند و در چه درصدی از آگهی‌ها تکرار شده‌اند؟

In [None]:
# Flatten the per-posting skill lists and count each skill, most frequent first.
skill_counts = df["required_skills"].explode().value_counts(sort=True)

# Add each count as a percentage of the number of postings (rows of df) and
# keep only the first ten rows.
top_ten_required_skills = skill_counts.with_columns(
    required_skills_percentage=(pl.col("count") * 100 / df.height).round(2)
).head(10)

barplot_required_skills_percentage = px.bar(
    top_ten_required_skills,
    title="Top 10 Required Skills",
    x="required_skills",
    y="required_skills_percentage",
    color="required_skills_percentage",
    text_auto=True,
)
barplot_required_skills_percentage.show()

> ### 12)  مجموع کل حقوق پرداخت‌شده به ازای هر سطح تجربه چقدر است و این مقدار چگونه بین سطوح توزیع شده؟ نموداری که به نظرتان بهتر این موضوع را تحلیل می‌کند، رسم نمایید.

In [None]:
# Sum of all salaries within each experience level, smallest total first.
salary_totals = df.group_by("experience_level").agg(
    total_salary_usd=pl.col("salary_usd").sum()
)
total_salary_by_experience = salary_totals.sort("total_salary_usd")

# One bar per experience level showing its total salary.
total_salary_bar = px.bar(
    total_salary_by_experience,
    title="Total Salary by Experience Level",
    x="experience_level",
    y="total_salary_usd",
    color="experience_level",
    text_auto=True,
)
total_salary_bar.show()

> ### 13)  توزیع حقوق بر اساس اندازهٔ شرکت (کوچک، متوسط، بزرگ) چه الگوهایی دارد؟

In [None]:
# Order in which the company-size codes should appear on the x-axis.
company_size_order = ["S", "M", "L"]
company_size_enum = pl.Enum(company_size_order)

# Collect the salaries for each company size, then sort the sizes by the Enum
# order above instead of alphabetically.
salaries_per_size = df.group_by("company_size").agg(pl.col("salary_usd"))
salary_by_company_size = salaries_per_size.with_columns(
    pl.col("company_size").cast(company_size_enum)
).sort("company_size")

# Violin (with an embedded box) of the individual salaries per company size.
violin_plot_salary_by_company_size = px.violin(
    salary_by_company_size.explode("salary_usd"),
    title="Violin Plot of Salary Distribution by Company Size",
    x="company_size",
    y="salary_usd",
    color="company_size",
    box=True,
)
violin_plot_salary_by_company_size.show()