In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Fetch the dataset through kagglehub and keep whatever dataset_download
# returns in a notebook-level variable.
# NOTE(review): presumably the return value is the local directory that holds
# the downloaded files -- confirm against the kagglehub documentation.
bismasajjad_global_ai_job_market_and_salary_trends_2025_path = kagglehub.dataset_download('bismasajjad/global-ai-job-market-and-salary-trends-2025')

print('Data source import complete.')


# Insights from the Global AI Job Market Dataset


In [None]:
import os

import pandas as pd

# Load the AI job-market CSV into `df`, the frame every later cell works on.
#
# On Kaggle the dataset is mounted under /kaggle/input. In any other
# environment that mount does not exist, so fall back to the directory
# returned by kagglehub.dataset_download in the data-source import cell.
DATASET_CSV_NAME = 'ai_job_dataset1.csv'
KAGGLE_INPUT_DIR = '/kaggle/input/global-ai-job-market-and-salary-trends-2025'

dataset_csv_path = os.path.join(KAGGLE_INPUT_DIR, DATASET_CSV_NAME)
if not os.path.exists(dataset_csv_path):
    # NOTE(review): assumes the CSV sits at the top level of the kagglehub
    # download directory -- confirm.
    dataset_csv_path = os.path.join(
        bismasajjad_global_ai_job_market_and_salary_trends_2025_path,
        DATASET_CSV_NAME,
    )

df = pd.read_csv(dataset_csv_path)

### ⦁ Experience level categorization:
EN (Entry), MI (Mid), SE (Senior), EX (Executive)
### ⦁ employment_type:
FT (Full-time), PT (Part-time), CT (Contract), FL (Freelance)
### ⦁ company_size:
S (Small <50), M (Medium 50-250), L (Large >250)
### ⦁ remote_ratio:
0 (No remote), 50 (Hybrid), 100 (Fully remote)

In [None]:
# Preview the first rows (pandas default: 5) to check that the file parsed sensibly.
df.head()

In [None]:
# Preview the last rows (pandas default: 5) to spot truncation or trailing junk.
df.tail()

In [None]:
# Print column names, non-null counts and dtypes as parsed from the CSV.
df.info()

In [None]:
# Convert both date columns to pandas datetimes, overwriting them in place on
# `df`, so later cells can treat them as dates.
for date_col in ('posting_date', 'application_deadline'):
    df[date_col] = pd.to_datetime(df[date_col])

In [None]:
# (n_rows, n_columns) of the loaded frame.
df.shape
# Recorded from the author's run: 15000 rows x 20 columns.

In [None]:
# Total number of missing cells: per-column NaN counts, summed again across columns.
df.isna().sum().sum()
# Author's observation: no null or missing values.

In [None]:
# Number of rows that exactly repeat an earlier row.
df.duplicated().sum()
# Author's observation: no duplicates.

In [None]:
# Descriptive statistics for the columns pandas summarises by default.
df.describe()

In [None]:
# Mean salary_usd per job title, highest-paying first, so the top roles quoted
# in the salary notes can be read straight off the top of the output instead
# of being hunted for in alphabetical group order.
df.groupby('job_title')['salary_usd'].mean().sort_values(ascending=False)

## 1. Salary Insights
- **Average: ~$122K/year**
- Highest salaries:
  
  - Data Engineer: ~$127.5K
    
  - AI Product Manager: ~$126K

  - Deep Learning Engineer: ~$124.4K  
→ Technical engineering roles are the most rewarding.

### The salary_usd distribution is right-skewed
### Most roles target mid-level professionals.

In [None]:
import matplotlib.pyplot as plt

# Histogram of salary_usd in 50 bins, drawn on an explicitly created axes
# so the labels are attached to that axes rather than to pyplot's implicit state.
fig, ax = plt.subplots()
df['salary_usd'].hist(bins=50, ax=ax)
ax.set_title('Salary USD Distribution')
ax.set_xlabel('Salary USD')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Summary (count / unique / top / freq) restricted to the object-dtype columns.
df.describe(include='O')
# Author's observation: ML Engineers are in high demand but not dominating
# the market: 824 / 15000 * 100 = 5.49 %.

In [None]:
# Number of postings per job title, most frequent first.
df['job_title'].value_counts()

## 2. Top AI Job Titles
- **Machine Learning Engineer (824 jobs)**
- **Deep Learning Engineer (786 jobs)**
- **Computer Vision Engineer (780 jobs)**  
→ Strong demand for core machine learning and deep learning roles.

In [None]:
# Ten most frequent job titles (value_counts already sorts descending).
top_jobs = df['job_title'].value_counts().head(10)

# barh places the first entry at the bottom, so reverse the order to put the
# most common title at the top of the chart.
top_jobs_bottom_up = top_jobs.iloc[::-1]

fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(top_jobs_bottom_up.index, top_jobs_bottom_up.values)
ax.set_xlabel("Number of Jobs")
ax.set_ylabel("Job Title")
ax.set_title("Top 10 AI Job Titles by Demand")
fig.tight_layout()
plt.show()

In [None]:
# Number of postings per experience level, most frequent first.
df['experience_level'].value_counts()
# Codes: EN (Entry), MI (Mid), SE (Senior), EX (Executive)

## 3. Experience Level Distribution
- **Executive (EX): 3,843**
- **Mid-level (MI): 3,764**
- **Senior (SE): 3,741**
- **Entry-level (EN): 3,652**  
→ Postings are spread almost evenly across levels (about 24–26% each), with executive roles slightly ahead.

In [None]:
# Mean salary per experience level, ordered from lowest to highest mean so the
# bars rise left to right.
avg_salary_exp = (
    df.groupby('experience_level')['salary_usd']
    .mean()
    .sort_values()
)

fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(avg_salary_exp.index, avg_salary_exp.values)
ax.set_ylabel("Average Salary (USD)")
ax.set_title("Average Salary by Experience Level")
fig.tight_layout()
plt.show()

In [None]:
# Number of postings per employment type, most frequent first.
# Codes: FT (Full-time), PT (Part-time), CT (Contract), FL (Freelance)
df['employment_type'].value_counts()

## 4. Employment Type
- **Contract: 3,841**
- **Part-time: 3,757**
- **Freelance: 3,705**
- **Full-time: 3,697**  
→ Contract and part-time opportunities are highly available.

In [None]:
# Number of postings per company location, most frequent first.
# Left as the cell's last expression (no print) so the notebook renders it as
# the cell result, consistent with the other value_counts cells.
df['company_location'].value_counts()

In [None]:
# Ten most common employee residences, most frequent first.
# Left as the cell's last expression (no print) so the notebook renders it as
# the cell result, consistent with the other value_counts cells.
df['employee_residence'].value_counts().head(10)

## 5. Company Location & Residence
- **Top company locations:**
  
  Switzerland, Canada, UK, USA, Singapore, China  
- **Top employee residences:**
  
  Switzerland, China, Ireland, Canada, France, India  
→ High concentration of AI jobs in Europe, North America, and parts of Asia.

In [None]:
# Number of postings per remote_ratio value, most frequent first.
# Codes: 0 (No remote), 50 (Hybrid), 100 (Fully remote)
df['remote_ratio'].value_counts()

## 6. Remote Work
- **100% remote: 5,034**
- **50% remote: 4,991**
- **On-site: 4,975**  
→ Balanced distribution with a slight preference for remote work.

In [None]:
# Number of postings per required education level, most frequent first.
df['education_required'].value_counts()

## 7. Education Requirements
- **Bachelor's: 3,863**
- **Master's: 3,688**
- **Associate: 3,688**
- **PhD: 3,761**  
→ Bachelor's is dominant, but Master's and PhD remain valuable.

In [None]:
# Mean salary_usd for each required education level.
df['salary_usd'].groupby(df['education_required']).mean()

## 8. Salary vs Education
- Master's: ~$123K
   
- PhD: ~$119.7K

  
→ Master's degree tends to lead to higher pay compared to PhD.

In [None]:
# Mean salary_usd for each company-size bucket (S / M / L).
df.groupby('company_size')['salary_usd'].agg('mean')

## 9. Salary vs Company Size
- Large: ~$138K
  
- Medium: ~$118K

- Small: ~$108K  
→ Larger companies generally offer higher salaries.

In [None]:
# Mean salary per company size, ordered from lowest to highest mean so the
# bars rise left to right. The three colours are applied in that bar order.
avg_salary_size = (
    df.groupby('company_size')['salary_usd']
    .mean()
    .sort_values()
)

fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(
    avg_salary_size.index,
    avg_salary_size.values,
    color=['gray', 'turquoise', 'purple'],
)
ax.set_ylabel("Average Salary (USD)")
ax.set_title("Average Salary by Company Size")
fig.tight_layout()
plt.show()


In [None]:
# Mean salary_usd for each company, in alphabetical company order.
df['salary_usd'].groupby(df['company_name']).mean()

In [None]:
# Per-company profile, best-paying companies first: mean salary, mean benefits
# score, mean remote ratio, and the first non-missing company_size in each group.
# NOTE(review): 'first' presumes company_size is constant within a company --
# confirm, otherwise the value shown depends on row order.
(
    df.groupby('company_name')
    .agg(
        salary_usd=('salary_usd', 'mean'),
        benefits_score=('benefits_score', 'mean'),
        remote_ratio=('remote_ratio', 'mean'),
        company_size=('company_size', 'first'),
    )
    .sort_values('salary_usd', ascending=False)
)

In [None]:
# Mean salary_usd for each required education level.
# NOTE(review): this recomputes the same salary-by-education aggregation shown
# in the "Salary vs Education" section -- consider removing one of the two cells.
df.groupby('education_required')['salary_usd'].mean()


In [None]:
# Most frequent value of the required_skills column (first one if several tie).
# NOTE(review): this is the most common *whole cell value*; if a cell lists
# several skills, it is not the most requested individual skill -- confirm
# the column format.
df['required_skills'].mode().iloc[0]

In [None]:
# Posting counts for every industry (rows) x job_title (columns) pair,
# obtained by counting the non-null salary_usd entries in each cell.
# The result keeps its original notebook-level name `pivot_table`.
pivot_table = df.pivot_table(
    values='salary_usd',
    index='industry',
    columns='job_title',
    aggfunc='count',
)
pivot_table