# Global AI Job Market & Salary Trends 2025

The dataset was obtained from the Kaggle website.

## Import required libraries and data

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import gdown

In [None]:
# Download the dataset CSV from Google Drive with gdown, then load it into
# a DataFrame.
# Shared file page:
# https://drive.google.com/file/d/1dsikUM2umj1sD19A_RXB-v9a2vPziXyb/view?usp=sharing
file_id = "1dsikUM2umj1sD19A_RXB-v9a2vPziXyb"
# Direct-download form of the share link, built from the file id above.
url = f"https://drive.google.com/uc?id={file_id}"
# Local file name the download is written to and later read from.
output = "job_market.csv"

# quiet=False keeps gdown's progress output visible in the cell.
gdown.download(url, output, quiet=False)

# Load the downloaded CSV and preview it.
df = pd.read_csv(output)
df.head()

NameError: name 'gdown' is not defined

In [None]:
# Print the column names, non-null counts and dtypes of the raw dataset.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15000 entries, 0 to 14999
Data columns (total 19 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   job_id                  15000 non-null  object 
 1   job_title               15000 non-null  object 
 2   salary_usd              15000 non-null  int64  
 3   salary_currency         15000 non-null  object 
 4   experience_level        15000 non-null  object 
 5   employment_type         15000 non-null  object 
 6   company_location        15000 non-null  object 
 7   company_size            15000 non-null  object 
 8   employee_residence      15000 non-null  object 
 9   remote_ratio            15000 non-null  int64  
 10  required_skills         15000 non-null  object 
 11  education_required      15000 non-null  object 
 12  years_experience        15000 non-null  int64  
 13  industry                15000 non-null  object 
 14  posting_date            15000 non-null

## Data Cleaning

- Check for missing values
- Convert columns to appropriate types
- Add columns that decode the abbreviated codes (e.g. `employment_type` → `employment_type_full`)


| Column name | Initial type | Converted type |
|-------------|--------------|----------------|
| posting_date | string | datetime |
| application_deadline | string | datetime |
| required_skills | string | array |


In [None]:
def parce_array(df_string):
    """Parse the text of a Python literal (e.g. ``"['a', 'b']"``) safely.

    The value is parsed with ``ast.literal_eval``, which only accepts
    literals (lists, tuples, strings, numbers, ...) and never executes
    code, unlike ``eval``.

    Args:
        df_string: Cell value to parse, normally a string.

    Returns:
        The parsed literal, or an empty list when the value is not a valid
        Python literal (including non-string values such as ``None``/NaN).
    """
    # Imported locally so this cell does not depend on the import cell
    # having been re-run.
    import ast

    # NOTE(review): plain comma-separated text such as "Python, SQL" is not
    # a Python literal and therefore yields [] - confirm the column really
    # stores list literals.
    try:
        return ast.literal_eval(df_string)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return []

def parce_dates(df_string):
    """Convert ``YYYY-MM-DD`` text to pandas datetimes, best effort.

    Args:
        df_string: A single value (the function is applied element-wise by
            ``clean_dataframe``) or a sequence/Series of values.

    Returns:
        The result of ``pd.to_datetime`` with unparseable entries coerced
        to ``NaT``. If the conversion itself raises, the fallback mirrors
        the input shape: a Series of ``NaT`` for a Series/Index/list/tuple,
        and a single ``pd.NaT`` for anything else.
    """
    try:
        return pd.to_datetime(df_string, format='%Y-%m-%d', errors='coerce')
    except Exception:
        # Deliberately broad: cleaning should never abort on one odd value.
        if isinstance(df_string, (pd.Series, pd.Index, list, tuple)):
            return pd.Series([pd.NaT] * len(df_string))
        # Scalar (or unsized) input: a Series here would end up nested in a
        # single cell, or len() would raise inside the handler.
        return pd.NaT

def decode_column(df, df_column):
    """Add a ``<df_column>_full`` column with human-readable labels.

    Each code in ``df[df_column]`` is looked up in the label table below;
    codes that are missing from the table (or null) become ``'Other'``.
    The frame is modified in place and also returned.
    """
    employment_labels = {
        'FT': 'Full-time',
        'PT': 'Part-time',
        'CT': 'Contract',
        'FL': 'Freelance',
        'IN': 'Internship',
        'TP': 'Temporary',
    }
    experience_labels = {
        'EN': 'Entry',
        'MI': 'Mid',
        'SE': 'Senior',
        'EX': 'Executive',
    }
    # One lookup table serves both coded columns; their codes do not overlap.
    labels = {**employment_labels, **experience_labels}

    decoded = df[df_column].map(labels)
    df[f"{df_column}_full"] = decoded.fillna('Other')
    return df

def clean_dataframe(df):
    """Return a cleaned copy of the job-market frame.

    Works on a copy, so the caller's frame is left untouched. Steps:
    parse ``required_skills`` into Python objects, convert the two date
    columns to datetimes, and add ``*_full`` label columns for the coded
    ``employment_type`` and ``experience_level`` columns.
    """
    cleaned = df.copy()

    cleaned['required_skills'] = cleaned['required_skills'].apply(parce_array)

    for date_column in ('posting_date', 'application_deadline'):
        cleaned[date_column] = cleaned[date_column].apply(parce_dates)

    for coded_column in ('employment_type', 'experience_level'):
        cleaned = decode_column(cleaned, coded_column)

    return cleaned

# Replace the raw frame with its cleaned copy, then re-check the dtypes.
df = clean_dataframe(df)
df.info()

## Export to Google Sheets
Export analysis results to Google Sheets using gspread.

In [None]:
# Export the cleaned DataFrame to a new Google Sheets spreadsheet.
import gspread
from gspread_dataframe import set_with_dataframe
# The auth helper comes from google.colab, so this cell expects that package
# to be importable.
from google.colab import auth
auth.authenticate_user()

# Fetch the default Google credentials and authorize a gspread client.
from google.auth import default
creds, _ = default()
gc = gspread.authorize(creds)

# NOTE(review): create() is called on every run - presumably this makes a
# new 'JM_Analysis' spreadsheet each time; confirm before re-running.
spreadsheet = gc.create('JM_Analysis')
# Worksheet at index 0 of the spreadsheet just created.
worksheet = spreadsheet.get_worksheet(0)

# Write the DataFrame into the worksheet.
set_with_dataframe(worksheet, df)

# Last expression of the cell: displays the spreadsheet's URL.
spreadsheet.url