In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the featured job listings CSV; the cells below read it through `df`.
LISTINGS_CSV = "../Data/Processed/featured_job_listings.csv"
df = pd.read_csv(LISTINGS_CSV)

In [None]:
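# Optional sanity checks on the loaded frame, left disabled.
# Uncomment one at a time: Jupyter only echoes a cell's last expression.
# df.shape
# df.info()
# df.describe(include="all")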

In [None]:
# Job demand by city: count postings per city and keep the ten largest counts.
city_counts = df["City"].value_counts().head(10)

# Horizontal bars: counts along x, city names along y.
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(x=city_counts.values, y=city_counts.index, ax=ax)

ax.set_title("Top 10 Cities by Job Demand")
ax.set_xlabel("Number of Job Postings")
ax.set_ylabel("City")
fig.tight_layout()

# Export the chart to disk at 300 dpi.
fig.savefig("../outputs/figures/job_demand_by_city.png", dpi=300, bbox_inches="tight")

In [None]:
# Experience level distribution: number of postings per ExperienceLevel value.

plt.figure(figsize=(6, 8))
sns.countplot(
    data = df,
    x = "ExperienceLevel",
    # Explicit bar order so the axis reads Junior -> Senior with "Unknown" last.
    # NOTE(review): seaborn draws only the levels listed here; confirm the column
    # contains no other labels, otherwise those rows are left off the chart.
    order = ["Junior", "Mid", "Senior", "Unknown"]
)

# Title spelling fixed ("Destribution" -> "Distribution", "ExperienceLevel" ->
# "Experience Level"); label casing aligned with the other charts.
plt.title("Job Distribution by Experience Level")
plt.xlabel("Experience Level")
plt.ylabel("Number of Job Postings")
plt.tight_layout()

# Export the chart to disk at 300 dpi.
plt.savefig(
    "../outputs/figures/experience-level-by-distribution.png",
    dpi = 300,
    bbox_inches = "tight"
)

In [None]:
# Salary distribution: keep only the rows where Salary_Avg is present.

salary_df = df.dropna(subset=["Salary_Avg"])

In [None]:
# Histogram of Salary_Avg (30 bins) with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(salary_df["Salary_Avg"], bins=30, kde=True, ax=ax)

ax.set(
    title="Salary Distribution (Annual, Disclosed Salaries Only)",
    xlabel="Salary (INR)",
    ylabel="Frequency",
)
fig.tight_layout()

# Export the chart to disk at 300 dpi.
fig.savefig("../outputs/figures/salary_distribution.png", dpi=300, bbox_inches="tight")

In [None]:
# Salary vs experience: keep only the rows where both Salary_Avg and Exp_Avg are present.

salary_exp_df = df.dropna(subset=["Salary_Avg", "Exp_Avg"])

In [None]:
# Scatter of Salary_Avg against Exp_Avg; points are semi-transparent so
# overlapping observations remain distinguishable.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=salary_exp_df, x="Exp_Avg", y="Salary_Avg", alpha=0.4, ax=ax)

ax.set_title("Salary vs Experience (Disclosed Salary Only)")
ax.set_xlabel("Average Experience (Years)")
ax.set_ylabel("Average Salary (INR)")
fig.tight_layout()

# Export the chart to disk at 300 dpi.
fig.savefig("../outputs/figures/salary_vs_experience.png", dpi=300, bbox_inches="tight")

In [None]:
# Same salary/experience scatter, with seaborn's fitted regression line drawn in red.
point_style = {"alpha": 0.3}
trend_style = {"color": "red"}

fig, ax = plt.subplots(figsize=(8, 5))
sns.regplot(
    data=salary_exp_df,
    x="Exp_Avg",
    y="Salary_Avg",
    scatter_kws=point_style,
    line_kws=trend_style,
    ax=ax,
)

ax.set(
    title="Salary vs Experience with Trend Line",
    xlabel="Average Experience (Years)",
    ylabel="Average Salary (INR)",
)
fig.tight_layout()

# Export the chart to disk at 300 dpi.
fig.savefig("../outputs/figures/salary_vs_experience_trend.png", dpi=300, bbox_inches="tight")

In [None]:
# Skill demand analysis: number of postings per PrimarySkill value.

skill_counts = df["PrimarySkill"].value_counts()

# Horizontal bars: counts along x, skill names along y.
fig, ax = plt.subplots(figsize=(7, 4))
sns.barplot(x=skill_counts.values, y=skill_counts.index, ax=ax)

ax.set_title("Primary Skill Demand in Job Market")
ax.set_xlabel("Number of Job Postings")
ax.set_ylabel("Primary Skill")
fig.tight_layout()

# Export the chart to disk at 300 dpi.
fig.savefig("../outputs/figures/primary_skill_demand.png", dpi=300, bbox_inches="tight")

In [None]:
# High-value role analysis

# High-value roles by city: ten cities with the most rows where HighValueRole == 1.
# Read by the bar-chart cell that follows.
high_value_city = (
    df.loc[df["HighValueRole"] == 1, "City"]
    .value_counts()
    .head(10)
)

# Percentage share of each HighValueRole value across all postings.
# Keep this as the final statement of the cell: Jupyter renders only a cell's
# last expression, so placed anywhere else the result is silently discarded.
df["HighValueRole"].value_counts(normalize=True) * 100

In [None]:
# Bar chart of the per-city high-value role counts held in `high_value_city`.
plt.figure(figsize=(8, 4))
sns.barplot(
    x = high_value_city.values,
    y = high_value_city.index
)

# Labels use the hyphenated "High-Value" spelling and singular "Number of"
# consistently across title and axis.
plt.title("Top Cities for High-Value Job Roles")
plt.xlabel("Number of High-Value Roles")
plt.ylabel("City")
plt.tight_layout()

# Export the chart to disk at 300 dpi.
plt.savefig(
    "../outputs/figures/high_value_roles_by_city.png",
    dpi = 300,
    bbox_inches = "tight"
)

In [None]:
# High-value roles by skill: postings per PrimarySkill among rows where HighValueRole == 1.
high_value_skill = (
    df.loc[df["HighValueRole"] == 1, "PrimarySkill"]
    .value_counts()
)

# Horizontal bars: counts along x, skill names along y.
plt.figure(figsize=(7, 4))
sns.barplot(
    x = high_value_skill.values,
    y = high_value_skill.index
)

plt.title("Primary Skills in High-Value Roles")
plt.xlabel("Number of High-Value Roles")
# The skill names are on the y-axis, so this label must go through ylabel;
# a second xlabel call would overwrite the count label set above.
plt.ylabel("Primary Skill")
plt.tight_layout()

# Export the chart to disk at 300 dpi.
plt.savefig(
    "../outputs/figures/high_value_roles_by_skill.png",
    dpi = 300,
    bbox_inches = "tight"
)