# Job Market Insights EDA
Exploratory data analysis (EDA) of job postings: salary distribution, most in-demand skills, and most common job titles.

In [None]:
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt
import seaborn as sns

# Configuration
# NOTE: this is a machine-specific absolute path; point it at your local
# copy of jobs.db before running the notebook.
DB_FILE = r"C:\Users\psaluja1\.gemini\antigravity\scratch\job_market_analytics\jobs.db"

# Skill name / job id pairs, obtained by joining skills_lookup to job_skills
# on skill_id.
SKILLS_QUERY = """
    SELECT s.skill_name, js.job_id 
    FROM skills_lookup s 
    JOIN job_skills js ON s.skill_id = js.skill_id
"""

# Connect to DB
conn = sqlite3.connect(DB_FILE)

# Load Data
# Both result sets are read fully into DataFrames, so the connection is
# released as soon as the reads finish -- even if one of them raises --
# instead of staying open for the lifetime of the kernel.
# `conn` is closed after this cell; reconnect before running further queries.
try:
    df = pd.read_sql("SELECT * FROM jobs_clean", conn)
    skills_df = pd.read_sql(SKILLS_QUERY, conn)
finally:
    conn.close()

print(f"Loaded {len(df)} jobs and {len(skills_df)} skill records.")

## Salary Distribution

In [None]:
# Histogram of the non-null average salaries with a KDE curve overlaid.
plt.figure(figsize=(10, 6))
salaries = df['avg_salary'].dropna()
ax = sns.histplot(salaries, kde=True)
# Label through the Axes that seaborn drew on (the figure's current axes).
ax.set_title('Salary Distribution')
ax.set_xlabel('Average Salary')
plt.show()

## Top 20 Skills

In [None]:
# Count how often each skill appears across job postings and keep the first
# 20 entries of the (descending) frequency table.
skill_counts = skills_df['skill_name'].value_counts()
top_skills = skill_counts.iloc[:20]

# Horizontal bar chart: counts on the x-axis, skill names on the y-axis.
plt.figure(figsize=(12, 8))
ax = sns.barplot(x=top_skills.values, y=top_skills.index)
ax.set_title('Top 20 Most Demanded Skills')
ax.set_xlabel('Count')
plt.show()

## Job Title Frequency

In [None]:
# Frequency table of cleaned job titles, truncated to the 10 most common.
title_counts = df['job_title_clean'].value_counts()
top_titles = title_counts.iloc[:10]

# Horizontal bar chart: counts on the x-axis, job titles on the y-axis.
plt.figure(figsize=(10, 6))
ax = sns.barplot(x=top_titles.values, y=top_titles.index)
ax.set_title('Top 10 Job Titles')
plt.show()