# Import Required Libraries
Import pandas for data manipulation and matplotlib for visualization.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the Dataset
Read the processed jobs.csv file into a pandas DataFrame.

In [None]:
# Load the processed job postings; the path is relative to the notebook's
# working directory.
data = pd.read_csv(filepath_or_buffer="../data/processed/jobs.csv")

# Identify Top Hiring Companies
Group the data by company name and count the number of job postings for each. Sort and select the top N companies.

In [None]:
# Number of companies to keep in the ranking; change to widen or narrow it.
top_n = 10

# value_counts() orders companies by posting count (largest first), so the
# first `top_n` entries are the top hiring companies.
company_counts = data['company_name'].value_counts().iloc[:top_n]

# Plain list of the selected company names, in ranking order.
top_companies = company_counts.index.to_list()

# Calculate Average Salary Range per Company
For each top company, compute the mean of the `salary_min` column and the mean of the `salary_max` column, then take the midpoint of those two means as the company's average salary.

In [None]:
# NOTE: this cell relies on the dataset providing 'salary_min' and
# 'salary_max' columns.
# Per-company mean of each salary bound, restricted to the top companies,
# plus 'average_salary' as the row-wise mean of those two per-company means.
salary_stats = (
    data.loc[data['company_name'].isin(top_companies)]
    .groupby('company_name')[['salary_min', 'salary_max']]
    .agg('mean')
    .reset_index()
    .assign(
        average_salary=lambda stats: stats[['salary_min', 'salary_max']].mean(axis=1)
    )
)

# Analyze Job Locations
For each top hiring company, find its most common job location and the number of postings at that location.

In [None]:
# Most common job location for each top hiring company.
#
# Result: one row per company with the columns
#   company_name, top_location, job_count
#
# The per-group value_counts() yields a Series indexed by
# (company_name, location). Its name depends on the pandas version
# ('count' on 2.x, the original column name on 1.x), so the Series is renamed
# explicitly before the index is turned into columns. Resetting the *whole*
# index (not just level 0) is what makes 'location' a real column that can
# then be renamed to 'top_location'.
location_counts = (
    data[data['company_name'].isin(top_companies)]
    .groupby('company_name')['location']
    .value_counts()
    .rename('job_count')
    .reset_index()
    # value_counts() sorts each company's locations by frequency (descending),
    # so the first row seen for a company is its most common location.
    .drop_duplicates(subset='company_name', keep='first')
    .rename(columns={'location': 'top_location'})
    .reset_index(drop=True)
)

# Visualize Top Hiring Companies
Create a bar chart showing the number of job postings for the top hiring companies.

In [None]:
# Bar chart: number of job postings for each of the top hiring companies.
plt.figure(figsize=(10, 6))
company_counts.plot.bar(color='teal')
plt.gca().set(
    title='Top Hiring Companies by Number of Job Postings',
    xlabel='Company Name',
    ylabel='Number of Job Postings',
)
# Slant the company names so long labels do not overlap.
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

# Visualize Average Salary Range by Company
Create a bar chart of the average salary range for each of the top hiring companies.

In [None]:
# Bar chart: one bar per top company, with height equal to its
# 'average_salary' value from salary_stats.
plt.figure(figsize=(10, 6))
plt.bar(
    x=salary_stats['company_name'],
    height=salary_stats['average_salary'],
    color='orange',
)
plt.gca().set(
    title='Average Salary Range by Top Hiring Companies',
    xlabel='Company Name',
    ylabel='Average Salary',
)
# Slant the company names so long labels do not overlap.
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

# Visualize Job Locations Distribution
Create a bar chart showing the ten most common job locations across all postings from the top hiring companies.

In [None]:
# Count postings per location across all top hiring companies combined
# (not per company), then keep the ten locations with the most postings.
top_locations = (
    data.loc[data['company_name'].isin(top_companies)]
    .groupby('location')
    .size()
    .sort_values(ascending=False)
    .iloc[:10]
)

# Bar chart: number of postings for each of those locations.
plt.figure(figsize=(10, 6))
top_locations.plot.bar(color='purple')
plt.gca().set(
    title='Top Job Locations Among Top Hiring Companies',
    xlabel='Location',
    ylabel='Number of Job Postings',
)
# Slant the location names so long labels do not overlap.
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()