In [28]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [29]:
# Load dataset function
def load_data(file_path):
    """Load the job market dataset CSV file into a pandas DataFrame.

    Args:
        file_path (str): Path to the dataset CSV file.

    Returns:
        pd.DataFrame: The loaded dataset. When a 'Salary' column is present
        it is converted to a numeric dtype; entries that cannot be parsed as
        numbers become NaN.
    """
    # Read from the caller-supplied path instead of a fixed location so the
    # function honours its argument.
    df = pd.read_csv(file_path)
    if 'Salary' in df.columns:
        # errors='coerce' maps missing or malformed salary entries to NaN
        # rather than raising.
        df['Salary'] = pd.to_numeric(df['Salary'], errors='coerce')
    return df


In [30]:
# Basic data overview and summary stats
def summarize_data(df):
    """Print basic summaries of the dataset for initial exploration.

    Prints the DataFrame's info report, followed by descriptive statistics
    for 'Company Rating', value counts for 'Job Type', and the ten most
    frequent 'Position Name' values. A section is skipped when its column
    is not present in the DataFrame.

    Args:
        df (pd.DataFrame): The job market DataFrame.

    Returns:
        None
    """
    print("Dataset Summary:")
    # DataFrame.info() writes its report itself and returns None, so it must
    # not be wrapped in print() (that would emit a stray "None" line).
    df.info()

    if 'Company Rating' in df.columns:
        print("\nCompany Rating Stats:")
        print(df['Company Rating'].describe())

    if 'Job Type' in df.columns:
        print("\nJob Type Counts:")
        print(df['Job Type'].value_counts())

    if 'Position Name' in df.columns:
        print("\nSample Job Titles:")
        print(df['Position Name'].value_counts().head(10))

In [24]:
# Visualize company ratings distribution
def plot_company_rating_distribution(df):
    """Plot a histogram (with KDE curve) of the 'Company Rating' column.

    Args:
        df (pd.DataFrame): The job market DataFrame with 'Company Rating'.

    Output:
        Saves 'company_rating_distribution.png' in the working directory.
    """
    ratings = df['Company Rating'].dropna()  # missing ratings are left out

    fig, ax = plt.subplots(figsize=(8,6))
    sns.histplot(ratings, bins=20, kde=True, ax=ax)
    ax.set_title('Distribution of Company Ratings')
    ax.set_xlabel('Rating')
    ax.set_ylabel('Count')

    fig.savefig('company_rating_distribution.png')
    # Close the figure once it is written so it is not kept open.
    plt.close(fig)


In [25]:
# Visualize job type frequency
def plot_job_type_counts(df):
    """Plot a horizontal count chart of the 'Job Type' column.

    Bars are ordered by descending frequency.

    Args:
        df (pd.DataFrame): The job market DataFrame with 'Job Type'.

    Output:
        Saves 'job_type_counts.png' in the working directory.
    """
    # value_counts() sorts by frequency, so its index gives the bar order.
    types_by_frequency = df['Job Type'].value_counts().index

    fig, ax = plt.subplots(figsize=(8,6))
    sns.countplot(y='Job Type', data=df, order=types_by_frequency, ax=ax)
    ax.set_title('Job Type Counts')
    ax.set_xlabel('Number of Jobs')
    ax.set_ylabel('Job Type')

    fig.savefig('job_type_counts.png')
    plt.close(fig)

In [26]:
# Visualize salary distribution (if available)
def plot_salary_distribution(df):
    """Plot a box plot of the 'Salary' column.

    Args:
        df (pd.DataFrame): The job market DataFrame with 'Salary'.

    Output:
        Saves 'salary_distribution.png' in the working directory.
    """
    fig, ax = plt.subplots(figsize=(8,6))
    sns.boxplot(x='Salary', data=df, ax=ax)
    ax.set_title('Salary Distribution')
    ax.set_xlabel('Salary')

    fig.savefig('salary_distribution.png')
    plt.close(fig)

In [27]:
# Bar chart of job postings per location (top locations only)
def plot_top_locations(df, top_n=10):
    """Plot a horizontal bar chart of the most frequent job locations.

    Args:
        df (pd.DataFrame): The job market DataFrame with 'Location'.
        top_n (int): Number of most frequent locations to show. Defaults to 10.

    Output:
        Saves 'top_locations.png' in the working directory.
    """
    # value_counts() is sorted by frequency, so head(top_n) keeps the
    # top_n most common locations.
    location_counts = df['Location'].value_counts().head(top_n)

    fig, ax = plt.subplots(figsize=(10,6))
    sns.barplot(
        y=location_counts.index,
        x=location_counts.values,
        orient='h',
        ax=ax,
    )
    ax.set_title(f'Top {top_n} Job Locations')
    ax.set_xlabel('Number of Job Posts')
    ax.set_ylabel('Location')

    fig.savefig('top_locations.png')
    plt.close(fig)


In [13]:
# Main analysis function
def main(data_path='Jupyter projects/700 JOBs Data of AI  and Data Fields/jobs_dataset.csv'):
    """Run the full analysis: load the dataset, print summaries, save charts.

    Args:
        data_path (str): Path to the dataset CSV file. Defaults to the
            location used by this project; pass a different path to analyse
            another file without editing this function.

    Output:
        Prints the dataset summary and saves one PNG per chart whose source
        column is present in the dataset.
    """
    df = load_data(data_path)
    summarize_data(df)

    # Each chart depends on a single column; generate it only when that
    # column exists in the loaded dataset.
    if 'Company Rating' in df.columns:
        plot_company_rating_distribution(df)
    if 'Job Type' in df.columns:
        plot_job_type_counts(df)
    if 'Salary' in df.columns:
        plot_salary_distribution(df)
    if 'Location' in df.columns:
        plot_top_locations(df)

# Run the analysis when this code is executed directly (not when imported).
if __name__ == '__main__':
    main()

NameError: name 'pd' is not defined