<a href="https://colab.research.google.com/github/BalasuriyaK/AI-Jobs-Datasets/blob/main/Ai_Jobs_Dataset_Ensure_Business_and_Data_Understanding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

import csv

def readcsvtodict(filepath):
    """Read a CSV file into a list of dictionaries keyed by the header row.

    The first record of the file is treated as the header. Every following
    non-blank record becomes one dict mapping header name -> cell text
    (all values are strings; no type conversion is done).

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        A list of row dicts. The list is empty when the file has no header
        row. Returns None when the file does not exist. If a ValueError
        (which includes UnicodeDecodeError) occurs while reading, the rows
        parsed so far are returned.
    """
    data = []
    try:
        # newline='' lets the csv module handle embedded newlines itself;
        # 'utf-8-sig' drops a leading BOM so it cannot leak into the first
        # header name.
        with open(filepath, 'r', newline='', encoding='utf-8-sig') as f:
            reader = csv.reader(f)
            headers = next(reader, None)
            if headers is None:
                # Completely empty file: nothing to parse.
                return data
            width = len(headers)
            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines; skip them.
                    continue
                # Pad short rows with '' so every header has a value instead
                # of raising IndexError; extra trailing cells are ignored.
                padded = row + [''] * (width - len(row))
                data.append(dict(zip(headers, padded)))
    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return None
    except ValueError as e:
        print(f"ValueError in row: {e}")

    return data

# --- STEP 2: LOAD THE DATA ---
# 'ai_jobs' is a list of row dicts (every value is a string) or None when
# the file could not be found.
ai_jobs = readcsvtodict(filepath='/content/ai_jobdata.pycsv.csv')

# --- STEP 3: PERFORM ALL ANALYSIS ---
if not ai_jobs:
    print("Data loading failed. Please check your file path.")
else:
    # Parse each salary exactly once. Rows whose 'salary_usd' is missing or
    # not numeric are skipped instead of being counted as a $0 salary, which
    # would drag the average down.
    salaried_jobs = []
    for job in ai_jobs:
        try:
            salaried_jobs.append((float(job['salary_usd']), job))
        except (KeyError, ValueError):
            continue

    # Pick the column that holds the job location. Prefer an exact
    # 'location' column; otherwise fall back to the first column whose name
    # contains 'location'.
    # NOTE(review): a previous run reported every row as 'Unknown', which
    # suggests the dataset has no column named exactly 'location' - confirm
    # the real column name against the CSV header.
    column_names = list(ai_jobs[0])
    if 'location' in column_names:
        location_key = 'location'
    else:
        location_key = next(
            (name for name in column_names if 'location' in name.lower()),
            'location',
        )

    # 3a. Calculate the average salary
    total_salary = sum(salary for salary, _ in salaried_jobs)
    valid_salaries = len(salaried_jobs)

    if valid_salaries > 0:
        average_salary = total_salary / valid_salaries
        print(f"The average salary (USD) is: ${average_salary:.2f}\n")
    else:
        print("No valid salary data found to calculate the average.\n")


    # 3b. Find the job with the highest salary
    # max() keeps the first row on ties, and no 0 sentinel is needed, so a
    # dataset whose salaries are all <= 0 still yields a result.
    if salaried_jobs:
        highest_salary, highest_paid_job = max(
            salaried_jobs, key=lambda pair: pair[0]
        )
    else:
        highest_salary, highest_paid_job = 0, None

    if highest_paid_job is not None:
        print("The job with the highest salary is:")
        print(f"Job Title: {highest_paid_job.get('job_title', 'N/A')}")
        print(f"Salary (USD): ${highest_paid_job.get('salary_usd', 'N/A')}")
        print(f"Location: {highest_paid_job.get(location_key, 'N/A')}")
    else:
        print("Could not find the highest paid job. The dataset might be empty or have no valid salary data.")

    # 3c. Count jobs per location
    location_counts = {}
    for job in ai_jobs:
        location = job.get(location_key, 'Unknown')
        location_counts[location] = location_counts.get(location, 0) + 1

    print("\nJob counts by location:")
    # Most frequent first; the sort is stable, so ties keep first-seen order.
    for location, count in sorted(location_counts.items(), key=lambda item: item[1], reverse=True):
        print(f"- {location}: {count}")

The average salary (USD) is: $115348.97

The job with the highest salary is:
Job Title: Machine Learning Researcher
Salary (USD): $399095
Location: N/A

Job counts by location:
- Unknown: 15000
