### Import libraries

In [1]:
import requests
import json
import csv
import pandas as pd

### Read the JSON file from online source and load the data

In [None]:
# Specify the URL where the JSON data is located
url = 'https://simplonline-v3-prod.s3.eu-west-3.amazonaws.com/media/file/json/employes-data-6710b180e7d62991266985.json'

# Make a request to get the data from the URL; the timeout keeps the cell
# from hanging indefinitely if the server never answers
response = requests.get(url, timeout=30)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Parse the JSON data from the response
    raw_data = response.json()

    # Print the parsed data to verify it was loaded as a Python object
    print(raw_data)
else:
    # Fail here with a clear message: otherwise `raw_data` stays undefined
    # and the following cells break with an unrelated NameError
    raise RuntimeError(
        f"Could not download employee data: HTTP {response.status_code} from {url}"
    )

### Load the data into a variable

In [None]:
# Bind a second name to the raw data (this references the same object; it is not a copy)
all_company_data = raw_data

# Display the data to verify it loaded correctly
all_company_data

### Calculate Global company statistics

In [None]:
def getGlobalCompanyStatistics(d: dict) -> dict:
    """
    Calculates the global average, highest, and lowest monthly salaries across all companies in a given dictionary.

    A monthly salary is four weeks of pay. Hours worked beyond the contract hours are paid at 1.5 times
    the hourly rate.

    [Args]
    d (dict): A dictionary where the keys are company names (strings) and the values are lists of dictionaries.
              Each dictionary holds one employee's details. This function reads the 'hourly_rate',
              'weekly_hours_worked' and 'contract_hours' keys; a missing key counts as 0.

    [Return]
    dict: A dictionary with the single key 'Global', whose value is a dictionary with keys:
          - 'average_salary': The average of the employee salaries across all companies.
          - 'highest_salary': The highest of the employee salaries across all companies.
          - 'lowest_salary': The lowest of the employee salaries across all companies.
          All three values are rounded to 2 decimal places.

    [Raises]
    ValueError: If the dictionary contains no employees at all.
    """

    # Monthly salaries (as floats) of every employee of every company
    global_salaries = []

    for company_employees in d.values():
        for personnel in company_employees:

            # Get employee details
            weekly_hours_worked = personnel.get("weekly_hours_worked", 0)
            contract_hours = personnel.get("contract_hours", 0)
            hourly_rate = personnel.get("hourly_rate", 0)

            if weekly_hours_worked <= contract_hours:
                # No overtime: every hour is paid at the normal rate
                global_salary = (hourly_rate * weekly_hours_worked) * 4
            else:
                # Overtime: hours above the contract are paid at 1.5x the rate
                over_time = weekly_hours_worked - contract_hours
                contract_pay = contract_hours * hourly_rate
                overtime_pay = over_time * (hourly_rate * 1.5)
                global_salary = (contract_pay + overtime_pay) * 4

            # Convert once, here, so each salary is stored exactly one time
            global_salaries.append(float(global_salary))

    # Without any salary there is nothing to aggregate; say so explicitly
    # instead of failing inside max() or dividing by zero
    if not global_salaries:
        raise ValueError("Cannot compute global salary statistics: no employees found")

    # One salary was stored per employee, so the list length is the headcount
    global_salary_average = sum(global_salaries) / len(global_salaries)

    # Return the global statistics
    return {
        "Global": {
            "average_salary": round(global_salary_average, 2),
            "highest_salary": round(max(global_salaries), 2),
            "lowest_salary": round(min(global_salaries), 2),
        }
    }

# Call the function on the loaded data and print the returned statistics
print(getGlobalCompanyStatistics(all_company_data))

### Calculate ProjectLead company statistics

In [None]:
def getProjectLeadCompanyStatistics(d: dict) -> dict:
    """
    Calculates the average, highest, and lowest monthly salary of the ProjectLead employees in the source data
    dictionary and aggregates each employee's name, job title and salary.

    A monthly salary is four weeks of pay. Hours worked beyond the contract hours are paid at 1.5 times
    the hourly rate.

    [Args]
    d (dict): A dictionary where the keys are company names (strings) and the values are lists of dictionaries.
              Each dictionary holds one employee's details. This function reads the 'name', 'job', 'hourly_rate',
              'weekly_hours_worked' and 'contract_hours' keys; a missing key counts as 0.

    [Return]
    dict: An empty dictionary when 'ProjectLead' is not a key of `d`. Otherwise a dictionary with the single key
          'ProjectLead', whose value is a dictionary with keys:
          - 'average_salary': The average employee salary, rounded to 2 decimal places.
          - 'highest_salary': The highest employee salary, rounded to 2 decimal places.
          - 'lowest_salary': The lowest employee salary, rounded to 2 decimal places.
          - 'employee_details': A list of dictionaries with the keys 'name', 'job' and 'salary'.
    """
    target = "ProjectLead"

    # Nothing to report when the company is not part of the data
    if target not in d:
        return {}

    staff = d[target]
    headcount = float(len(staff))

    # One {"name", "job", "salary"} record per employee, in source order
    records = []

    for person in staff:
        name = person.get("name", 0)
        job = person.get("job", 0)
        hours = person.get("weekly_hours_worked", 0)
        contracted = person.get("contract_hours", 0)
        rate = person.get("hourly_rate", 0)

        # Both comparisons stay explicit so that values which compare
        # neither way are treated exactly as a two-step check would
        if hours <= contracted:
            # Regular month: all hours at the normal rate
            pay = (rate * hours) * 4
        elif hours > contracted:
            # Overtime month: the extra hours earn 1.5x the rate
            extra_hours = hours - contracted
            base_pay = contracted * rate
            extra_pay = extra_hours * (rate * 1.5)
            pay = (base_pay + extra_pay) * 4

        records.append({"name": name, "job": job, "salary": pay})

    # Statistics are computed on float values
    pay_values = [float(record["salary"]) for record in records]
    highest = max(pay_values)
    lowest = min(pay_values)
    mean = sum(pay_values) / headcount

    return {
        target: {
            "average_salary": round(mean, 2),
            "highest_salary": round(highest, 2),
            "lowest_salary": round(lowest, 2),
            "employee_details": records,
        }
    }

# Call the function on the loaded data and print the returned statistics
print(getProjectLeadCompanyStatistics(all_company_data))

### Calculate TechCorp company statistics

In [None]:
def getTechCorpCompanyStatistics(d: dict) -> dict:
    """
    Calculates the average, highest, and lowest monthly salary of the TechCorp employees in the source data
    dictionary and aggregates each employee's name, job title and salary.

    A monthly salary is four weeks of pay. Hours worked beyond the contract hours are paid at 1.5 times
    the hourly rate.

    [Args]
    d (dict): A dictionary where the keys are company names (strings) and the values are lists of dictionaries.
              Each dictionary holds one employee's details. This function reads the 'name', 'job', 'hourly_rate',
              'weekly_hours_worked' and 'contract_hours' keys; a missing key counts as 0.

    [Return]
    dict: An empty dictionary when 'TechCorp' is not a key of `d`. Otherwise a dictionary with the single key
          'TechCorp', whose value is a dictionary with keys:
          - 'average_salary': The average employee salary, rounded to 2 decimal places.
          - 'highest_salary': The highest employee salary, rounded to 2 decimal places.
          - 'lowest_salary': The lowest employee salary, rounded to 2 decimal places.
          - 'employee_details': A list of dictionaries with the keys 'name', 'job' and 'salary'.
    """

    # The statistics only make sense for TechCorp itself, so look the company
    # up directly; the result must not depend on where (or whether) other
    # companies appear in the dictionary
    if "TechCorp" not in d:
        return {}

    company_employees = d["TechCorp"]

    # Number of employees in the company
    number_of_tech_corp_employees = float(len(company_employees))

    # List to store employee details
    tech_corp_employee_details = []

    for personnel in company_employees:
        # Get employee details
        name = personnel.get("name", 0)
        job = personnel.get("job", 0)
        weekly_hours_worked = personnel.get("weekly_hours_worked", 0)
        contract_hours = personnel.get("contract_hours", 0)
        hourly_rate = personnel.get("hourly_rate", 0)

        if weekly_hours_worked <= contract_hours:
            # No overtime: every hour is paid at the normal rate
            total_salary = (hourly_rate * weekly_hours_worked) * 4
        else:
            # Overtime: hours above the contract are paid at 1.5x the rate
            over_time = weekly_hours_worked - contract_hours
            contract_pay = contract_hours * hourly_rate
            overtime_pay = over_time * (hourly_rate * 1.5)
            total_salary = (contract_pay + overtime_pay) * 4

        # Append individual employee details to the tech_corp_employee_details list
        tech_corp_employee_details.append({"name": name, "job": job, "salary": total_salary})

    # Statistics are computed on float values
    individual_salaries_float = [float(entry["salary"]) for entry in tech_corp_employee_details]

    # Calculate the average, minimum, and maximum salaries
    max_tech_corp_salary = max(individual_salaries_float)
    min_tech_corp_salary = min(individual_salaries_float)
    tech_corp_salary_average = sum(individual_salaries_float) / number_of_tech_corp_employees

    # Return the company statistics
    return {
        "TechCorp": {
            "average_salary": round(tech_corp_salary_average, 2),
            "highest_salary": round(max_tech_corp_salary, 2),
            "lowest_salary": round(min_tech_corp_salary, 2),
            "employee_details": tech_corp_employee_details,
        }
    }

# Call the function on the loaded data and print the returned statistics
print(getTechCorpCompanyStatistics(all_company_data))

### Calculate DesignWorks company statistics

In [None]:
def getDesignWorksCompanyStatistics(d: dict) -> dict:
    """
    Calculates the average, highest, and lowest monthly salary of the DesignWorks employees in the source data
    dictionary and aggregates each employee's name, job title and salary.

    A monthly salary is four weeks of pay. Hours worked beyond the contract hours are paid at 1.5 times
    the hourly rate.

    [Args]
    d (dict): A dictionary where the keys are company names (strings) and the values are lists of dictionaries.
              Each dictionary holds one employee's details. This function reads the 'name', 'job', 'hourly_rate',
              'weekly_hours_worked' and 'contract_hours' keys; a missing key counts as 0.

    [Return]
    dict: An empty dictionary when 'DesignWorks' is not a key of `d`. Otherwise a dictionary with the single key
          'DesignWorks', whose value is a dictionary with keys:
          - 'average_salary': The average employee salary, rounded to 2 decimal places.
          - 'highest_salary': The highest employee salary, rounded to 2 decimal places.
          - 'lowest_salary': The lowest employee salary, rounded to 2 decimal places.
          - 'employee_details': A list of dictionaries with the keys 'name', 'job' and 'salary'.
    """
    company_name = "DesignWorks"

    # No entry for the company means there are no statistics to build
    if company_name not in d:
        return {}

    employees = d[company_name]
    employee_count = float(len(employees))

    # Per-employee summary rows, kept in source order
    details = []

    for worker in employees:
        name = worker.get("name", 0)
        job = worker.get("job", 0)
        hours = worker.get("weekly_hours_worked", 0)
        contracted = worker.get("contract_hours", 0)
        rate = worker.get("hourly_rate", 0)

        # Both comparisons stay explicit so that values which compare
        # neither way are treated exactly as a two-step check would
        if hours <= contracted:
            # Worked within the contract: plain hourly pay for four weeks
            monthly_pay = (rate * hours) * 4
        elif hours > contracted:
            # Worked beyond the contract: extra hours are paid at 1.5x
            extra_hours = hours - contracted
            base_pay = contracted * rate
            extra_pay = extra_hours * (rate * 1.5)
            monthly_pay = (base_pay + extra_pay) * 4

        details.append({"name": name, "job": job, "salary": monthly_pay})

    # Statistics are computed on float values
    salaries = list(map(float, (row["salary"] for row in details)))
    average = sum(salaries) / employee_count
    highest = max(salaries)
    lowest = min(salaries)

    return {
        company_name: {
            "average_salary": round(average, 2),
            "highest_salary": round(highest, 2),
            "lowest_salary": round(lowest, 2),
            "employee_details": details,
        }
    }

# Call the function on the loaded data and print the returned statistics
print(getDesignWorksCompanyStatistics(all_company_data))

### Aggregate all company (and global) statistics and print the results.

In [None]:
def aggregate_statistics():
    """
    Collects the global statistics and the statistics of each company into one dictionary.

    The statistics are computed from the module-level `raw_data`.

    [Return]
    dict: The merged results of the TechCorp, DesignWorks, ProjectLead and global statistics functions,
          inserted in that order.
    """
    # Compute every summary from the downloaded data
    global_summary = getGlobalCompanyStatistics(raw_data)
    lead_summary = getProjectLeadCompanyStatistics(raw_data)
    tech_summary = getTechCorpCompanyStatistics(raw_data)
    design_summary = getDesignWorksCompanyStatistics(raw_data)

    # Merge the summaries; the tuple order fixes the order of the entries
    merged = {}
    for summary in (tech_summary, design_summary, lead_summary, global_summary):
        merged.update(summary)

    return merged

# Build the aggregated statistics (per company plus the global summary) once;
# the printing and CSV cells below reuse this dictionary
stats_data = aggregate_statistics()


def print_statistics(stats):
    """
    Prints, for every company in `stats`, its employees sorted by salary (highest first) followed by the
    company's salary statistics.

    [Args]
    stats (dict): A dictionary where the keys are company names and the values are dictionaries with the keys
                  'average_salary', 'highest_salary' and 'lowest_salary', and optionally 'employee_details'
                  (a list of dictionaries with the keys 'name', 'job' and 'salary').

    [Return]
    None: The report is written to standard output.
    """
    # Iterate over the dictionary that was passed in, so the function reports
    # on its argument rather than on a module-level variable
    for company, company_stats in stats.items():
        print(f"Entreprise: {company}")

        # Entries without employee details only get the summary section
        employee_details = company_stats.get("employee_details")
        if employee_details:
            # Sort employee details by salary in descending order
            sorted_employee_details = sorted(employee_details, key=lambda x: x["salary"], reverse=True)

            # Print each employee's details with formatted columns
            for employee in sorted_employee_details:
                print(f'{employee["name"]:<10} | {employee["job"]:<15} | Salaire mensuel: {employee["salary"]:.2f}€')

        # Print company-specific salary statistics
        print("\n========================================================")
        print(f"Statistiques des salaires pour l'entreprise {company}:")
        print(f"Salaire moyen: {company_stats['average_salary']:.2f}€")
        print(f"Salaire le plus élevé: {company_stats['highest_salary']:.2f}€")
        print(f"Salaire le plus bas: {company_stats['lowest_salary']:.2f}€")
        print("========================================================\n")

# Print the report for all companies and the global summary
print_statistics(stats_data)


### Bonus (1): Generate CSV file

In [None]:
def generate_csv(stats_data, filename='salary_statistics.csv'):
    """
    Writes the individual salaries and the per-company salary statistics to a CSV file.

    The file contains two sections separated by an empty row: one row per employee, then one row of
    statistics per company. Entries of `stats_data` that have no 'employee_details' key, or an empty one,
    are left out of both sections.

    [Args]
    stats_data (dict): A dictionary where the keys are company names and the values are dictionaries with the
                       keys 'average_salary', 'highest_salary', 'lowest_salary' and 'employee_details'
                       (a list of dictionaries with the keys 'name', 'job' and 'salary').
    filename (str): Path of the CSV file to create (an existing file is overwritten).

    [Return]
    None: The data is written to `filename` and a confirmation message is printed.
    """

    def has_employees(company_stats):
        # True only when the entry carries a non-empty 'employee_details' value
        return "employee_details" in company_stats and bool(company_stats["employee_details"])

    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        # Section 1: one row per employee, salary rounded to 2 decimals
        writer.writerow(['Company', 'Employee Name', 'Job Title', 'Monthly Salary (€)'])
        for company, stats in stats_data.items():
            if not has_employees(stats):
                continue
            writer.writerows(
                [company, employee["name"], employee["job"], round(employee["salary"], 2)]
                for employee in stats["employee_details"]
            )

        # Empty row separating the two sections
        writer.writerow([])

        # Section 2: one row of summary statistics per company
        writer.writerow(['Company', 'Average Salary (€)', 'Highest Salary (€)', 'Lowest Salary (€)'])
        for company, stats in stats_data.items():
            if has_employees(stats):
                writer.writerow([company, stats['average_salary'], stats['highest_salary'], stats['lowest_salary']])

    print(f'CSV file "{filename}" generated successfully.')

# Write the aggregated statistics to the default CSV file ('salary_statistics.csv')
generate_csv(stats_data)

### Read and load csv file

In [None]:
# Read a summary statistics csv file into a pandas DataFrame
# NOTE(review): this file name differs from generate_csv's default output
# ('salary_statistics.csv') — confirm which file is meant to be read here
data_to_read = pd.read_csv("salary_statistics_streamlit.csv")

# Display the loaded DataFrame
data_to_read 

# You may want to install the Rainbow CSV extension to view the data in a more readable format