In [41]:
#Task 1 - creating an emissions standard filtering function

import pandas as pd

# Fetch the bus journey records and the registration / emission-class lookup
simplified_buses = pd.read_csv("https://raw.githubusercontent.com/futureCodersSE/python-programming-for-data/main/Datasets/simplified_12_10_22.csv")
regs = pd.read_csv("https://raw.githubusercontent.com/futureCodersSE/python-programming-for-data/main/Datasets/bus_regs.csv")

# For each frame print a heading, its first rows and its column index
for heading, frame in (
    ("simplified_buses DataFrame:", simplified_buses),
    ("\nregs DataFrame:", regs),
):
    print(heading)
    print(frame.head())
    print(frame.columns)

def filter_by_emissions_class(buses_df, regs_df, emissions_class):
    """
    Filters the buses dataframe by the specified emissions class using the regs dataframe.

    Column names are matched after stripping surrounding whitespace, emission classes are
    compared case-insensitively, and vehicle references are compared as stripped text so
    that a numeric 'VehicleRef' column still matches a text 'Last tracked' column.
    All cleaning happens on copies: neither input dataframe is modified.

    Parameters:
    buses_df (pd.DataFrame): The dataframe containing bus journey data (needs a 'VehicleRef' column).
    regs_df (pd.DataFrame): The dataframe containing bus registration and emissions class data
        (needs 'Emission Class' and 'Last tracked' columns).
    emissions_class (str): The emissions class to filter by (e.g., 'Euro III').

    Returns:
    pd.DataFrame: A new dataframe containing only the rows of buses_df whose vehicle is of the
    specified emissions class. Column names are whitespace-stripped; values are unchanged.
    """
    # Work on copies: assigning to .columns or to a column of the arguments
    # would silently change the caller's dataframes.
    regs_clean = regs_df.copy()
    regs_clean.columns = regs_clean.columns.str.strip()
    buses_clean = buses_df.copy()
    buses_clean.columns = buses_clean.columns.str.strip()

    # Standardize case for comparison
    regs_clean['Emission Class'] = regs_clean['Emission Class'].str.upper().str.strip()
    emissions_class = emissions_class.upper().strip()

    # Debug: Print unique values for verification
    print(f"Unique emission classes in regs_df: {regs_clean['Emission Class'].unique()}")
    print(f"Unique vehicle refs in buses_df: {buses_clean['VehicleRef'].unique()}")

    # Step 1: Get the vehicle references (as text) for the specified emissions class
    class_rows = regs_clean['Emission Class'] == emissions_class
    vehicle_refs = regs_clean.loc[class_rows, 'Last tracked'].astype(str).str.strip().tolist()
    print(f"Vehicle refs for {emissions_class}: {vehicle_refs}")

    # Step 2: Compare a text view of 'VehicleRef' so differing dtypes cannot
    # make isin() miss every row; the returned column keeps its original dtype.
    refs_as_text = buses_clean['VehicleRef'].astype(str).str.strip()
    filtered_buses_df = buses_clean[refs_as_text.isin(vehicle_refs)].copy()

    # Debug: Print the result
    print(f"Filtered buses dataframe shape: {filtered_buses_df.shape}")
    return filtered_buses_df

# Apply the function: keep the rows of simplified_buses whose vehicle is listed
# in regs under 'Euro III' (the function upper-cases and strips the class name
# before comparing), then preview the first rows of the result.
filtered_buses = filter_by_emissions_class(simplified_buses, regs, 'Euro III')
print(filtered_buses.head())




simplified_buses DataFrame:
   Unnamed: 0 LineRef              OriginName  \
0           0     132  Waterfront_Bus_Station   
1           1     660          Tunbury_Avenue   
2           2       8      Maidstone_Hospital   
3           3     116  Waterfront_Bus_Station   
4           4      85             Freeman_Way   

                    DestinationName   OriginAimedDepartureTime  VehicleRef  \
0  Hempstead_Valley_Shopping_Centre  2022-10-12T09:30:00+00:00        4314   
1                       Breton_Road  2022-10-12T07:28:00+00:00        6412   
2                       King_Street  2022-10-12T08:55:00+00:00        6448   
3  Hempstead_Valley_Shopping_Centre  2022-10-12T08:42:00+00:00        4224   
4              Chequers_Bus_Station  2022-10-12T09:53:00+00:00        6152   

   VehicleLocation.Longitude  VehicleLocation.Latitude  
0                   0.590251                 51.367281  
1                   0.499455                 51.370171  
2                   0.523764         

In [47]:
# Task 2 - a function which calculates the percentage of buses in the given period that were a particular emissions class

import pandas as pd

# Load the dataframes: simplified_buses holds the bus journey rows (including
# 'VehicleRef'); regs is the lookup with 'Emission Class' and 'Last tracked'.
# NOTE(review): the same two CSVs are downloaded again in other cells of this notebook.
simplified_buses = pd.read_csv("https://raw.githubusercontent.com/futureCodersSE/python-programming-for-data/main/Datasets/simplified_12_10_22.csv")
regs = pd.read_csv("https://raw.githubusercontent.com/futureCodersSE/python-programming-for-data/main/Datasets/bus_regs.csv")

def filter_by_emissions_class(buses_df, regs_df, emissions_class):
    """
    Filters the buses dataframe by the specified emissions class using the regs dataframe.

    Column names are matched after stripping surrounding whitespace, emission classes are
    compared case-insensitively, and vehicle references are compared as text.
    All cleaning happens on copies: neither input dataframe is modified.

    Parameters:
    buses_df (pd.DataFrame): Bus journey data (needs a 'VehicleRef' column).
    regs_df (pd.DataFrame): Registration data (needs 'Emission Class' and 'Last tracked' columns).
    emissions_class (str): The emissions class to filter by (e.g., 'Euro III').

    Returns:
    pd.DataFrame: A new dataframe with the matching rows of buses_df. Column names are
    whitespace-stripped and 'VehicleRef' is returned as text.
    """
    # Work on copies: assigning to .columns or to a column of the arguments
    # would silently change the caller's dataframes.
    regs_clean = regs_df.copy()
    regs_clean.columns = regs_clean.columns.str.strip()
    buses_clean = buses_df.copy()
    buses_clean.columns = buses_clean.columns.str.strip()

    # Standardize case for comparison
    regs_clean['Emission Class'] = regs_clean['Emission Class'].str.upper().str.strip()
    emissions_class = emissions_class.upper().strip()

    # Convert 'Last tracked' to string for comparison
    regs_clean['Last tracked'] = regs_clean['Last tracked'].astype(str).str.strip()

    # Debug: Print unique values for verification
    print(f"Unique emission classes in regs_df: {regs_clean['Emission Class'].unique()}")
    print(f"Unique vehicle refs in buses_df: {buses_clean['VehicleRef'].unique()}")

    # Step 1: Get a list of vehicle references for the specified emissions class
    class_rows = regs_clean['Emission Class'] == emissions_class
    vehicle_refs = regs_clean.loc[class_rows, 'Last tracked'].tolist()
    print(f"Vehicle refs for {emissions_class}: {vehicle_refs}")

    # Convert vehicle refs in the working copy to string for comparison
    buses_clean['VehicleRef'] = buses_clean['VehicleRef'].astype(str)

    # Step 2: Filter the buses dataframe based on the vehicle references
    filtered_buses_df = buses_clean[buses_clean['VehicleRef'].isin(vehicle_refs)].copy()

    # Debug: Print the result
    print(f"Filtered buses dataframe shape: {filtered_buses_df.shape}")
    return filtered_buses_df

def percentage_of_emissions_class(buses_df, emissions_class, regs_df=None):
    """
    Calculates the percentage of rows in buses_df that belong to a particular emissions class.

    Parameters:
    buses_df (pd.DataFrame): The bus journey rows for the period of interest.
    emissions_class (str): The emissions class to measure (e.g., 'Euro III').
    regs_df (pd.DataFrame, optional): Registration / emissions class lookup. When omitted,
        the module-level `regs` dataframe is used, as before.

    Returns:
    float: The share of rows in buses_df matched by filter_by_emissions_class, as a
    percentage rounded to 2 decimal places; 0.0 when buses_df has no rows.
    """
    total_buses = len(buses_df)

    # Avoid division by zero (and skip the filtering work entirely)
    if total_buses == 0:
        return 0.0

    # Fall back to the notebook-level lookup only when the caller did not supply one
    if regs_df is None:
        regs_df = regs

    # Call the filter_by_emissions_class function to get the subset of buses
    subset = filter_by_emissions_class(buses_df, regs_df, emissions_class)

    percentage = (len(subset) / total_buses) * 100

    return round(percentage, 2)

# Example usage: share of the rows in simplified_buses that belong to Euro III
# vehicles, printed as a percentage (the function rounds to 2 decimal places).
percentage = percentage_of_emissions_class(simplified_buses, 'Euro III')
print(f"Percentage of buses with Euro III emissions class: {percentage}%")


Unique emission classes in regs_df: ['EURO III' 'EURO IV' 'EURO V' 'EURO VI']
Unique vehicle refs in buses_df: [4314 6412 6448 4224 6152 3984 4005 4310 4300 6471 3987 6483 4280 4464
 6147 1607 6478 4311 4271 6138 4266 4048 6146 6406 4292 6476 6528 6420
 4296 6440 4060 4317 4267 4281 6430 4270 4063 4120 4075 4263 4264 4273
 6431 6139 4294 6442 4066 6410 1654 4056 4221 4318 6149 6413 6429 6438
 6439 6484 4059 6404 4460 6143 6475 6405 1635 4058 4061 4065 4073 4297
 4315 6421 4316 6260 6402 6411 6477 4313 6449 3988 6447 6480 4074 4282
 6474 4072 4283 6408 4284 4471 4319 6238 6401 6428 4309 4261 4265 4268
 4299 1652 6473 6446 6472 6481 6437 4126 1651 4291 4308 1636 4051 4272
 4293 4312 6522 1616 1640 1639 4470 4222 4279 4057 4013 1650 4006 1653
 4014 1655 6127 6148 1641 4289 4452 4007 6441 4290 3906 4106 4219 4463
 6493 6007 4071 4450 4055 6132 6521 6203 6136 1642 4469 1609 6444 4298
 6520 4121 4064 6445 4234 1633 3908 1656]
Vehicle refs for EURO III: ['6260', '1607', '1609', '1616', '6401'

In [25]:
!pip install geopy

Collecting geopy
  Downloading geopy-2.4.1-py3-none-any.whl.metadata (6.8 kB)
Collecting geographiclib<3,>=1.52 (from geopy)
  Downloading geographiclib-2.0-py3-none-any.whl.metadata (1.4 kB)
Downloading geopy-2.4.1-py3-none-any.whl (125 kB)
   ---------------------------------------- 0.0/125.4 kB ? eta -:--:--
   --- ------------------------------------ 10.2/125.4 kB ? eta -:--:--
   ------------ -------------------------- 41.0/125.4 kB 495.5 kB/s eta 0:00:01
   ---------------------------------------- 125.4/125.4 kB 1.1 MB/s eta 0:00:00
Downloading geographiclib-2.0-py3-none-any.whl (40 kB)
   ---------------------------------------- 0.0/40.3 kB ? eta -:--:--
   ---------------------------------------- 40.3/40.3 kB 1.9 MB/s eta 0:00:00
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-2.0 geopy-2.4.1


In [53]:
# Task 3 - creating a function to calculate total distance
import pandas as pd
from geopy.distance import geodesic

# Load the dataframes: simplified_buses holds the bus journey rows (vehicle
# reference plus latitude/longitude); regs maps vehicles to an emission class.
# NOTE(review): the same two CSVs are downloaded again in other cells of this notebook.
simplified_buses = pd.read_csv("https://raw.githubusercontent.com/futureCodersSE/python-programming-for-data/main/Datasets/simplified_12_10_22.csv")
regs = pd.read_csv("https://raw.githubusercontent.com/futureCodersSE/python-programming-for-data/main/Datasets/bus_regs.csv")

def filter_by_emissions_class(buses_df, regs_df, emissions_class):
    """
    Filters the buses dataframe by the specified emissions class using the regs dataframe.

    Column names are matched after stripping surrounding whitespace, emission classes are
    compared case-insensitively, and vehicle references are compared as text.
    All cleaning happens on copies: neither input dataframe is modified.

    Parameters:
    buses_df (pd.DataFrame): Bus journey data (needs a 'VehicleRef' column).
    regs_df (pd.DataFrame): Registration data (needs 'Emission Class' and 'Last tracked' columns).
    emissions_class (str): The emissions class to filter by (e.g., 'Euro III').

    Returns:
    pd.DataFrame: A new dataframe with the matching rows of buses_df. Column names are
    whitespace-stripped and 'VehicleRef' is returned as text.
    """
    # Work on copies: assigning to .columns or to a column of the arguments
    # would silently change the caller's dataframes.
    regs_clean = regs_df.copy()
    regs_clean.columns = regs_clean.columns.str.strip()
    buses_clean = buses_df.copy()
    buses_clean.columns = buses_clean.columns.str.strip()

    # Standardize case for comparison
    regs_clean['Emission Class'] = regs_clean['Emission Class'].str.upper().str.strip()
    emissions_class = emissions_class.upper().strip()

    # Convert 'Last tracked' to string for comparison
    regs_clean['Last tracked'] = regs_clean['Last tracked'].astype(str).str.strip()

    # Get vehicle references for the specified emissions class
    class_rows = regs_clean['Emission Class'] == emissions_class
    vehicle_refs = regs_clean.loc[class_rows, 'Last tracked'].tolist()

    # Convert vehicle refs in the working copy to string for comparison
    buses_clean['VehicleRef'] = buses_clean['VehicleRef'].astype(str)

    # Filter the buses dataframe based on the vehicle references
    filtered_buses_df = buses_clean[buses_clean['VehicleRef'].isin(vehicle_refs)].copy()

    return filtered_buses_df

def calculate_total_distance(filtered_df):
    """
    Adds up the geodesic distance (km) between each row's position and the next row's.

    Parameters:
    filtered_df (pd.DataFrame): Rows with 'VehicleLocation.Latitude' and
        'VehicleLocation.Longitude' columns, walked in their existing order.

    Returns:
    float: The summed distance in kilometers; 0.0 (with a printed notice) for an empty frame.

    NOTE(review): rows are paired purely by position, not per vehicle, so the total
    includes hops between rows that may belong to different buses - confirm this is intended.
    """
    if filtered_df.empty:
        print("Filtered DataFrame is empty.")
        return 0.0

    # Pair every latitude with its longitude as (lat, long) points
    points = list(zip(
        filtered_df['VehicleLocation.Latitude'].tolist(),
        filtered_df['VehicleLocation.Longitude'].tolist(),
    ))

    # Show the first five points for debugging
    print("Sample Coordinates:")
    for lat, long in points[:5]:
        print(f"Latitude: {lat}, Longitude: {long}")

    # Walk consecutive point pairs, accumulating the leg lengths in order
    total_distance = 0.0
    for origin, destination in zip(points, points[1:]):
        total_distance += geodesic(origin, destination).kilometers

    return total_distance

# Example usage: select the Euro III rows, report how many there are, then
# sum the distance between consecutive rows of that selection.
filtered_buses = filter_by_emissions_class(simplified_buses, regs, 'Euro III')
print(f"Filtered buses DataFrame shape: {filtered_buses.shape}")

total_distance = calculate_total_distance(filtered_buses)
print(f"Total distance traveled by Euro III buses: {total_distance:.2f} km")



Filtered buses DataFrame shape: (1904, 8)
Sample Coordinates:
Latitude: 51.370171, Longitude: 0.499455
Latitude: 51.260458, Longitude: 0.523764
Latitude: 51.246275, Longitude: 0.55889
Latitude: 51.385351, Longitude: 0.52429
Latitude: 51.274431, Longitude: 0.535425
Total distance traveled by Euro III buses: 15878.95 km


In [55]:
# Task 4
# Task:

#     write a function that takes the emissions_standard as a parameter
#     inside your function, call your function from Task 1, passing the emissions_standard parameter and the simplified_buses dataframe, saving the result in a variable
#     call your distance calculation function from Task 3, passing in the dataframe variable you've just created above, saving the result in a variable called distance
#     use a for loop to iterate through the emissions_data list of dictionaries
#     use an if statement to match up to the correct emissions standard dictionary
#     calculate the total CO2, Nox and PM emissions (use the distance variable you just made)
#     return a dictionary containing the standard, the distance travelled by all buses of that standard in the period and the three total emissions calculated

import pandas as pd
from geopy.distance import geodesic

# Load the dataframes: simplified_buses holds the bus journey rows (vehicle
# reference plus latitude/longitude); regs maps vehicles to an emission class.
# NOTE(review): the same two CSVs are downloaded again in other cells of this notebook.
simplified_buses = pd.read_csv("https://raw.githubusercontent.com/futureCodersSE/python-programming-for-data/main/Datasets/simplified_12_10_22.csv")
regs = pd.read_csv("https://raw.githubusercontent.com/futureCodersSE/python-programming-for-data/main/Datasets/bus_regs.csv")

def filter_by_emissions_class(buses_df, regs_df, emissions_class):
    """
    Filters the buses dataframe by the specified emissions class using the regs dataframe.

    Column names are matched after stripping surrounding whitespace, emission classes are
    compared case-insensitively, and vehicle references are compared as text.
    All cleaning happens on copies: neither input dataframe is modified.

    Parameters:
    buses_df (pd.DataFrame): Bus journey data (needs a 'VehicleRef' column).
    regs_df (pd.DataFrame): Registration data (needs 'Emission Class' and 'Last tracked' columns).
    emissions_class (str): The emissions class to filter by (e.g., 'Euro III').

    Returns:
    pd.DataFrame: A new dataframe with the matching rows of buses_df. Column names are
    whitespace-stripped and 'VehicleRef' is returned as text.
    """
    # Work on copies: assigning to .columns or to a column of the arguments
    # would silently change the caller's dataframes.
    regs_clean = regs_df.copy()
    regs_clean.columns = regs_clean.columns.str.strip()
    buses_clean = buses_df.copy()
    buses_clean.columns = buses_clean.columns.str.strip()

    # Standardize case for comparison
    regs_clean['Emission Class'] = regs_clean['Emission Class'].str.upper().str.strip()
    emissions_class = emissions_class.upper().strip()

    # Convert 'Last tracked' to string for comparison
    regs_clean['Last tracked'] = regs_clean['Last tracked'].astype(str).str.strip()

    # Debug: Print unique values for verification
    print(f"Unique emission classes in regs_df: {regs_clean['Emission Class'].unique()}")
    print(f"Unique vehicle refs in buses_df: {buses_clean['VehicleRef'].unique()}")

    # Step 1: Get a list of vehicle references for the specified emissions class
    class_rows = regs_clean['Emission Class'] == emissions_class
    vehicle_refs = regs_clean.loc[class_rows, 'Last tracked'].tolist()
    print(f"Vehicle refs for {emissions_class}: {vehicle_refs}")

    # Convert vehicle refs in the working copy to string for comparison
    buses_clean['VehicleRef'] = buses_clean['VehicleRef'].astype(str)

    # Step 2: Filter the buses dataframe based on the vehicle references
    filtered_buses_df = buses_clean[buses_clean['VehicleRef'].isin(vehicle_refs)].copy()

    # Debug: Print the result
    print(f"Filtered buses dataframe shape: {filtered_buses_df.shape}")
    return filtered_buses_df

def calculate_total_distance(filtered_df):
    """
    Adds up the geodesic distance (km) between each row's position and the next row's.

    Parameters:
    filtered_df (pd.DataFrame): Rows with 'VehicleLocation.Latitude' and
        'VehicleLocation.Longitude' columns, walked in their existing order.

    Returns:
    float: The summed distance in kilometers (0.0 when there are fewer than two rows).

    NOTE(review): rows are paired purely by position, not per vehicle, so the total
    includes hops between rows that may belong to different buses - confirm this is intended.
    """
    # Pair every latitude with its longitude as (lat, long) points
    points = list(zip(
        filtered_df['VehicleLocation.Latitude'].tolist(),
        filtered_df['VehicleLocation.Longitude'].tolist(),
    ))

    # Walk consecutive point pairs, accumulating the leg lengths in order
    total_distance = 0.0
    for origin, destination in zip(points, points[1:]):
        total_distance += geodesic(origin, destination).kilometers

    return total_distance

def get_emissions_data():
    """
    Builds the per-standard emission factor table.

    Returns:
    list[dict]: One dictionary per Euro standard with the keys
    "Standard", "CO2", "Nox" and "PM", ordered from EURO III to EURO VI.
    """
    fields = ("Standard", "CO2", "Nox", "PM")
    factor_rows = (
        ("EURO III", 2.1, 5, 0.1),
        ("EURO IV", 1.5, 3.5, 0.02),
        ("EURO V", 1.5, 2, 0.02),
        ("EURO VI", 1.5, 0.4, 0.01),
    )
    # Zip the shared field names onto each row to form the dictionaries
    return [dict(zip(fields, row)) for row in factor_rows]

def calculate_emissions(emissions_standard):
    """
    Calculates the total emissions for buses of a specific emission standard.

    Uses the notebook-level simplified_buses and regs dataframes. The standard is
    matched against the emission factor table case-insensitively and ignoring
    surrounding whitespace, the same way filter_by_emissions_class matches it, so
    'Euro III' and 'EURO III' give the same result.

    Parameters:
    emissions_standard (str): The emission standard to filter by (e.g., 'EURO III').

    Returns:
    dict: 'Standard' (as passed in), 'Total distance', 'CO2', 'Nox' and 'PM'.
    All numeric values are 0 when the standard is not in the emission factor table.
    """
    # Look the rates up first, under the normalised name; an exact-match lookup
    # would return zeros for any spelling other than the upper-case table key.
    standard_key = emissions_standard.upper().strip()
    emissions_dict = next(
        (item for item in get_emissions_data() if item["Standard"] == standard_key),
        None,
    )

    if emissions_dict is None:
        # Unknown standard: nothing to compute
        return {
            'Standard': emissions_standard,
            'Total distance': 0,
            'CO2': 0,
            'Nox': 0,
            'PM': 0
        }

    # Call filter_by_emissions_class to get filtered buses
    filtered_buses = filter_by_emissions_class(simplified_buses, regs, emissions_standard)

    # Calculate total distance using the filtered data
    distance = calculate_total_distance(filtered_buses)

    # Define energy consumption per km
    energy_per_km = 5.08  # kWh/km

    # Calculate total energy consumption
    total_energy_consumption = distance * energy_per_km

    # Scale each emission factor by the total energy consumption
    return {
        'Standard': emissions_standard,
        'Total distance': distance,
        'CO2': total_energy_consumption * emissions_dict["CO2"],
        'Nox': total_energy_consumption * emissions_dict["Nox"],
        'PM': total_energy_consumption * emissions_dict["PM"]
    }

# Evaluate every Euro standard in turn and gather one result dictionary per standard
bus_emissions_by_standard = []
for standard in ("EURO III", "EURO IV", "EURO V", "EURO VI"):
    emissions = calculate_emissions(standard)
    bus_emissions_by_standard += [emissions]

# Show the collected list
print(bus_emissions_by_standard)


Unique emission classes in regs_df: ['EURO III' 'EURO IV' 'EURO V' 'EURO VI']
Unique vehicle refs in buses_df: [4314 6412 6448 4224 6152 3984 4005 4310 4300 6471 3987 6483 4280 4464
 6147 1607 6478 4311 4271 6138 4266 4048 6146 6406 4292 6476 6528 6420
 4296 6440 4060 4317 4267 4281 6430 4270 4063 4120 4075 4263 4264 4273
 6431 6139 4294 6442 4066 6410 1654 4056 4221 4318 6149 6413 6429 6438
 6439 6484 4059 6404 4460 6143 6475 6405 1635 4058 4061 4065 4073 4297
 4315 6421 4316 6260 6402 6411 6477 4313 6449 3988 6447 6480 4074 4282
 6474 4072 4283 6408 4284 4471 4319 6238 6401 6428 4309 4261 4265 4268
 4299 1652 6473 6446 6472 6481 6437 4126 1651 4291 4308 1636 4051 4272
 4293 4312 6522 1616 1640 1639 4470 4222 4279 4057 4013 1650 4006 1653
 4014 1655 6127 6148 1641 4289 4452 4007 6441 4290 3906 4106 4219 4463
 6493 6007 4071 4450 4055 6132 6521 6203 6136 1642 4469 1609 6444 4298
 6520 4121 4064 6445 4234 1633 3908 1656]
Vehicle refs for EURO III: ['6260', '1607', '1609', '1616', '6401'

In [57]:

# Task 5

# Find the percentage of the total emissions produced by all the buses for each emission standard

#     Write a function which takes an emissions class as a parameter
#     Create a variable, for each type of emission (CO2, Nox, PM) , which holds the total emissions (add up all the values from the dictionary you created in the last task using a for loop)
#     Using the given emissions class, calculate what percentage of the total emissions comes from that emissions class (the emissions class emissions / total emissions * 100)
#     return the percentage
#     print comparisons of each standard, its percentage of emissions and the percentage of the buses that were that standard (use your function from Task 2)



import pandas as pd
from geopy.distance import geodesic

# Load the dataframes: simplified_buses holds the bus journey rows (vehicle
# reference plus latitude/longitude); regs maps vehicles to an emission class.
# NOTE(review): the same two CSVs are downloaded again in other cells of this notebook.
simplified_buses = pd.read_csv("https://raw.githubusercontent.com/futureCodersSE/python-programming-for-data/main/Datasets/simplified_12_10_22.csv")
regs = pd.read_csv("https://raw.githubusercontent.com/futureCodersSE/python-programming-for-data/main/Datasets/bus_regs.csv")

def filter_by_emissions_class(buses_df, regs_df, emissions_class):
    """
    Filters the buses dataframe by the specified emissions class using the regs dataframe.

    Column names are matched after stripping surrounding whitespace, emission classes are
    compared case-insensitively, and vehicle references are compared as text.
    All cleaning happens on copies: neither input dataframe is modified.

    Parameters:
    buses_df (pd.DataFrame): Bus journey data (needs a 'VehicleRef' column).
    regs_df (pd.DataFrame): Registration data (needs 'Emission Class' and 'Last tracked' columns).
    emissions_class (str): The emissions class to filter by (e.g., 'Euro III').

    Returns:
    pd.DataFrame: A new dataframe with the matching rows of buses_df. Column names are
    whitespace-stripped and 'VehicleRef' is returned as text.
    """
    # Work on copies: assigning to .columns or to a column of the arguments
    # would silently change the caller's dataframes.
    regs_clean = regs_df.copy()
    regs_clean.columns = regs_clean.columns.str.strip()
    buses_clean = buses_df.copy()
    buses_clean.columns = buses_clean.columns.str.strip()

    # Standardize case for comparison
    regs_clean['Emission Class'] = regs_clean['Emission Class'].str.upper().str.strip()
    emissions_class = emissions_class.upper().strip()

    # Convert 'Last tracked' to string for comparison
    regs_clean['Last tracked'] = regs_clean['Last tracked'].astype(str).str.strip()

    # Step 1: Get a list of vehicle references for the specified emissions class
    class_rows = regs_clean['Emission Class'] == emissions_class
    vehicle_refs = regs_clean.loc[class_rows, 'Last tracked'].tolist()

    # Convert vehicle refs in the working copy to string for comparison
    buses_clean['VehicleRef'] = buses_clean['VehicleRef'].astype(str)

    # Step 2: Filter the buses dataframe based on the vehicle references
    filtered_buses_df = buses_clean[buses_clean['VehicleRef'].isin(vehicle_refs)].copy()

    return filtered_buses_df

def calculate_total_distance(filtered_df):
    """
    Adds up the geodesic distance (km) between each row's position and the next row's.

    Parameters:
    filtered_df (pd.DataFrame): Rows with 'VehicleLocation.Latitude' and
        'VehicleLocation.Longitude' columns, walked in their existing order.

    Returns:
    float: The summed distance in kilometers (0.0 when there are fewer than two rows).

    NOTE(review): rows are paired purely by position, not per vehicle, so the total
    includes hops between rows that may belong to different buses - confirm this is intended.
    """
    # Pair every latitude with its longitude as (lat, long) points
    points = list(zip(
        filtered_df['VehicleLocation.Latitude'].tolist(),
        filtered_df['VehicleLocation.Longitude'].tolist(),
    ))

    # Walk consecutive point pairs, accumulating the leg lengths in order
    total_distance = 0.0
    for origin, destination in zip(points, points[1:]):
        total_distance += geodesic(origin, destination).kilometers

    return total_distance

def get_emissions_data():
    """
    Builds the per-standard emission factor table.

    Returns:
    list[dict]: One dictionary per Euro standard with the keys
    "Standard", "CO2", "Nox" and "PM", ordered from EURO III to EURO VI.
    """
    fields = ("Standard", "CO2", "Nox", "PM")
    factor_rows = (
        ("EURO III", 2.1, 5, 0.1),
        ("EURO IV", 1.5, 3.5, 0.02),
        ("EURO V", 1.5, 2, 0.02),
        ("EURO VI", 1.5, 0.4, 0.01),
    )
    # Zip the shared field names onto each row to form the dictionaries
    return [dict(zip(fields, row)) for row in factor_rows]

def calculate_emissions(emissions_standard):
    """
    Calculates the total emissions for buses of a specific emission standard.

    Uses the notebook-level simplified_buses and regs dataframes. The standard is
    matched against the emission factor table case-insensitively and ignoring
    surrounding whitespace, the same way filter_by_emissions_class matches it, so
    'Euro III' and 'EURO III' give the same result.

    Parameters:
    emissions_standard (str): The emission standard to filter by (e.g., 'EURO III').

    Returns:
    dict: 'Standard' (as passed in), 'Total distance', 'CO2', 'Nox' and 'PM'.
    All numeric values are 0 when the standard is not in the emission factor table.
    """
    # Look the rates up first, under the normalised name; an exact-match lookup
    # would return zeros for any spelling other than the upper-case table key.
    standard_key = emissions_standard.upper().strip()
    emissions_dict = next(
        (item for item in get_emissions_data() if item["Standard"] == standard_key),
        None,
    )

    if emissions_dict is None:
        # Unknown standard: nothing to compute
        return {
            'Standard': emissions_standard,
            'Total distance': 0,
            'CO2': 0,
            'Nox': 0,
            'PM': 0
        }

    # Call filter_by_emissions_class to get filtered buses
    filtered_buses = filter_by_emissions_class(simplified_buses, regs, emissions_standard)

    # Calculate total distance using the filtered data
    distance = calculate_total_distance(filtered_buses)

    # Define energy consumption per km
    energy_per_km = 5.08  # kWh/km

    # Calculate total energy consumption
    total_energy_consumption = distance * energy_per_km

    # Scale each emission factor by the total energy consumption
    return {
        'Standard': emissions_standard,
        'Total distance': distance,
        'CO2': total_energy_consumption * emissions_dict["CO2"],
        'Nox': total_energy_consumption * emissions_dict["Nox"],
        'PM': total_energy_consumption * emissions_dict["PM"]
    }

def calculate_percentage_of_emissions(emissions_standard, total_emissions):
    """
    Expresses one standard's CO2, Nox and PM emissions as percentages of the given totals.

    Parameters:
    emissions_standard (str): The emission standard to evaluate (e.g., 'EURO III').
    total_emissions (dict): Totals keyed by 'CO2', 'Nox' and 'PM'.

    Returns:
    dict: 'Standard' plus 'CO2 %', 'Nox %' and 'PM %', each rounded to 2 decimal places.
    A pollutant whose total is not positive is reported as 0.
    """
    emissions = calculate_emissions(emissions_standard)

    def share_of_total(pollutant):
        # Guard against dividing by a zero (or negative) total
        if total_emissions[pollutant] > 0:
            return (emissions[pollutant] / total_emissions[pollutant]) * 100
        return 0

    percent_CO2 = share_of_total('CO2')
    percent_Nox = share_of_total('Nox')
    percent_PM = share_of_total('PM')

    result = {'Standard': emissions_standard}
    result['CO2 %'] = round(percent_CO2, 2)
    result['Nox %'] = round(percent_Nox, 2)
    result['PM %'] = round(percent_PM, 2)
    return result

# Sum each pollutant over all four standards to get the overall totals
total_emissions = dict.fromkeys(('CO2', 'Nox', 'PM'), 0)
for standard in ["EURO III", "EURO IV", "EURO V", "EURO VI"]:
    emissions = calculate_emissions(standard)
    for pollutant in ('CO2', 'Nox', 'PM'):
        total_emissions[pollutant] += emissions[pollutant]

# Work out each standard's share of those totals
emissions_percentage_by_standard = []
for standard in ["EURO III", "EURO IV", "EURO V", "EURO VI"]:
    percentage_emissions = calculate_percentage_of_emissions(standard, total_emissions)
    emissions_percentage_by_standard += [percentage_emissions]

# Calculate percentage of buses for each standard
def calculate_percentage_of_buses(emissions_class):
    """
    Gives the share of rows in the notebook-level simplified_buses dataframe that
    belong to the given emissions class.

    Parameters:
    emissions_class (str): The emissions class to measure (e.g., 'EURO III').

    Returns:
    float: The percentage rounded to 2 decimal places (0 when simplified_buses has no rows).
    """
    # Row count of the full dataset
    total_buses = len(simplified_buses)

    # Row count of the subset for this emissions class
    num_buses = len(filter_by_emissions_class(simplified_buses, regs, emissions_class))

    # Only divide when there is something to divide by
    if total_buses > 0:
        percentage = (num_buses / total_buses) * 100
    else:
        percentage = 0

    return round(percentage, 2)

# For every standard, report its share of bus rows next to its share of each pollutant
for standard in ["EURO III", "EURO IV", "EURO V", "EURO VI"]:
    percent_buses = calculate_percentage_of_buses(standard)
    emissions_percentage = next(item for item in emissions_percentage_by_standard if item['Standard'] == standard)
    report_lines = [
        f"{standard}:",
        f"  Percentage of Buses: {percent_buses}%",
        f"  Percentage of CO2 Emissions: {emissions_percentage['CO2 %']}%",
        f"  Percentage of Nox Emissions: {emissions_percentage['Nox %']}%",
        f"  Percentage of PM Emissions: {emissions_percentage['PM %']}%",
    ]
    print("\n".join(report_lines))
    print()


EURO III:
  Percentage of Buses: 16.93%
  Percentage of CO2 Emissions: 21.1%
  Percentage of Nox Emissions: 41.06%
  Percentage of PM Emissions: 57.04%

EURO IV:
  Percentage of Buses: 12.66%
  Percentage of CO2 Emissions: 14.18%
  Percentage of Nox Emissions: 27.03%
  Percentage of PM Emissions: 10.73%

EURO V:
  Percentage of Buses: 19.86%
  Percentage of CO2 Emissions: 20.43%
  Percentage of Nox Emissions: 22.26%
  Percentage of PM Emissions: 15.46%

EURO VI:
  Percentage of Buses: 50.55%
  Percentage of CO2 Emissions: 44.3%
  Percentage of Nox Emissions: 9.65%
  Percentage of PM Emissions: 16.77%



In [61]:
# Task 6
# Create a function to filter the dataframe to just the rows of buses inside the AQMA boundary of Rainham High Street (similar to Task 9 in the simplification worksheet)

import pandas as pd

# Load the bus journey rows; this cell only needs their
# 'VehicleLocation.Latitude' / 'VehicleLocation.Longitude' columns.
simplified_buses = pd.read_csv("https://raw.githubusercontent.com/futureCodersSE/python-programming-for-data/main/Datasets/simplified_12_10_22.csv")

def filter_aqma_boundary(df, min_lat=51.361462, max_lat=51.364935,
                         min_long=0.603210, max_long=0.617510):
    """
    Filters the DataFrame to the rows whose recorded position lies inside a
    latitude/longitude bounding box. The defaults are the box this notebook uses
    for the AQMA boundary of Rainham High Street.

    Parameters:
    df (pd.DataFrame): The DataFrame containing bus journey data; needs
        'VehicleLocation.Latitude' and 'VehicleLocation.Longitude' columns.
    min_lat (float): Southern edge of the box (inclusive).
    max_lat (float): Northern edge of the box (inclusive).
    min_long (float): Western edge of the box (inclusive).
    max_long (float): Eastern edge of the box (inclusive).

    Returns:
    pd.DataFrame: An independent copy of the rows that fall within the box, with
    the original index labels preserved.
    """
    # between() is inclusive on both ends by default, matching >= / <= comparisons
    inside_lat = df['VehicleLocation.Latitude'].between(min_lat, max_lat)
    inside_long = df['VehicleLocation.Longitude'].between(min_long, max_long)

    # Return a copy so later column assignments on the result neither warn
    # about chained assignment nor touch the source dataframe.
    return df[inside_lat & inside_long].copy()

# Keep only the rows of simplified_buses that lie inside the AQMA box
filtered_buses_aqma = filter_aqma_boundary(simplified_buses)

# Verification: size of the result and a preview of its first rows
print(f"Filtered dataframe shape: {filtered_buses_aqma.shape}")
print(filtered_buses_aqma.head())

# Row counts before and after filtering, and the share that was kept
total_rows, filtered_rows = len(simplified_buses), len(filtered_buses_aqma)
print(f"\nTotal rows in original DataFrame: {total_rows}")
print(f"Rows within AQMA boundary: {filtered_rows}")
aqma_share = filtered_rows / total_rows * 100
print(f"Percentage of buses within AQMA boundary: {aqma_share:.2f}%")


Filtered dataframe shape: (89, 8)
     Unnamed: 0 LineRef                        OriginName  \
162         179     132            Waterfront_Bus_Station   
259         311     132  Hempstead_Valley_Shopping_Centre   
356         453     132            Waterfront_Bus_Station   
456         593     132  Hempstead_Valley_Shopping_Centre   
594         785     132  Hempstead_Valley_Shopping_Centre   

                      DestinationName   OriginAimedDepartureTime  VehicleRef  \
162  Hempstead_Valley_Shopping_Centre  2022-10-12T09:30:00+00:00        4314   
259            Waterfront_Bus_Station  2022-10-12T09:42:00+00:00        3987   
356  Hempstead_Valley_Shopping_Centre  2022-10-12T09:42:00+00:00        6401   
456            Waterfront_Bus_Station  2022-10-12T09:54:00+00:00        4293   
594            Waterfront_Bus_Station  2022-10-12T10:06:00+00:00        4310   

     VehicleLocation.Longitude  VehicleLocation.Latitude  
162                   0.607713                 51.363476  


In [89]:
# Task 7
# Using the filtered dataframe from Task 6, repeat tasks 4 and 5 on just the buses in the AQMA.

import pandas as pd

# Load the dataframes: simplified_buses holds the bus journey rows; regs maps
# vehicles to an emission class.
# NOTE(review): the same two CSVs are downloaded again in other cells of this notebook.
simplified_buses = pd.read_csv("https://raw.githubusercontent.com/futureCodersSE/python-programming-for-data/main/Datasets/simplified_12_10_22.csv")
regs = pd.read_csv("https://raw.githubusercontent.com/futureCodersSE/python-programming-for-data/main/Datasets/bus_regs.csv")

# Ensure 'VehicleRef' column is of type str, so it can be compared with the
# string-cast 'Last tracked' values used in filter_by_emissions_class
simplified_buses['VehicleRef'] = simplified_buses['VehicleRef'].astype(str)

def filter_by_emissions_class(buses_df, regs_df, emissions_class):
    """
    Filters the buses dataframe by the specified emissions class using the regs dataframe.

    Column names are matched after stripping surrounding whitespace, emission classes are
    compared case-insensitively, and vehicle references are compared as text.
    All cleaning happens on copies: neither input dataframe is modified, which also
    makes it safe to pass in a filtered slice of another dataframe.

    Parameters:
    buses_df (pd.DataFrame): Bus journey data (needs a 'VehicleRef' column).
    regs_df (pd.DataFrame): Registration data (needs 'Emission Class' and 'Last tracked' columns).
    emissions_class (str): The emissions class to filter by (e.g., 'Euro III').

    Returns:
    pd.DataFrame: A new dataframe with the matching rows of buses_df. Column names are
    whitespace-stripped and 'VehicleRef' is returned as text.
    """
    # Work on copies: assigning to .columns or to a column of the arguments
    # would silently change the caller's dataframes.
    regs_clean = regs_df.copy()
    regs_clean.columns = regs_clean.columns.str.strip()
    buses_clean = buses_df.copy()
    buses_clean.columns = buses_clean.columns.str.strip()

    # Standardize case for comparison
    regs_clean['Emission Class'] = regs_clean['Emission Class'].str.upper().str.strip()
    emissions_class = emissions_class.upper().strip()

    # Convert 'Last tracked' to string for comparison
    regs_clean['Last tracked'] = regs_clean['Last tracked'].astype(str).str.strip()

    # Get a list of vehicle references for the specified emissions class
    class_rows = regs_clean['Emission Class'] == emissions_class
    vehicle_refs = regs_clean.loc[class_rows, 'Last tracked'].tolist()

    # Ensure 'VehicleRef' in the working copy is of type str
    buses_clean['VehicleRef'] = buses_clean['VehicleRef'].astype(str)

    # Filter the buses dataframe based on the vehicle references
    filtered_buses_df = buses_clean[buses_clean['VehicleRef'].isin(vehicle_refs)].copy()

    return filtered_buses_df

def calculate_total_distance(filtered_df):
    """
    Adds up the geodesic distance (km) between each row's position and the next row's.

    Parameters:
    filtered_df (pd.DataFrame): Rows with 'VehicleLocation.Latitude' and
        'VehicleLocation.Longitude' columns, walked in their existing order.

    Returns:
    float: The summed distance in kilometers (0.0 when there are fewer than two rows).

    NOTE(review): rows are paired purely by position, not per vehicle, so the total
    includes hops between rows that may belong to different buses - confirm this is intended.
    """
    # Pair every latitude with its longitude as (lat, long) points
    points = list(zip(
        filtered_df['VehicleLocation.Latitude'].tolist(),
        filtered_df['VehicleLocation.Longitude'].tolist(),
    ))

    # Walk consecutive point pairs, accumulating the leg lengths in order
    total_distance = 0.0
    for origin, destination in zip(points, points[1:]):
        total_distance += geodesic(origin, destination).kilometers

    return total_distance

def get_emissions_data():
    """
    Builds the per-standard emission factor table.

    Returns:
    list[dict]: One dictionary per Euro standard with the keys
    "Standard", "CO2", "Nox" and "PM", ordered from EURO III to EURO VI.
    """
    fields = ("Standard", "CO2", "Nox", "PM")
    factor_rows = (
        ("EURO III", 2.1, 5, 0.1),
        ("EURO IV", 1.5, 3.5, 0.02),
        ("EURO V", 1.5, 2, 0.02),
        ("EURO VI", 1.5, 0.4, 0.01),
    )
    # Zip the shared field names onto each row to form the dictionaries
    return [dict(zip(fields, row)) for row in factor_rows]

def calculate_emissions_aqma(filtered_buses_df, emissions_standard):
    """
    Calculates the total emissions for buses of a specific emission standard within the AQMA boundary.

    Parameters:
    filtered_buses_df (pd.DataFrame): Bus location rows; the caller is expected to have
        restricted them to the AQMA boundary already.
    emissions_standard (str): Emission standard name, e.g. 'EURO III'. Case and
        surrounding whitespace are ignored when looking up emission rates, the same
        way filter_by_emissions_class treats its emissions_class argument.

    Returns:
    dict: Keys 'Standard' (the name exactly as passed in), 'Total distance', 'CO2',
        'Nox' and 'PM'. All numeric values are 0 when the standard has no entry in
        get_emissions_data().

    Notes:
    Reads the module-level `regs` registration table.
    """
    # Keep only the rows whose vehicle is registered under the requested standard
    standard_buses = filter_by_emissions_class(filtered_buses_df, regs, emissions_standard)

    # Distance covered by those rows, in kilometres
    distance = calculate_total_distance(standard_buses)

    # Define energy consumption per km
    energy_per_km = 5.08  # kWh/km

    # Normalise the name before the lookup: the table keys are upper-case, and the
    # filter above already matches case-insensitively, so e.g. 'Euro III' must not
    # silently fall through to the all-zero result.
    standard_key = emissions_standard.upper().strip()
    emissions_dict = next(
        (item for item in get_emissions_data() if item["Standard"] == standard_key),
        None,
    )

    if emissions_dict is None:
        # Unknown standard: report zeros rather than raising
        return {
            'Standard': emissions_standard,
            'Total distance': 0,
            'CO2': 0,
            'Nox': 0,
            'PM': 0
        }

    # Total energy used over the distance (kWh)
    total_energy_consumption = distance * energy_per_km

    # Scale each emission rate by the energy used
    # NOTE(review): rates are presumably per kWh - TODO confirm units against the source table
    return {
        'Standard': emissions_standard,
        'Total distance': distance,
        'CO2': total_energy_consumption * emissions_dict["CO2"],
        'Nox': total_energy_consumption * emissions_dict["Nox"],
        'PM': total_energy_consumption * emissions_dict["PM"]
    }

def calculate_percentage_of_emissions_aqma(filtered_buses_df, emissions_standard, total_emissions):
    """
    Express one standard's emissions as a percentage of the supplied totals.

    Parameters:
    filtered_buses_df (pd.DataFrame): Bus rows passed through to calculate_emissions_aqma.
    emissions_standard (str): Emission standard to report on.
    total_emissions (dict): Totals keyed by 'CO2', 'Nox' and 'PM'.

    Returns:
    dict: 'Standard' plus 'CO2 %', 'Nox %' and 'PM %', each rounded to two decimals.
        A pollutant whose total is not greater than zero is reported as 0.
    """
    standard_emissions = calculate_emissions_aqma(filtered_buses_df, emissions_standard)

    def share_of_total(pollutant):
        # Guard against dividing by a zero (or negative) total
        total = total_emissions[pollutant]
        if total > 0:
            return (standard_emissions[pollutant] / total) * 100
        return 0

    return {
        'Standard': emissions_standard,
        'CO2 %': round(share_of_total('CO2'), 2),
        'Nox %': round(share_of_total('Nox'), 2),
        'PM %': round(share_of_total('PM'), 2)
    }

def calculate_percentage_of_buses_aqma(filtered_buses_df, emissions_class):
    """
    Percentage of the supplied rows that belong to the given emissions class.

    The figure is row-based: it divides the number of rows left after
    filter_by_emissions_class by the number of rows in filtered_buses_df.

    Parameters:
    filtered_buses_df (pd.DataFrame): Bus rows, expected to be limited to the AQMA already.
    emissions_class (str): Emissions class to measure.

    Returns:
    Percentage rounded to two decimals; 0 when filtered_buses_df has no rows.

    Notes:
    Reads the module-level `regs` registration table.
    """
    # Denominator: every row handed in
    row_total = len(filtered_buses_df)

    # Numerator: rows whose vehicle is registered under the requested class
    class_rows = len(filter_by_emissions_class(filtered_buses_df, regs, emissions_class))

    if row_total > 0:
        share = (class_rows / row_total) * 100
    else:
        share = 0

    return round(share, 2)

def filter_aqma_boundary(df, *, min_lat=51.361462, max_lat=51.364935,
                         min_long=0.603210, max_long=0.617510):
    """
    Filters the DataFrame to include only rows whose position lies inside a
    latitude/longitude bounding box (edges inclusive).

    The default box is the AQMA boundary of Rainham High Street; pass the keyword
    arguments to reuse the filter for a different area.

    Parameters:
    df (pd.DataFrame): Frame with 'VehicleLocation.Latitude' and
        'VehicleLocation.Longitude' columns.
    min_lat (float): Southern edge of the box.
    max_lat (float): Northern edge of the box.
    min_long (float): Western edge of the box.
    max_long (float): Eastern edge of the box.

    Returns:
    pd.DataFrame: An independent copy of the matching rows, keeping their original
        index labels. Rows with a missing coordinate are excluded.
    """
    # between() is inclusive on both ends by default, matching >= / <= comparisons
    inside_lat = df['VehicleLocation.Latitude'].between(min_lat, max_lat)
    inside_long = df['VehicleLocation.Longitude'].between(min_long, max_long)

    # Copy so later column assignments on the result never touch the input frame
    return df[inside_lat & inside_long].copy()

# Emission standards reported on, in display order
EMISSION_STANDARDS = ["EURO III", "EURO IV", "EURO V", "EURO VI"]

# Ensure the 'VehicleRef' column is of type str *before* the AQMA subset is copied,
# so the subset carries the same dtype as the source frame
simplified_buses['VehicleRef'] = simplified_buses['VehicleRef'].astype(str)

# Apply the filtering function to the simplified_buses DataFrame
filtered_buses_aqma = filter_aqma_boundary(simplified_buses)

# Calculate total emissions within AQMA boundary across all standards
total_emissions_aqma = {
    'CO2': 0,
    'Nox': 0,
    'PM': 0
}
for standard in EMISSION_STANDARDS:
    emissions = calculate_emissions_aqma(filtered_buses_aqma, standard)
    total_emissions_aqma['CO2'] += emissions['CO2']
    total_emissions_aqma['Nox'] += emissions['Nox']
    total_emissions_aqma['PM'] += emissions['PM']

# Print the percentage of buses for each emissions standard within AQMA
print("\nPercentage of buses for each emissions standard within AQMA:")
for standard in EMISSION_STANDARDS:
    percentage_buses = calculate_percentage_of_buses_aqma(filtered_buses_aqma, standard)
    print(f"{standard}: {percentage_buses}%")

# Print total emissions percentages
print("\nEmissions percentages for each standard within AQMA:")
for standard in EMISSION_STANDARDS:
    emissions_percentages = calculate_percentage_of_emissions_aqma(filtered_buses_aqma, standard, total_emissions_aqma)
    print(f"{standard}: CO2 {emissions_percentages['CO2 %']}%, Nox {emissions_percentages['Nox %']}%, PM {emissions_percentages['PM %']}%")

# Print the total number of rows
total_rows = len(simplified_buses)
total_filtered_rows = len(filtered_buses_aqma)
print(f"Total rows in original dataset: {total_rows}")
print(f"Total rows in AQMA filtered dataset: {total_filtered_rows}")



Percentage of buses for each emissions standard within AQMA:
EURO III: 31.46%
EURO IV: 8.99%
EURO V: 11.24%
EURO VI: 48.31%

Emissions percentages for each standard within AQMA:
EURO III: CO2 31.3%, Nox 63.6%, PM 71.81%
EURO IV: CO2 3.97%, Nox 7.91%, PM 2.55%
EURO V: CO2 15.11%, Nox 17.19%, PM 9.71%
EURO VI: CO2 49.62%, Nox 11.29%, PM 15.94%
Total rows in original dataset: 11247
Total rows in AQMA filtered dataset: 89
