In [1]:
import pandas as pd
import os

In [5]:
# Step 1: Load Data
# Path of the processed CSV to analyse. It must match the file saved by the
# previous steps of the pipeline.
FINAL_DATA_PATH = "../data/processed/traffic_data_with_weather_holidays.csv"

print(f"--- Loading Final Data from: {FINAL_DATA_PATH} ---")

# Only the read itself is guarded: a failure in the preview code further down
# must not be misreported as "an error occurred while loading the data".
try:
    df_final_data = pd.read_csv(FINAL_DATA_PATH)
except FileNotFoundError:
    print(f"Error: File not found at {FINAL_DATA_PATH}. Please ensure the previous steps ran correctly and saved the file.")
    # Re-raise so the notebook run stops here instead of continuing without data
    raise
except Exception as e:
    # Any other read/parse failure: report its type and message, then stop as well
    print(f"An error occurred while loading the data: {type(e).__name__} - {e}")
    raise
else:
    # Quick preview of what was loaded: first rows, then column dtypes / null counts
    print("\nDataFrame Head:")
    display(df_final_data.head())

    print("\nDataFrame Info:")
    df_final_data.info()

--- Loading Final Data from: ../data/processed/traffic_data_with_weather_holidays.csv ---

DataFrame Head:


Unnamed: 0,Datetime,Motorcycle,Car,Truck_Lorry,Bus,Is_Holiday,Day_Of_Week,Is_Weekend,Hour,Month,Temperature_2m,Relative_Humidity_2m,Weathercode,Windspeed_10m
0,1/1/2024 0:00,77,291,0,0,1,0,0,0,1,25.3,92,3,5.9
1,1/1/2024 1:00,102,291,0,0,1,0,0,1,1,25.0,91,3,6.3
2,1/1/2024 2:00,161,383,0,0,1,0,0,2,1,24.7,93,1,7.5
3,1/1/2024 3:00,90,287,0,0,1,0,0,3,1,24.5,94,2,8.4
4,1/1/2024 4:00,113,301,1,0,1,0,0,4,1,24.3,94,2,9.7



DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8781 entries, 0 to 8780
Data columns (total 14 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Datetime              8781 non-null   object 
 1   Motorcycle            8781 non-null   int64  
 2   Car                   8781 non-null   int64  
 3   Truck_Lorry           8781 non-null   int64  
 4   Bus                   8781 non-null   int64  
 5   Is_Holiday            8781 non-null   int64  
 6   Day_Of_Week           8781 non-null   int64  
 7   Is_Weekend            8781 non-null   int64  
 8   Hour                  8781 non-null   int64  
 9   Month                 8781 non-null   int64  
 10  Temperature_2m        8781 non-null   float64
 11  Relative_Humidity_2m  8781 non-null   int64  
 12  Weathercode           8781 non-null   int64  
 13  Windspeed_10m         8781 non-null   float64
dtypes: float64(2), int64(11), object(1)
memory usage: 960.6

In [6]:
# Step 2: Calculate Congestion Score
# Weight applied to each vehicle-count column when combining them into a
# single score. Keeping the weights in one mapping makes them easy to review
# and tune.
# NOTE(review): these look like passenger-car-equivalent style weights
# (Car = 1) — confirm where the values come from.
VEHICLE_WEIGHTS = {
    'Motorcycle': 0.5,
    'Car': 1,
    'Truck_Lorry': 1.5,
    'Bus': 2,
}

# Only run when a non-empty 'df_final_data' already exists in this kernel
if 'df_final_data' in locals() and not df_final_data.empty:
    print("\n--- Calculating Congestion Score ---")

    try:
        # Congestion_Score = sum over vehicle types of (count * weight).
        # Looking up a column that is absent raises KeyError, handled below.
        df_final_data['Congestion_Score'] = sum(
            df_final_data[column] * weight
            for column, weight in VEHICLE_WEIGHTS.items()
        )
        print("Calculated 'Congestion_Score'.")

        # Show a few input columns next to the new score as a sanity check
        display(df_final_data[['Datetime', 'Motorcycle', 'Car', 'Congestion_Score']].head())

    except KeyError as e:
        print(f"Error calculating Congestion Score: Missing column - {e}. Check if required vehicle columns exist.")
        raise
    except Exception as e:
        print(f"An unexpected error occurred calculating Congestion Score: {e}")
        raise
else:
    print("\nDataFrame ('df_final_data') not found or empty. Run Step 1 first.")


--- Calculating Congestion Score ---
Calculated 'Congestion_Score'.


Unnamed: 0,Datetime,Motorcycle,Car,Congestion_Score
0,1/1/2024 0:00,77,291,329.5
1,1/1/2024 1:00,102,291,342.0
2,1/1/2024 2:00,161,383,463.5
3,1/1/2024 3:00,90,287,332.0
4,1/1/2024 4:00,113,301,359.0


In [7]:
# Step 3: Calculate Congestion Percentage
# Guard first: this step needs a non-empty 'df_final_data' that already
# carries the 'Congestion_Score' column.
if 'df_final_data' not in locals() or df_final_data.empty or 'Congestion_Score' not in df_final_data.columns:
    print("\nDataFrame ('df_final_data') or 'Congestion_Score' column not found. Run previous steps first.")
else:
    print("\n--- Calculating Congestion Percentage ---")

    # The largest score in the data is the reference that maps to 100%
    max_congestion_score = df_final_data['Congestion_Score'].max()
    print(f"Maximum Congestion Score found: {max_congestion_score}")

    # 'not (max > 0)' rather than 'max <= 0' so that a NaN maximum also takes
    # the fallback branch; the fallback avoids dividing by zero.
    if not max_congestion_score > 0:
        df_final_data['Congestion_Percentage'] = 0
        print("Maximum Congestion Score is 0, setting Congestion Percentage to 0 for all rows.")
    else:
        # Each row's score expressed as a percentage of the maximum score
        df_final_data['Congestion_Percentage'] = (
            df_final_data['Congestion_Score']
            .div(max_congestion_score)
            .mul(100)
        )
        print("Calculated 'Congestion_Percentage'.")

    # Verification: preview the two congestion columns, then the full schema
    print("\nDataFrame with Congestion metrics (head):")
    display(df_final_data[['Datetime', 'Congestion_Score', 'Congestion_Percentage']].head())

    print("\nFinal DataFrame Info:")
    df_final_data.info()


--- Calculating Congestion Percentage ---
Maximum Congestion Score found: 3629.5
Calculated 'Congestion_Percentage'.

DataFrame with Congestion metrics (head):


Unnamed: 0,Datetime,Congestion_Score,Congestion_Percentage
0,1/1/2024 0:00,329.5,9.078385
1,1/1/2024 1:00,342.0,9.422786
2,1/1/2024 2:00,463.5,12.770354
3,1/1/2024 3:00,332.0,9.147265
4,1/1/2024 4:00,359.0,9.89117



Final DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8781 entries, 0 to 8780
Data columns (total 16 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Datetime               8781 non-null   object 
 1   Motorcycle             8781 non-null   int64  
 2   Car                    8781 non-null   int64  
 3   Truck_Lorry            8781 non-null   int64  
 4   Bus                    8781 non-null   int64  
 5   Is_Holiday             8781 non-null   int64  
 6   Day_Of_Week            8781 non-null   int64  
 7   Is_Weekend             8781 non-null   int64  
 8   Hour                   8781 non-null   int64  
 9   Month                  8781 non-null   int64  
 10  Temperature_2m         8781 non-null   float64
 11  Relative_Humidity_2m   8781 non-null   int64  
 12  Weathercode            8781 non-null   int64  
 13  Windspeed_10m          8781 non-null   float64
 14  Congestion_Score       8781 non-n

In [8]:
# Step 4: Optional - Save Final Data with Congestion Metrics
# Writes 'df_final_data' to CSV, but only once it actually contains the
# congestion columns; otherwise a file named "...with_congestion.csv" would be
# written without them.
if 'df_final_data' in locals() and not df_final_data.empty:
    FINAL_CONGESTION_PATH = "../data/processed/traffic_data_with_congestion.csv"

    # Columns that must be present before the data is worth saving under this name
    missing_congestion_cols = [
        column
        for column in ('Congestion_Score', 'Congestion_Percentage')
        if column not in df_final_data.columns
    ]

    if missing_congestion_cols:
        print(f"\nNot saving: 'df_final_data' is missing congestion column(s) {missing_congestion_cols}. Run Steps 2 and 3 first.")
    else:
        try:
            # index=False: the row index is not written to the file
            df_final_data.to_csv(FINAL_CONGESTION_PATH, index=False)
            print(f"\nFinal data including congestion saved to: {FINAL_CONGESTION_PATH}")
        except Exception as e:
            # Saving is an optional step, so a failure is reported but not re-raised
            print(f"Error saving file: {e}")
else:
    print("\nNo final DataFrame ('df_final_data') to save.")


Final data including congestion saved to: ../data/processed/traffic_data_with_congestion.csv
