In [None]:
!pip install convertbng numpy typing

In [2]:
import csv
from typing import List, Dict
from convertbng.util import convert_lonlat
import numpy as np

In [5]:
class DatasetUnifier:
    """Join accident, casualty and vehicle CSV files into one flat dataset.

    Rows are matched on "Accident Index" and "Year"; casualties are further
    matched to vehicles on "VehicleReferenceNumber". The accident file's
    "Easting"/"Northing" columns are replaced by "Longitude"/"Latitude".
    """

    def __init__(self, acc_file: str, cas_file: str, veh_file: str) -> None:
        """Remember the paths of the three input CSV files.

        Args:
            acc_file: Path of the accident CSV.
            cas_file: Path of the casualty CSV.
            veh_file: Path of the vehicle CSV.
        """
        self.acc_file = acc_file
        self.cas_file = cas_file
        self.veh_file = veh_file

    def load_csv_data(self, file_path: str) -> List[Dict[str, str]]:
        """Read a CSV file with a header row into a list of row dicts.

        Args:
            file_path: Path of the CSV file to read.

        Returns:
            One dict per data row, keyed by the header names.
        """
        # newline="" leaves line-ending handling to the csv module, so quoted
        # fields that contain line breaks are parsed correctly.
        with open(file_path, "r", newline="") as file:
            return list(csv.DictReader(file))

    # Convert Eastings and Northings to Longitude and Latitude
    def convert_coordinates(self, eastings: List[float], northings: List[float]) -> List[Dict[str, float]]:
        """Convert paired eastings/northings via convertbng's ``convert_lonlat``.

        Args:
            eastings: Easting values, one per point.
            northings: Northing values, parallel to ``eastings``.

        Returns:
            One ``{"Longitude": ..., "Latitude": ...}`` dict per input pair,
            in input order. Empty input yields an empty list.
        """
        # Nothing to convert: skip the converter rather than hand it
        # zero-length arrays.
        if len(eastings) == 0 and len(northings) == 0:
            return []
        lons, lats = convert_lonlat(np.array(eastings), np.array(northings))
        return [{"Longitude": lon, "Latitude": lat} for lon, lat in zip(lons, lats)]

    def join_datasets(self) -> List[Dict[str, object]]:
        """Load the three files and join them into unified rows.

        For every accident row (in file order), every casualty row with the
        same "Accident Index" and "Year" is paired with every vehicle row
        sharing that key and the casualty's "VehicleReferenceNumber". Each
        output row merges accident, casualty and vehicle columns; on a name
        clash the vehicle value wins over the casualty value, which wins over
        the accident value.

        Returns:
            The unified rows. Accidents without a matching casualty/vehicle
            pair contribute no rows.

        Raises:
            KeyError: If a required column is missing from an input file.
            ValueError: If an "Easting"/"Northing" value is not a number.
        """
        acc_data = self.load_csv_data(self.acc_file)
        cas_data = self.load_csv_data(self.cas_file)
        veh_data = self.load_csv_data(self.veh_file)

        if not acc_data:
            return []

        eastings = [float(row["Easting"]) for row in acc_data]
        northings = [float(row["Northing"]) for row in acc_data]
        coordinates = self.convert_coordinates(eastings, northings)

        # Index casualties and vehicles once by their join keys so each
        # accident is matched by dictionary lookup instead of rescanning both
        # files. Lists keep file order, so output order is unchanged.
        casualties_by_accident = {}
        for cas_row in cas_data:
            cas_key = (cas_row["Accident Index"], cas_row["Year"])
            casualties_by_accident.setdefault(cas_key, []).append(cas_row)

        vehicles_by_reference = {}
        for veh_row in veh_data:
            veh_key = (
                veh_row["Accident Index"],
                veh_row["Year"],
                veh_row["VehicleReferenceNumber"],
            )
            vehicles_by_reference.setdefault(veh_key, []).append(veh_row)

        unified_data = []
        for acc_row, coord in zip(acc_data, coordinates):
            acc_key = (acc_row["Accident Index"], acc_row["Year"])

            # Build the accident part on a copy (without the grid columns)
            # so the loaded rows are not modified.
            acc_part = {
                name: value
                for name, value in acc_row.items()
                if name not in ("Easting", "Northing")
            }
            acc_part["Longitude"] = coord["Longitude"]
            acc_part["Latitude"] = coord["Latitude"]

            for cas_row in casualties_by_accident.get(acc_key, ()):
                veh_key = acc_key + (cas_row["VehicleReferenceNumber"],)
                for veh_row in vehicles_by_reference.get(veh_key, ()):
                    unified_data.append({**acc_part, **cas_row, **veh_row})

        return unified_data

    def write_unified_dataset(self, output_file: str) -> None:
        """Join the datasets and write the result to ``output_file`` as CSV.

        The header is taken from the keys of the first unified row. When the
        join produces no rows, no file is written and a notice is printed.

        Args:
            output_file: Path of the CSV file to create or overwrite.
        """
        unified_data = self.join_datasets()
        if unified_data:
            fieldnames = list(unified_data[0].keys())
            with open(output_file, "w", newline="") as file:
                csv_writer = csv.DictWriter(file, fieldnames=fieldnames)
                csv_writer.writeheader()
                csv_writer.writerows(unified_data)
            print(f"Unified dataset saved to {output_file}")
        else:
            print("No data to write")

def main() -> None:
    """Unify the three STATS19 CSV files under ./data into a single CSV."""
    output_file = "./data/unified_dataset.csv"

    # Input order is: accidents, casualties, vehicles.
    unifier = DatasetUnifier(
        "./data/STATS19AccDataJan2010Dec2021forGMServers.csv",
        "./data/STATS19CasDataJan2010Dec2021forGMServers.csv",
        "./data/STATS19VehDataJan2010Dec2021forGMServers.csv",
    )
    unifier.write_unified_dataset(output_file)

# Run the unification only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()

Unified dataset saved to ./data/unified_dataset.csv
