In [1]:
import csv
from collections import namedtuple
from datetime import datetime
from typing import Iterator


# Immutable record describing one traffic violation; the field names are
# given as a single whitespace-separated spec, which namedtuple splits for us.
TrafficViolation = namedtuple(
    'TrafficViolation',
    'summons_number plate_id registration_state '
    'plate_type issue_date violation_code '
    'vehicle_body_type vehicle_make violation_description',
)


def row_to_namedtuple(row: dict) -> TrafficViolation:
    """
    Build a TrafficViolation from one CSV row given as a dictionary.

    The summons number and violation code are parsed as integers and the
    issue date is parsed as a datetime using the '%m/%d/%Y' format; every
    other field is copied through unchanged.

    Args:
        row: Mapping keyed by the CSV column headers read below.

    Returns:
        TrafficViolation: The converted record.

    Raises:
        ValueError: If an integer or date field cannot be parsed. The
            offending row is printed before the error is re-raised.
    """
    try:
        # Columns are read in the same order as the record's fields so the
        # first problem encountered in a row is the one that gets reported.
        summons = int(row['Summons Number'])
        plate = row['Plate ID']
        state = row['Registration State']
        plate_kind = row['Plate Type']
        issued_on = datetime.strptime(row['Issue Date'], '%m/%d/%Y')
        code = int(row['Violation Code'])
        body_type = row['Vehicle Body Type']
        make = row['Vehicle Make']
        description = row['Violation Description']
    except ValueError as err:
        print(f"Error processing row: {row} -> {err}")
        raise

    return TrafficViolation(
        summons_number=summons,
        plate_id=plate,
        registration_state=state,
        plate_type=plate_kind,
        issue_date=issued_on,
        violation_code=code,
        vehicle_body_type=body_type,
        vehicle_make=make,
        violation_description=description,
    )

# Lazy iterator to read the CSV file row by row
def lazy_csv_reader(file_path: str) -> Iterator[TrafficViolation]:
    """
    A lazy iterator that reads a CSV file row by row and converts each row to a TrafficViolation named tuple.

    This is a generator: the file is opened when iteration begins and rows
    are read and converted one at a time rather than loaded all at once.

    Args:
        file_path: Path of the CSV file to read. The first row supplies the
            column names used as dictionary keys for each subsequent row.

    Yields:
        TrafficViolation: A named tuple representing each row from the CSV with the correct data types.

    Raises:
        ValueError: Propagated from row_to_namedtuple when a row contains a
            value that cannot be converted.
    """
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; otherwise line breaks embedded inside quoted
    # fields are altered by the text layer before the parser sees them.
    with open(file_path, mode='r', newline='') as f:
        for row in csv.DictReader(f):
            yield row_to_namedtuple(row)


In [2]:
from collections import Counter, defaultdict

def count_violations_by_make(file_path: str) -> dict:
    """
    Count the number of traffic violations grouped by vehicle make.

    Args:
        file_path: Path of the CSV file, passed straight to lazy_csv_reader.

    Returns:
        dict: A collections.Counter (a dict subclass) where the keys are
        vehicle makes and the values are the number of violations (int) for
        each make, in the order each make is first encountered. Looking up a
        make that never occurred returns 0 and, unlike a defaultdict, does
        not add that make to the result.
    """
    # Counter consumes the generator lazily, so rows are still streamed one
    # at a time rather than collected into a list first.
    return Counter(
        violation.vehicle_make for violation in lazy_csv_reader(file_path)
    )

# Example usage: tally the sample extract and print one line per vehicle make,
# in the order the makes are stored in the returned mapping.
file_path = 'nyc_parking_tickets_extract-1.csv'
violations_count = count_violations_by_make(file_path)
for make in violations_count:
    count = violations_count[make]
    print(f'{make}: {count} violations')


BMW: 34 violations
CHEVR: 76 violations
DODGE: 45 violations
FORD: 104 violations
FRUEH: 44 violations
HONDA: 106 violations
LINCO: 12 violations
TOYOT: 112 violations
CADIL: 9 violations
CHRYS: 12 violations
FIR: 1 violations
GMC: 35 violations
HYUND: 35 violations
JAGUA: 3 violations
JEEP: 22 violations
LEXUS: 26 violations
ME/BE: 38 violations
MERCU: 4 violations
MITSU: 11 violations
NISSA: 70 violations
HIN: 6 violations
NS/OT: 18 violations
WORKH: 2 violations
ACURA: 12 violations
AUDI: 12 violations
INTER: 25 violations
ISUZU: 10 violations
KENWO: 5 violations
KIA: 8 violations
OLDSM: 1 violations
SUBAR: 18 violations
VOLVO: 12 violations
SATUR: 2 violations
SMART: 3 violations
INFIN: 13 violations
PETER: 1 violations
: 5 violations
CITRO: 1 violations
ROVER: 5 violations
BUICK: 5 violations
GEO: 1 violations
MAZDA: 5 violations
PORSC: 3 violations
VOLKS: 8 violations
YAMAH: 1 violations
BSA: 1 violations
MINI: 1 violations
PONTI: 1 violations
SPRI: 1 violations
PLYMO: 1 violatio