In [None]:
import csv
import re

def generate_insert_statements(
    csv_file,
    create_stream_statement,
    output_file,
    table_name,
    encoding='utf-8'
):
    """Write one SQL ``INSERT`` statement per CSV row to ``output_file``.

    Column names are taken from ``create_stream_statement`` by picking every
    identifier that is followed by whitespace and another word (i.e. each
    ``name TYPE`` pair). Each name is then looked up in the CSV header; the
    lookup is exact, so the CSV header must use the same spelling and case.

    Args:
        csv_file: Path of the CSV file to read. The first row is the header.
        create_stream_statement: Text containing ``name TYPE`` column
            definitions. NOTE: any other pair of adjacent words (for example
            ``NOT NULL`` or ``CREATE TABLE``) is also picked up as a column,
            so pass only the plain column list.
        output_file: Path of the SQL file to create or overwrite.
        table_name: Table name inserted verbatim into every statement.
        encoding: Text encoding used for both the input and the output file.

    Raises:
        ValueError: If no column definition can be found in
            ``create_stream_statement``.

    Notes:
        * A value that is missing, empty or whitespace-only becomes ``NULL``.
        * Every other value is emitted as a single-quoted string literal with
          embedded single quotes doubled, whatever the declared column type.
        * ``table_name`` and the column names are NOT quoted or validated;
          only use trusted values for them.
        * Both files are opened with ``errors='ignore'``, so characters that
          cannot be decoded/encoded are dropped silently.
    """
    # Identifier (letters, digits, underscores; not starting with a digit)
    # followed by whitespace and the start of a type name. Allowing digits
    # keeps columns such as ``col1`` from being skipped.
    columns = re.findall(
        r'\b([a-zA-Z_][a-zA-Z0-9_]*)\b\s+[a-zA-Z_]+',
        create_stream_statement,
    )
    if not columns:
        # Without columns every statement would be "INSERT INTO t () VALUES ()",
        # which is not valid SQL; fail before touching any file.
        raise ValueError(
            "no column definitions found in create_stream_statement"
        )

    # The column list is identical for every row, so build it once.
    column_list = ', '.join(columns)
    insert_statements = []

    with open(csv_file, newline='', encoding=encoding, errors='ignore') as csvfile:
        for row in csv.DictReader(csvfile):
            values = []
            for column in columns:
                value = row.get(column)
                if value is None or not value.strip():
                    # Column absent from the CSV, short row, or blank cell.
                    values.append("NULL")
                else:
                    # Double single quotes so the value is a valid SQL literal.
                    value_escaped = value.replace("'", "''")
                    values.append(f"'{value_escaped}'")

            insert_statements.append(
                f"INSERT INTO {table_name} ({column_list}) VALUES ({', '.join(values)});\n"
            )

    # The output is opened only after the whole input was read successfully,
    # so a failing read never leaves a truncated SQL file behind.
    with open(output_file, 'w', encoding=encoding, errors='ignore') as f:
        f.writelines(insert_statements)

In [None]:
# Source CSV and destination SQL script (absolute, machine-specific paths).
input_data_file = '/Users/briandunn/Desktop/kafka_projs/World Airports - Postgres to Cassandra/orig-data/World_Airports.csv'
output_file_location = '/Users/briandunn/Desktop/kafka_projs/World Airports - Postgres to Cassandra/source-scripts/3_inserts.sql'

# Column definitions, one "name TYPE" pair per line. generate_insert_statements
# reads only the names from this text and looks each one up in the CSV header.
# NOTE(review): the closing ")" has no matching opener in this string --
# presumably a leftover from a complete CREATE statement; kept unchanged.
create_stream_statement = """
    x_coord FLOAT,
    y_coord FLOAT,
    object_id INTEGER,
    id INTEGER,
    airport_identifier VARCHAR,
    airport_type VARCHAR,
    airport_name VARCHAR,
    latitude_coord FLOAT,
    longitude_coord FLOAT,
    elevation_ft INTEGER,
    continent VARCHAR,
    iso_country VARCHAR,
    iso_region VARCHAR,
    municipality VARCHAR,
    scheduled_service VARCHAR,
    gps_code VARCHAR,
    iata_code VARCHAR,
    local_code VARCHAR,
    home_link VARCHAR,
    wikipedia_link VARCHAR,
    keywords TEXT,
    communications_desc VARCHAR,
    frequency_mhz FLOAT,
    runway_length_ft INTEGER,
    runway_width_ft INTEGER,
    runway_surface VARCHAR,
    runway_lighted INTEGER,
    runway_closed INTEGER
)
"""

# Produce the INSERT script for the airports table (default utf-8 encoding).
generate_insert_statements(
    csv_file=input_data_file,
    create_stream_statement=create_stream_statement,
    output_file=output_file_location,
    table_name="world_airports_table",
)