In [1]:
import csv
import re
from pathlib import Path

# Column types whose values are written verbatim (unquoted) into the statement.
_NUMERIC_TYPES = frozenset({"DOUBLE", "INTEGER"})
# Column types whose values are written as single-quoted SQL string literals.
_QUOTED_TYPES = frozenset({"TIMESTAMP", "DATE", "VARCHAR"})


def _extract_column_block(create_stream_statement):
    """Return the text inside the statement's first balanced pair of parentheses."""
    start = create_stream_statement.find('(')
    if start == -1:
        raise ValueError("create_stream_statement has no parenthesised column list")
    depth = 0
    for position in range(start, len(create_stream_statement)):
        char = create_stream_statement[position]
        if char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
            if depth == 0:
                return create_stream_statement[start + 1:position]
    raise ValueError("create_stream_statement has unbalanced parentheses")


def _sql_literal(value, data_type):
    """Render one CSV cell as a SQL literal; return None for an unsupported type."""
    if value is None:
        # csv.DictReader fills cells missing from a short row with None.
        return "NULL"
    text = str(value)
    if data_type in _NUMERIC_TYPES:
        stripped = text.strip()
        # A blank numeric cell must become NULL; an empty token is invalid SQL.
        return stripped if stripped else "NULL"
    if data_type in _QUOTED_TYPES:
        # Double embedded single quotes so the literal cannot be terminated early.
        return "'" + text.replace("'", "''") + "'"
    return None


def generate_insert_statements(
    csv_file, 
    create_stream_statement, 
    output_file, 
    table_name,
    encoding='utf-8'
):
    """Write one ``INSERT INTO`` statement per CSV row to ``output_file``.

    Column names and types are parsed from the first parenthesised group of
    ``create_stream_statement``. CSV fields whose header matches a parsed
    column name are included, in CSV order; all other fields are ignored.

    Args:
        csv_file: Path of the CSV file to read; its first row is the header.
        create_stream_statement: ``CREATE STREAM ...`` text containing a
            parenthesised list of ``name TYPE`` column definitions.
        output_file: Path of the SQL file to (over)write, one statement per line.
        table_name: Name placed after ``INSERT INTO`` in every statement.
        encoding: Text encoding used for both the input and the output file.

    Raises:
        ValueError: If ``create_stream_statement`` has no balanced
            parenthesised column list.

    Notes:
        * DOUBLE / INTEGER values are written unquoted and are not validated.
        * TIMESTAMP / DATE / VARCHAR values are written as quoted strings.
        * Missing cells and blank numeric cells are written as ``NULL``.
        * Columns of any other type are omitted from the statement.
        * Type names are matched case-insensitively.
        * Rows that contain no recognised column produce no statement.
    """
    column_block = _extract_column_block(create_stream_statement)

    # Map column name -> upper-cased type. Names may contain digits after the
    # first character; a later duplicate definition overrides an earlier one.
    data_types = {
        name: type_name.upper()
        for name, type_name in re.findall(
            r'\b([A-Za-z_][A-Za-z0-9_]*)\s+([A-Za-z_]+)', column_block
        )
    }

    insert_statements = []
    with open(csv_file, newline='', encoding=encoding, errors='ignore') as csvfile:
        for row in csv.DictReader(csvfile):
            columns = []
            values = []

            for field, value in row.items():
                if field is None:
                    # Surplus cells beyond the header are collected under a None key.
                    continue
                # A UTF-8 BOM decodes to U+FEFF and sticks to the first header;
                # str.strip() alone does not remove it.
                name = field.lstrip('\ufeff').strip()
                data_type = data_types.get(name)
                if data_type is None:
                    continue
                literal = _sql_literal(value, data_type)
                if literal is None:
                    continue
                columns.append(name)
                values.append(literal)

            if not columns:
                # "INSERT INTO t () VALUES ();" is not valid SQL, so skip the row.
                continue

            insert_statements.append(
                f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({', '.join(values)});"
            )

    # Write insert statements to output file
    with open(output_file, 'w', encoding=encoding, errors='ignore') as f:
        for statement in insert_statements:
            f.write(statement + '\n')

In [2]:
# Root of the local project folder. This is the only machine-specific value:
# edit this one line when running the notebook somewhere else.
PROJECT_DIR = Path('/Users/briandunn/Desktop/Apache Kafka- Kafka Connect/Cryptocurrencies Prices')
DATA_DIR = PROJECT_DIR / 'data'
INSERTS_DIR = PROJECT_DIR / 'statements' / 'inserts'

# Ticker symbols to convert, in the order they were originally processed.
SYMBOLS = ['AVAX', 'BNB', 'BTC', 'DOGE', 'ETH', 'SOL', 'TRX', 'USDT', 'XRP', 'ADA']

# Every stream shares the same column list; only the stream name differs.
CREATE_STREAM_TEMPLATE = """
CREATE STREAM {table_name} (
    trading_id INTEGER, 
    trading_date DATE,
    trading_open DOUBLE,
    trading_high DOUBLE,
    trading_low DOUBLE,
    trading_close DOUBLE,
    trading_adj_close DOUBLE,
    trading_volume INTEGER
)
"""

for symbol in SYMBOLS:
    # e.g. 'AVAX' -> stream 'crypto_avax_usd', input 'AVAX-USD.csv',
    # output 'avax_inserts.sql'.
    table_name = f"crypto_{symbol.lower()}_usd"
    create_stream_statement = CREATE_STREAM_TEMPLATE.format(table_name=table_name)
    input_data_file = DATA_DIR / f"{symbol}-USD.csv"
    output_file_location = INSERTS_DIR / f"{symbol.lower()}_inserts.sql"

    generate_insert_statements(
        input_data_file,
        create_stream_statement,
        output_file_location,
        table_name,
    )