# In [None]:
import pandas as pd
from datetime import datetime
from clickhouse_driver import Client
import logging
import random
import numpy as np

def read_clickhouse_to_dataframe(host,port,user,password,database):
    """Fetch the ``registria`` rows from ClickHouse as a list of dicts.

    Despite the name, the return value is not a DataFrame: the query result
    is loaded into a DataFrame only to be converted to "records" form, i.e.
    one dict per row keyed by ``email_address``, ``runid``, ``source_name``
    and ``file_name``.

    The connection is opened with the given credentials and is always
    disconnected, whether or not the query succeeds.
    """
    field_names = ['email_address', 'runid', 'source_name', 'file_name']
    connection = Client(host=host, port=port, user=user, password=password, database=database)
    try:
        rows = connection.execute(
            'SELECT email_address,runid,source_name,file_name FROM registria'
        )
        frame = pd.DataFrame(rows, columns=field_names)
        return frame.to_dict(orient="records")
    finally:
        # Runs on both the success and the error path.
        connection.disconnect()

def read_clickhouse_sen_id(host, port, user, password, database):
    """Return the send ids stored in ``send_id_lookup``, creating the table if absent.

    Args:
        host, port, user, password, database: ClickHouse connection settings,
            passed straight to ``Client``.

    Returns:
        A list with the ``sendid`` value of every row in ``send_id_lookup``;
        an empty list when the table had to be created.

    The connection is always disconnected, including when a query raises.
    """
    client = Client(host=host, port=port, user=user, password=password, database=database)

    try:
        # EXISTS TABLE matches the exact name. SHOW TABLES LIKE would treat
        # each "_" in the name as a single-character wildcard and could be
        # satisfied by a differently named table.
        table_exists = bool(client.execute("EXISTS TABLE send_id_lookup")[0][0])
        if not table_exists:
            # IF NOT EXISTS keeps this safe if another process creates the
            # table between the check above and this statement.
            client.execute(
                "CREATE TABLE IF NOT EXISTS send_id_lookup (sendid Int64) "
                "ENGINE = MergeTree() ORDER BY sendid"
            )

        # Name the column explicitly so the result does not depend on the
        # table's column order.
        rows = client.execute('SELECT sendid FROM send_id_lookup')
        return [row[0] for row in rows]
    finally:
        client.disconnect()

def unique_send_id(in_data, host, port, user, password, database):
    """Give every event a unique integer ``sendid`` and record the ids in ClickHouse.

    Each dict in *in_data* is updated in place:

    * a missing or non-integer ``sendid`` is replaced by a freshly generated id;
    * an integer ``sendid`` that is already in ``send_id_lookup``, or that was
      already used by an earlier event in this call, is replaced as well;
    * any other integer ``sendid`` is kept.

    The final ids are then inserted into ``send_id_lookup``. The insert is
    best-effort: a failure is logged and does not propagate.

    Args:
        in_data: list of event dicts; mutated in place.
        host, port, user, password, database: ClickHouse connection settings.

    Returns:
        The same *in_data* object, with ``sendid`` set on every element.
    """
    # A set keeps the per-event membership test O(1); a list would make the
    # loop quadratic in the number of known ids.
    used_ids = set(read_clickhouse_sen_id(host, port, user, password, database))

    for val in in_data:
        sendid = val.get('sendid')
        if sendid is None or not isinstance(sendid, int) or sendid in used_ids:
            sendid = generate_unique_random_not_in_list(used_ids)
            val['sendid'] = sendid
        used_ids.add(sendid)

    # Bound before the try so the finally clause is safe even when the
    # Client constructor itself raises.
    client = None
    try:
        rows = [(int(val['sendid']),) for val in in_data]
        if rows:  # nothing to record for empty input
            client = Client(host=host, port=port, user=user, password=password, database=database)
            client.execute("INSERT INTO send_id_lookup (sendid) VALUES", rows)
    except Exception:
        # Best-effort: callers still get their data if the insert fails.
        logging.exception("Failed to insert send ids into send_id_lookup")
    finally:
        if client is not None:
            client.disconnect()

    return in_data

def generate_unique_random_not_in_list(lst, lower_bound=200, upper_bound=20000000):
    """Return a random integer in ``[lower_bound, upper_bound]`` that is not in *lst*.

    Args:
        lst: collection of values that must not be returned (list, set, ...).
            It is not modified.
        lower_bound: smallest value that may be returned (inclusive).
        upper_bound: largest value that may be returned (inclusive).

    Returns:
        An integer within the bounds for which ``value in lst`` is false.

    Raises:
        ValueError: if no integer within the bounds is free.
    """
    # Build a set once so each membership test is O(1). Fall back to the
    # original collection if it holds unhashable items.
    try:
        taken = lst if isinstance(lst, (set, frozenset)) else set(lst)
    except TypeError:
        taken = lst

    span = upper_bound - lower_bound + 1
    if len(taken) >= span:
        # The range may be full, in which case rejection sampling would
        # never terminate. Enumerate the free values instead.
        free = [c for c in range(lower_bound, upper_bound + 1) if c not in taken]
        if not free:
            raise ValueError(
                f"no unused value left between {lower_bound} and {upper_bound}"
            )
        return random.choice(free)

    # Fewer taken values than slots, so at least one slot is free and this
    # loop terminates.
    while True:
        candidate = random.randint(lower_bound, upper_bound)
        if candidate not in taken:
            return candidate

def main_new(in_event_data,host,port,user,password,database):
    """Assign send ids to the events and enrich them with ``registria`` data.

    The events first go through ``unique_send_id``. They are then paired
    positionally with the rows returned by ``read_clickhouse_to_dataframe``:
    the i-th event receives ``runid``, ``emailaddress``, ``source`` and
    ``file_name`` from the i-th row, plus ``insertion_date`` (today's date
    as a string). Events are modified in place.

    Args:
        in_event_data: list of event dicts.
        host, port, user, password, database: ClickHouse connection settings.

    Returns:
        A list of the enriched events. Its length is the smaller of the
        number of events and the number of ``registria`` rows; events
        without a matching row are left out.
    """
    events = unique_send_id(in_event_data, host, port, user, password, database)
    registria_rows = read_clickhouse_to_dataframe(host, port, user, password, database)
    # NOTE(review): the pairing relies on the row order of a query without
    # ORDER BY - confirm that positional matching is really intended.

    # Computed once so every event in the batch carries the same date.
    insertion_date = str(datetime.now().date())

    enriched = []
    # zip stops at the shorter input, so surplus registria rows no longer
    # raise (and log) one IndexError each.
    for event, row in zip(events, registria_rows):
        event['runid'] = row['runid']
        event['emailaddress'] = row['email_address']
        event['source'] = row['source_name']
        event['file_name'] = row['file_name']
        event['insertion_date'] = insertion_date
        enriched.append(event)
    return enriched