In [428]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import re

In [429]:
FILE_NAME = '.csv'

URL = os.path.join('../data_clean', FILE_NAME)

In [430]:
data = pd.read_csv(URL, sep=',')
data.head(1)

Unnamed: 0,ActiviteitVereistContact_ActivityId,ActiviteitVereistContact_ReqAttendee
0,0000028D-CC1A-E411-ACD3-005056B06EC4,902EEFE7-2703-E211-8CF1-984BE17C2819


In [431]:
fk_help = pd.read_csv('../data_clean/Contact_fixed.csv', sep=',')
fk_help.head(1)

Unnamed: 0,Contact_Contactpersoon,Contact_Account,Contact_Functietitel,Contact_Persoon_ID,Contact_Status,Contact_Voka_medewerker
0,00002D37-EF48-EB11-8119-001DD8B72B62,01002D37-EF48-EB11-8119-001DD8B72B62,Zaakvoerder,14002D37-EF48-EB11-8119-001DD8B72B62,Actief,0


In [432]:
PRIMARY_KEY = 'ActiviteitVereistContact_ActivityId'

# add tupel like: (column_name, foreign_key_table_name.foreign_key_column_name) || leave empty if not foreign key
FOREIGN_KEY = [('ActiviteitVereistContact_ActivityId', 'Afspraak_alle.Afspraak_ALLE_Afspraak'), ('ActiviteitVereistContact_ReqAttendee', 'Contact.Contact_Contactpersoon')]

In [433]:
def create_column_names(dataframe):
    columns = dataframe.columns
    columns = [col + '_id' if col == PRIMARY_KEY else col for col in columns]
    columns = [re.sub(r'\W+', '', col) for col in columns]
    columns = [col.lower() for col in columns]
    dict_columns = dict(zip(dataframe.columns, columns))
    return dict_columns

def change_fk_name(string):
    string = string + '_id'
    string = string.lower()
    string = string[0].upper() + string[1:]
    return string

In [434]:
new_col_names = create_column_names(data)

In [435]:
data.rename(
    columns=new_col_names, inplace=True)

if PRIMARY_KEY:
    PRIMARY_KEY = new_col_names[PRIMARY_KEY]
FOREIGN_KEY = [(new_col_names[fk[0]], fk[1]) for fk in FOREIGN_KEY]
FOREIGN_KEY = [(fk[0], change_fk_name(fk[1])) for fk in FOREIGN_KEY]

In [436]:
def generate_create_table_query():
    sql_data_types = {
        'int64': 'INT',
        'float64': 'FLOAT',
        'object': 'VARCHAR(255)',
        'datetime64[ns]': 'DATE',
    }

    table_name = FILE_NAME[:-10]  # Remove the '.csv' extension and the 'fixed' to use as the table name
    table_name = table_name.replace(' ', '_')  # Replace spaces with underscores to prevent SQL errors

    # Create a list of column definitions with each column on a new line
    columns_list = [f'{col} {sql_data_types.get(str(dtype), "VARCHAR(255)")}' for col, dtype in zip(data.columns, data.dtypes)]

    # Include PRIMARY KEY and FOREIGN KEY columns if provided
    if PRIMARY_KEY:
        # find which column is the primary key
        primary_key_index = data.columns.get_loc(PRIMARY_KEY)
        # add PRIMARY KEY to the column definition
        columns_list[primary_key_index] = f'{PRIMARY_KEY} INT NOT NULL PRIMARY KEY IDENTITY(1,1)'
    if FOREIGN_KEY:
        # loop through the list of FOREIGN KEY tuples and add each FOREIGN KEY to the column definition
        for foreign_key in FOREIGN_KEY:
            columns_list.append(f'FOREIGN KEY ({foreign_key[0]}) REFERENCES {foreign_key[1].split(".")[0]}({foreign_key[1].split(".")[1]})')
            
    # Join the column definitions into a string with each column on a new line
    columns = ',\n'.join(columns_list)

    # Generate the CREATE TABLE query
    create_table_query = f'IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = \'{table_name}\')\nBEGIN\nCREATE TABLE {table_name} (\n{columns});\nEND'

    # Print the generated CREATE TABLE query
    print(create_table_query)
    print()

    ################################################
    ### Generate the SQL_Alchemy class notation ####
    ################################################
    
    # Generate the SQLAlchemy class representation
    class_representation = f'class {table_name.capitalize()}(Base):\n'
    class_representation += f'    __tablename__ = \'{table_name}\'\n'

    for i, (col, dtype) in enumerate(zip(data.columns, data.dtypes)):
        # Change VARCHAR(255) to String(255)
        sqlalchemy_type = 'String(255)' if sql_data_types.get(str(dtype), None) == 'VARCHAR(255)' else sql_data_types.get(str(dtype), 'String(255)')
        
        if str(dtype) == 'int64':
            sqlalchemy_type = 'Integer'
        elif str(dtype) == 'float64':
            sqlalchemy_type = 'Float'
        elif str(dtype) == 'datetime64[ns]':
            sqlalchemy_type = 'Date'
        else:
            sqlalchemy_type = 'String(255)'

        # Skip the column with the primary key, if applicable
        if PRIMARY_KEY:
            if primary_key_index is not None and i == primary_key_index:
                class_representation += f'    {col} = Column({sqlalchemy_type}, primary_key=True)\n'
        else:
            class_representation += f'    {col} = Column({sqlalchemy_type})\n'
                
    # Add foreign keys to the class representation, if applicable
    if FOREIGN_KEY:
        for foreign_key_tuple in FOREIGN_KEY:
            # find datatype of foreign key column
            foreign_key_column = data[foreign_key_tuple[0]]
            foreign_key_dtype = str(foreign_key_column.dtype)
            sqlalchemy_type = 'String(255)' if sql_data_types.get(foreign_key_dtype, None) == 'VARCHAR(255)' else sql_data_types.get(foreign_key_dtype, 'String(255)')
            # replace the column definition with the column definition and foreign key
            fk_str_rep = f'{foreign_key_tuple[0]} = Column({sqlalchemy_type}, ForeignKey(\'{foreign_key_tuple[1]}\'))'
            class_representation = class_representation.replace(foreign_key_tuple[0], fk_str_rep)
            class_representation = class_representation.replace(f'{fk_str_rep} = Column({sqlalchemy_type})', fk_str_rep)

    print(class_representation)
    print()

In [437]:
generate_create_table_query()

IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 'Activiteit_vereist_contact')
BEGIN
CREATE TABLE Activiteit_vereist_contact (
activiteitvereistcontact_activityid_id INT NOT NULL PRIMARY KEY IDENTITY(1,1),
activiteitvereistcontact_reqattendee VARCHAR(255),
FOREIGN KEY (activiteitvereistcontact_activityid_id) REFERENCES Afspraak_alle(afspraak_alle_afspraak_id),
FOREIGN KEY (activiteitvereistcontact_reqattendee) REFERENCES Contact(contact_contactpersoon_id));
END

class Activiteit_vereist_contact(Base):
    __tablename__ = 'Activiteit_vereist_contact'
    activiteitvereistcontact_activityid_id = Column(String(255), ForeignKey('Afspraak_alle.afspraak_alle_afspraak_id')) = Column(String(255), primary_key=True)


