In [None]:
import numpy as np
import pandas as pd
from sqlalchemy import MetaData, select
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session, registry

from utils.create_object import create_object

from utils.handle_engine import get_engine

In [None]:
# Obtain the SQLAlchemy engine from the project helper (connection details
# live in utils.handle_engine and are not visible here).
engine = get_engine()
# Reflect the existing database schema so that tables can be looked up by
# name through `metadata.tables[...]` in later cells.
metadata = MetaData()
metadata.reflect(bind=engine)
# Build automap ORM classes on top of the reflected metadata; later cells
# reach them through `Base.classes.<table_name>`.
Base = automap_base(metadata=metadata)
Base.prepare(autoload_with=engine)
# NOTE(review): `mapper_registry` is not referenced by any cell visible in
# this section — confirm it is still needed.
mapper_registry = registry()

In [None]:
# Display the reflected definition of the "feature" table as a quick schema check.
metadata.tables["feature"]

In [None]:
# Bind short names to the reflected Table objects used by the queries below.
datapoint, datapoint_feature_value, feature, datapoint_mappings = (
    metadata.tables[table_name]
    for table_name in (
        'datapoint',
        'datapoint_feature_value',
        'feature',
        'datapoint_mappings',
    )
)

In [None]:
def build_feature_query(grouping):
    """
    Build the long-format feature query for one datapoint grouping.

    The query joins datapoint -> datapoint_feature_value -> feature and
    datapoint -> datapoint_mappings, and keeps only the rows whose
    `datapoint_mappings.grouping` equals the requested value.

    Args:
        grouping: Value compared against `datapoint_mappings.grouping`
            (this notebook uses 'train' and 'test').

    Returns:
        A SQLAlchemy Select yielding the columns `datapoint_id`,
        `feature_name` and `feature_value` (one row per stored feature value).
    """
    joined_tables = (
        datapoint
        .join(datapoint_feature_value, datapoint.c.id == datapoint_feature_value.c.datapoint_id)
        .join(feature, feature.c.id == datapoint_feature_value.c.feature_id)
        .join(datapoint_mappings, datapoint.c.datapoint_mappings_id == datapoint_mappings.c.id)
    )
    return (
        select(
            datapoint.c.id.label('datapoint_id'),
            feature.c.name.label('feature_name'),
            datapoint_feature_value.c.value.label('feature_value'),
        )
        .select_from(joined_tables)
        .where(datapoint_mappings.c.grouping == grouping)
    )


# Construct the SQL queries for the training and test sets; they differ only
# in the grouping value, so both come from the same builder.
query_train = build_feature_query('train')
query_test = build_feature_query('test')

In [None]:
def _features_as_columns(long_frame):
    """
    Reshape a long (datapoint_id, feature_name, feature_value) frame so that
    each feature name becomes its own column and `datapoint_id` is a regular
    column again.
    """
    wide_frame = long_frame.pivot_table(
        index='datapoint_id',
        columns='feature_name',
        values='feature_value',
    )
    return wide_frame.reset_index()


# Run both queries against the database and keep the raw long-format frames.
df_train, df_test = (
    pd.read_sql_query(feature_query, engine)
    for feature_query in (query_train, query_test)
)

# One row per datapoint, one column per feature, for train and test alike.
pivot_train = _features_as_columns(df_train)
pivot_test = _features_as_columns(df_test)

In [None]:
# Show the column labels of the pivoted test frame (datapoint_id plus one column per feature name).
pivot_test.columns

In [None]:
# Assuming engine, Base, and other necessary imports are already defined

def update_entity_attributes(entity, attributes):
    """
    Set several attributes on an entity in one call.

    Args:
        entity: The object (typically a SQLAlchemy entity) to modify in place.
        attributes: Mapping of attribute name -> value to assign.

    Returns:
        The same `entity` object that was passed in, after the assignments.
    """
    for attr_name in attributes:
        setattr(entity, attr_name, attributes[attr_name])
    return entity

# Define function to clone initial parameters
def clone_initial_parameters(session, entity_class):
    """
    Clone the latest record of a given entity class, excluding the ID.

    "Latest" means the row with the highest `id`.

    Args:
        session: The SQLAlchemy session.
        entity_class: The SQLAlchemy model class to clone.

    Returns:
        The object returned by `create_object` for the cloned attributes
        (every column of the latest record except `id`).

    Raises:
        ValueError: If the table behind `entity_class` has no rows, so there
            is nothing to clone.
    """
    # Get the latest record (highest id)
    latest_record = session.query(entity_class).order_by(entity_class.id.desc()).first()
    if latest_record is None:
        # Without this guard the comprehension below fails with an opaque
        # AttributeError on NoneType.
        raise ValueError(
            f"Cannot clone {entity_class.__name__}: no existing record to copy from."
        )

    # Clone attributes except 'id'
    cloned_attributes = {
        col.name: getattr(latest_record, col.name)
        for col in entity_class.__table__.columns
        if col.name != "id"
    }
    # NOTE(review): another cell in this notebook calls
    # create_object(context, "train_process", ...), i.e. with a context as the
    # first positional argument — confirm this call shape is also supported.
    return create_object(entity_class.__name__, **cloned_attributes)


In [None]:
# Open one ORM session on the engine and expose it, together with the automap
# base, through the `context` dict that is passed to `create_object`.
session = Session(bind=engine)
context = {"base": Base, "session": session}

In [None]:

# Load initial parameters or clone them if necessary
init_param_cls = Base.classes.train_process_init_parameter
train_process_count = session.query(Base.classes.train_process).count()
init_param_count = session.query(init_param_cls).count()

# Check if cloning of initial parameters is required
if train_process_count == init_param_count:
    # Every stored parameter row is matched by a train process, so clone the
    # latest initial parameters for the process about to be created.
    new_init_params = clone_initial_parameters(session, init_param_cls)

    # Assign new ID to the cloned parameters
    # NOTE(review): count + 1 collides with an existing id if rows were ever
    # deleted — confirm ids are dense or switch to a DB-generated key.
    new_init_params.id = init_param_count + 1

    # Add the cloned parameters to the session and commit
    session.add(new_init_params)
    session.commit()
else:
    # Counts differ: load the most recent stored parameter row instead of
    # cloning. Previously `new_init_params` was left undefined on this path
    # and the update below raised NameError after the train process had
    # already been committed.
    # NOTE(review): assumes the latest row is the one awaiting a train
    # process — confirm against the intended workflow.
    new_init_params = (
        session.query(init_param_cls).order_by(init_param_cls.id.desc()).first()
    )
    if new_init_params is None:
        raise RuntimeError(
            "No train_process_init_parameter record exists to attach to the new train process."
        )

# Create and add new train process entry
new_train_process = create_object(context, "train_process", with_commit=True, id=train_process_count + 1)
# Update initial parameters with new train process ID
update_entity_attributes(new_init_params, {"train_process_id": new_train_process.id})
session.commit()
