In [3]:
import sqlite3

In [4]:
def remove_outliers(db_path, threshold=50):
    """Delete every trial whose stored objective value exceeds ``threshold``.

    Rows in ``trial_values`` with ``value > threshold`` (strict comparison)
    identify the trials to remove. For each of them, the rows keyed by the same
    ``trial_id`` are deleted from the per-trial tables and from ``trials``.

    All deletions happen in a single transaction: they are committed together
    at the end, or rolled back if any statement fails. The connection is always
    closed, including on error.

    Args:
        db_path: Path of the SQLite database file to modify in place.
        threshold: Values strictly greater than this are treated as outliers.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        sqlite3.Error: If the database cannot be read or modified (for example
            when ``trial_values`` does not exist).
    """
    # Tables that hold rows keyed by trial_id. Dependent tables come first and
    # the parent ``trials`` table last, so the order stays valid if foreign-key
    # enforcement is enabled.
    trial_tables = (
        "trial_params",
        "trial_values",
        "trial_user_attributes",
        "trial_system_attributes",
        "trial_intermediate_values",
        "trial_heartbeats",
        "trials",
    )

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Identify trials with objective values greater than the threshold
        cursor.execute("SELECT trial_id, value FROM trial_values WHERE value > ?", (threshold,))
        outliers = cursor.fetchall()

        if not outliers:
            print("No outliers found.")
            return

        print(f"Found {len(outliers)} outliers. Removing...")

        # Only touch tables that exist in this database, so a schema lacking one
        # of the optional per-trial tables does not abort the whole clean-up.
        cursor.execute("SELECT name FROM sqlite_master WHERE type = 'table'")
        existing_tables = {name for (name,) in cursor.fetchall()}
        # Table names come from the fixed tuple above, never from user input,
        # so building the statement text here is safe.
        delete_statements = [
            f"DELETE FROM {table} WHERE trial_id = ?"
            for table in trial_tables
            if table in existing_tables
        ]

        try:
            for trial_id, value in outliers:
                print(f"Removing trial_id: {trial_id}, value: {value}")
                for statement in delete_statements:
                    cursor.execute(statement, (trial_id,))
            conn.commit()
        except Exception:
            # Leave the database untouched rather than half-cleaned.
            conn.rollback()
            raise
    finally:
        conn.close()

    print("Outliers removed successfully.")


In [5]:
def list_tables(db_path):
    """Print the name of every table in the SQLite database at ``db_path``.

    Table names are read from ``sqlite_master`` and printed one per line after
    a header line. The connection is closed even when the query fails (for
    example if the file is not a SQLite database).

    Args:
        db_path: Path of the SQLite database file to inspect.

    Returns:
        None. The listing is written with ``print``.

    Raises:
        sqlite3.Error: If the database cannot be read.
    """
    conn = sqlite3.connect(db_path)
    try:
        tables = conn.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()
    finally:
        # Release the file handle whether or not the query succeeded.
        conn.close()

    print("Tables in the database:")
    for (table_name,) in tables:
        print(table_name)
    
    
def list_columns(db_path, table_name):
    """Print the column descriptions of ``table_name`` in a SQLite database.

    Each row returned by ``PRAGMA table_info`` is printed as a tuple, one per
    line, after a header line. If the table does not exist the PRAGMA returns
    no rows, so only the header is printed.

    Args:
        db_path: Path of the SQLite database file to inspect.
        table_name: Name of the table to describe. It is quoted as an SQL
            identifier, so names containing spaces or quotes are handled and
            cannot inject additional SQL.

    Returns:
        None. The listing is written with ``print``.

    Raises:
        sqlite3.Error: If the database cannot be read.
    """
    # The name is embedded in the statement text, so quote it as an identifier
    # (doubling any embedded double quotes) instead of interpolating it raw.
    quoted_name = '"' + str(table_name).replace('"', '""') + '"'

    conn = sqlite3.connect(db_path)
    try:
        columns = conn.execute(f"PRAGMA table_info({quoted_name});").fetchall()
    finally:
        # Release the file handle whether or not the query succeeded.
        conn.close()

    print(f"Columns in the {table_name} table:")
    for column in columns:
        print(column)

In [6]:
def delete_trials_by_number_or_state(db_path, trial_number=None, state=None):
    """Delete trials selected by their ``number`` or by their ``state``.

    Exactly one criterion is applied: when ``trial_number`` is given it is used
    and ``state`` is ignored; otherwise ``state`` is used. Matching is done on
    the ``number`` / ``state`` columns of the ``trials`` table with plain
    equality. For every matching ``trial_id`` the rows with that id are removed
    from the per-trial tables and from ``trials``.

    All deletions happen in a single transaction: they are committed together
    at the end, or rolled back if any statement fails. The connection is always
    closed, including on error.

    NOTE(review): the lookup does not filter by study, so if the database holds
    several studies a ``trial_number`` could match more than one trial —
    confirm before using on a shared database.

    Args:
        db_path: Path of the SQLite database file to modify in place.
        trial_number: Value to match against ``trials.number``, or None.
        state: Value to match against ``trials.state``, or None.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        sqlite3.Error: If the database cannot be read or modified.
    """
    if trial_number is None and state is None:
        # Nothing to do, so do not even open the database.
        print("No criteria specified for deletion.")
        return

    # Tables that hold rows keyed by trial_id. Dependent tables come first and
    # the parent ``trials`` table last, so the order stays valid if foreign-key
    # enforcement is enabled.
    trial_tables = (
        "trial_params",
        "trial_values",
        "trial_user_attributes",
        "trial_system_attributes",
        "trial_intermediate_values",
        "trial_heartbeats",
        "trials",
    )

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        if trial_number is not None:
            # Identify trials with the specified number
            cursor.execute("SELECT trial_id FROM trials WHERE number = ?", (trial_number,))
            trials_to_delete = cursor.fetchall()
            print(f"Found {len(trials_to_delete)} trials with number {trial_number}. Removing...")
        else:
            # Identify trials with the specified state
            cursor.execute("SELECT trial_id FROM trials WHERE state = ?", (state,))
            trials_to_delete = cursor.fetchall()
            print(f"Found {len(trials_to_delete)} trials with state {state}. Removing...")

        if not trials_to_delete:
            print("No matching trials found.")
            return

        # Only touch tables that exist in this database, so a schema lacking one
        # of the optional per-trial tables does not abort the whole deletion.
        cursor.execute("SELECT name FROM sqlite_master WHERE type = 'table'")
        existing_tables = {name for (name,) in cursor.fetchall()}
        # Table names come from the fixed tuple above, never from user input,
        # so building the statement text here is safe.
        delete_statements = [
            f"DELETE FROM {table} WHERE trial_id = ?"
            for table in trial_tables
            if table in existing_tables
        ]

        try:
            for (trial_id,) in trials_to_delete:
                print(f"Removing trial_id: {trial_id}")
                for statement in delete_statements:
                    cursor.execute(statement, (trial_id,))
            conn.commit()
        except Exception:
            # Leave the database untouched rather than half-cleaned.
            conn.rollback()
            raise
    finally:
        conn.close()

    print("Specified trials removed successfully.")

In [7]:
# Path to your SQLite database. The helper functions in this notebook open this
# file directly, and the deletion helpers modify it in place.
db_path = "/data/Pein/Pytorch/Wind-Power-Prediction/optuna_results/24-08-02/24-08-02-farm_97.db"
# Name passed to optuna.load_study further down; here it equals the file stem.
study_name = "24-08-02-farm_97"  # Study name

In [8]:

# List columns in the relevant tables

# list_tables(db_path)
# list_columns(db_path, "trials")
# list_columns(db_path, "trial_values")


In [9]:

# trial_id = 0
# delete_trials_by_number_or_state(db_path, trial_number=trial_id)


# trial_id = 1
# delete_trials_by_number_or_state(db_path, trial_number=trial_id)


# trial_id = 2
# delete_trials_by_number_or_state(db_path, trial_number=trial_id)



# trial_id = 3
# delete_trials_by_number_or_state(db_path, trial_number=trial_id)

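# Delete trials by state. The `state` column is matched with an exact string
# comparison; Optuna appears to store upper-case enum names (e.g. 'FAIL'), so
# confirm the stored spelling first (SELECT DISTINCT state FROM trials).
# delete_trials_by_number_or_state(db_path=db_path, state='Fail')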

In [11]:
# Destructive: deletes from the database at db_path every trial that has a
# stored value greater than 10 (overriding the function's default of 50).
remove_outliers(db_path,10)

Found 98 outliers. Removing...
Removing trial_id: 12, value: 16.41774452328682
Removing trial_id: 18, value: 25.353874754905704
Removing trial_id: 22, value: 20.275287663936616
Removing trial_id: 36, value: 13.799815458059312
Removing trial_id: 48, value: 10.505297124385834
Removing trial_id: 57, value: 17.49605635404587
Removing trial_id: 63, value: 15.654111641645432
Removing trial_id: 64, value: 14.062833404541017
Removing trial_id: 71, value: 24.271834659576417
Removing trial_id: 75, value: 33.412703508138655
Removing trial_id: 104, value: 17.189171445369723
Removing trial_id: 119, value: 16.893830400705337
Removing trial_id: 123, value: 22.999198013544085
Removing trial_id: 131, value: 26.53750182986259
Removing trial_id: 145, value: 12.48827846646309
Removing trial_id: 148, value: 27.470013064146045
Removing trial_id: 151, value: 11.656564539670946
Removing trial_id: 160, value: 23.559344983100893
Removing trial_id: 167, value: 14.35081777572632
Removing trial_id: 168, value: 12.

In [9]:
import optuna
from optuna.importance import get_param_importances

def analyze_hyperparameter_importance(study):
    """Print the importance score of each hyperparameter of ``study``.

    The scores are obtained from ``get_param_importances`` and written one per
    line as ``name: score`` under a header line.

    Args:
        study: The study object handed to ``get_param_importances``.

    Returns:
        None. The report is written with ``print``.
    """
    # Evaluate the scores first so that a failure happens before any output.
    scores_by_param = get_param_importances(study)

    print("Hyperparameter importance:")
    for param_name, score in scores_by_param.items():
        print(f"{param_name}: {score}")



# Load the existing study from the SQLite file at db_path, looked up by study_name.
# NOTE(review): the study reflects whatever is in the database when this runs, so
# trials deleted by the clean-up cells are excluded only if those cells ran first.
study = optuna.load_study(study_name=study_name, storage=f"sqlite:///{db_path}")

# Analyze hyperparameter importance: prints one "name: score" line per parameter.
analyze_hyperparameter_importance(study)

  from .autonotebook import tqdm as notebook_tqdm


Hyperparameter importance:
