## Import libraries!

In [60]:
import pandas as pd

#pd.set_option('display.max_columns', None)  # Show all columns
#pd.set_option('display.width', 1000)        # Set console width
#pd.set_option('display.max_colwidth', 40)   # Limit the maximum width of each column


## Read from trials csv

In [61]:
trials = pd.read_csv("trials.csv")
trials.describe()

Unnamed: 0,study_id,trial_id,trial_number,param_value,value
count,1878.0,1878.0,1878.0,1878.0,1878.0
mean,63.660277,556.518637,98.089457,314.813014,1.632248
std,30.402532,319.373231,68.748462,357.216384,0.084012
min,1.0,4.0,0.0,0.000101,1.512869
25%,44.0,285.25,41.0,0.001249,1.557154
50%,63.0,554.0,88.0,16.004976,1.601807
75%,77.0,822.75,142.75,662.0,1.708823
max,106.0,1136.0,269.0,1024.0,1.906356


## Divide Each Unique Study into Its Own DataFrame

In [62]:
# Group the DataFrame by 'study_id' and create a dictionary of DataFrames
grouped = trials.groupby('study_name')
dfs = {study_name: group for study_name, group in grouped}


## Create a Sorted DataFrame for Each Study

In [63]:
# Sort each DataFrame by the 'value' column in descending order
sorted_dfs = {study_name: df.sort_values(by='value', ascending=True) for study_name, df in dfs.items()}


## Create a DataFrame for Best Trials of Each Study

In [64]:
# Initialize a dictionary to hold the DataFrames of best trials for each study
best_trial_dfs = {}

for study_name, df in dfs.items():
    # Sort the trials by their order of execution (trial_number)
    df_sorted_by_trial = df.sort_values(by='trial_number')
    
    # Initialize the best value to a very small number
    best_value = -float('inf')
    
    # List to keep track of rows that were best trials at their time
    best_trials = []
    
    for _, row in df_sorted_by_trial.iterrows():
        if row['value'] > best_value:
            best_value = row['value']
            best_trials.append(row)
    
    # Create a DataFrame from the best trials
    best_trial_dfs[study_name] = pd.DataFrame(best_trials)


In [65]:
study_3_original_df = dfs["distributed_optimization_15"]
# study_3_sorted_df = sorted_dfs[3]
# study_3_best_trials_df = best_trial_dfs[3]

study_3_original_df.head()

Unnamed: 0,study_id,study_name,trial_id,trial_number,param_name,param_value,value
1460,106,distributed_optimization_15,651,0,lr,0.003186,1.543395
1461,106,distributed_optimization_15,651,0,units,205.0,1.543395
1462,106,distributed_optimization_15,652,1,lr,0.009952,1.681658
1463,106,distributed_optimization_15,652,1,units,429.0,1.681658
1464,106,distributed_optimization_15,653,2,lr,0.000235,1.57903


In [66]:
study_3_sorted_df = sorted_dfs["distributed_optimization_15"]

study_3_sorted_df.head(20)

Unnamed: 0,study_id,study_name,trial_id,trial_number,param_name,param_value,value
1659,106,distributed_optimization_15,852,117,units,861.0,1.512869
1658,106,distributed_optimization_15,852,117,lr,0.001817,1.512869
1536,106,distributed_optimization_15,729,46,lr,0.002133,1.520662
1537,106,distributed_optimization_15,729,46,units,290.0,1.520662
1770,106,distributed_optimization_15,990,190,lr,0.001618,1.522358
1771,106,distributed_optimization_15,990,190,units,487.0,1.522358
1607,106,distributed_optimization_15,789,82,units,557.0,1.523193
1606,106,distributed_optimization_15,789,82,lr,0.001958,1.523193
1520,106,distributed_optimization_15,717,38,lr,0.00098,1.523387
1521,106,distributed_optimization_15,717,38,units,166.0,1.523387


In [67]:
study_3_best_trials_df = best_trial_dfs["distributed_optimization_15"]

study_3_best_trials_df.head(20)

Unnamed: 0,study_id,study_name,trial_id,trial_number,param_name,param_value,value
1460,106,distributed_optimization_15,651,0,lr,0.003186,1.543395
1462,106,distributed_optimization_15,652,1,lr,0.009952,1.681658
1482,106,distributed_optimization_15,669,11,lr,0.008114,1.681672
1518,106,distributed_optimization_15,714,37,lr,0.000126,1.689234
1542,106,distributed_optimization_15,734,49,lr,0.004858,1.70442
1854,106,distributed_optimization_15,1088,252,lr,0.002077,1.727869
