In [None]:
import os
import json
import pandas as pd

def flatten_json(y):
    """Collapse a nested JSON-like object into a single-level dict.

    Keys of nested dicts are joined with ``'.'`` (``{"a": {"b": 1}}``
    becomes ``{"a.b": 1}``). Lists are summarised instead of expanded:

    * an empty list is stored as ``None`` under its own key;
    * a list containing only ``int``/``float`` values yields
      ``<key>.mean``, ``<key>.min``, ``<key>.max`` and ``<key>.last``;
    * any other list yields ``<key>.length``.

    A non-dict, non-list value passed at the top level is stored under the
    empty-string key. Empty dicts contribute no keys.

    Args:
        y: The decoded JSON value to flatten.

    Returns:
        A flat ``dict`` mapping dotted key paths to leaf values or list
        summaries.
    """
    flat = {}

    def _walk(node, prefix=''):
        # ``prefix`` always carries a trailing '.', which is stripped
        # whenever the value is stored directly under its own key.
        if isinstance(node, dict):
            for key, child in node.items():
                _walk(child, f'{prefix}{key}.')
            return

        if not isinstance(node, list):
            flat[prefix[:-1]] = node
            return

        if not node:
            flat[prefix[:-1]] = None
            return

        if any(not isinstance(item, (int, float)) for item in node):
            # Non-numeric content: only record how many items there were.
            flat[prefix + 'length'] = len(node)
            return

        # Purely numeric list: keep a handful of summary statistics.
        flat[prefix + 'mean'] = sum(node) / len(node)
        flat[prefix + 'min'] = min(node)
        flat[prefix + 'max'] = max(node)
        flat[prefix + 'last'] = node[-1]

    _walk(y)
    return flat

def load_results_to_dataframe(results_dir):
    """Load every ``*.json`` file under ``results_dir`` into one DataFrame.

    The directory tree is walked recursively. Each JSON file is decoded,
    flattened with ``flatten_json`` and turned into one row. A
    ``file_name`` column is added holding the name of the file's parent
    directory joined with the file's own name.

    Files that cannot be decoded (malformed JSON or bytes that are not
    valid UTF-8) are reported on stdout and skipped, so a single bad file
    does not abort the whole load.

    Args:
        results_dir: Root directory to search for result files.

    Returns:
        A ``pandas.DataFrame`` with one row per successfully decoded file;
        empty if none were found.
    """
    data_list = []
    for root, dirs, files in os.walk(results_dir):
        # Sorting in place makes os.walk descend in a stable order, so the
        # resulting row order does not depend on the filesystem.
        dirs.sort()
        for file_name in sorted(files):
            if not file_name.endswith('.json'):
                continue
            json_path = os.path.join(root, file_name)
            try:
                # JSON is UTF-8 by specification; do not rely on the locale.
                with open(json_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError) as e:
                print(f"Error decoding JSON from file {json_path}: {e}")
                continue
            flat_data = flatten_json(data)
            # basename() handles the platform's path separator, unlike
            # splitting on a hard-coded '/'.
            flat_data['file_name'] = os.path.join(os.path.basename(root), file_name)
            data_list.append(flat_data)
    return pd.DataFrame(data_list)

# Usage example
if __name__ == "__main__":
    # Root directory that load_results_to_dataframe searches for JSON results.
    results_dir = '/home/skage/projects/ikt450_deep-neural-networks/stocknet-project-ikt450/skage/results'  # Replace with your actual path
    df = load_results_to_dataframe(results_dir)

    # Columns to show in the summary table.
    columns_of_interest = [
        'file_name',
        'Model',
        'Dataclass',
        'Results Testset.accuracy_test',
        'Results Testset.F1_test',
        'Results Testset.MCC_test',
        'Results Testset.precision_test',
        'Results Testset.recall_test',
    ]
    # Keep only the columns that are present in the DataFrame
    columns_present = [col for col in columns_of_interest if col in df.columns]
    df_selected = df[columns_present]

    # Sort by test accuracy in descending order. The column may be missing
    # (e.g. no result files were found), in which case sorting by it would
    # raise KeyError, so fall back to the unsorted selection.
    sort_column = 'Results Testset.accuracy_test'
    if sort_column in df_selected.columns:
        df_sorted = df_selected.sort_values(by=sort_column, ascending=False)
    else:
        print(f"Column '{sort_column}' not found; showing results unsorted.")
        df_sorted = df_selected

    # Display the sorted DataFrame
    print(df_sorted)

    print('All keys:', df.columns)

Error decoding JSON from file /home/skage/projects/ikt450_deep-neural-networks/stocknet-project-ikt450/skage/results/price_history_only_0001.json: Expecting value: line 27 column 19 (char 710)
                                             file_name              Model  \
93   model_Depth_First_GRU2_dataset_NormSentimentAl...   Depth_First_GRU2   
74   model_Shallow_First_GRU_dataset_TwitterSentime...  Shallow_First_GRU   
90   model_Depth_First_GRU2_dataset_NormSentimentAl...   Depth_First_GRU2   
3    model_GRU_Deep_dataset_TwitterSentimentVolumeP...           GRU_Deep   
16   model_Two_Layer_LSTM_dataset_SentimentPriceXPr...     Two_Layer_LSTM   
..                                                 ...                ...   
110       model_LSTM_v1_dataset_TweetXPriceY/0012.json            LSTM_v1   
111       model_LSTM_v1_dataset_TweetXPriceY/0015.json            LSTM_v1   
112       model_LSTM_v1_dataset_TweetXPriceY/0007.json            LSTM_v1   
113       model_LSTM_v1_dataset_Tweet