# Analyse the code

## Load data
Load the Excel data into a pandas DataFrame, replacing empty (NaN) cells with 0.

In [1]:
import pandas as pd
import os

def load_excel_to_df(filename):
    """
    Load an Excel file into a pandas DataFrame and replace empty values with 0.

    Parameters:
    -----------
    filename : str or os.PathLike
        Path to the Excel file. The extension must be ``.xlsx``; the check is
        case-insensitive, so ``.XLSX`` is accepted as well.

    Returns:
    --------
    pandas.DataFrame
        DataFrame produced by ``pd.read_excel`` (default arguments) with every
        missing value (NaN) replaced by 0.

    Raises:
    -------
    FileNotFoundError
        If ``filename`` does not exist on disk.
    ValueError
        If ``filename`` does not have an ``.xlsx`` extension.
    Exception
        If reading or filling the data fails; the underlying error is
        attached as ``__cause__``.
    """
    # Normalise str / pathlib.Path inputs to a plain string so that the
    # extension check below works for both.
    path = os.fspath(filename)

    # Check if file exists
    if not os.path.exists(path):
        raise FileNotFoundError(f"The file {filename} does not exist.")

    # Check if file is an Excel file (case-insensitive: ".XLSX" is valid too)
    if not path.lower().endswith('.xlsx'):
        raise ValueError(f"The file {filename} is not an Excel file (.xlsx).")

    try:
        # Read the Excel file and replace empty values (NaN) with 0
        return pd.read_excel(path).fillna(0)
    except Exception as e:
        # Chain explicitly so the original error and traceback stay attached.
        raise Exception(f"Error loading Excel file: {e}") from e

# Load the workbook once; missing cells are already replaced by 0 by the loader.
df = load_excel_to_df(filename='structured_data.xlsx')

In [2]:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
# NOTE: empty cells were replaced by 0 at load time, so those zeros are
# included in every statistic shown here.
df.describe()


Unnamed: 0,num_stackdepth3_logs,evts_1,expandEvts_1,pruneBacktrackEvts_1,backtrackEvts_1,strengthenEvts_1,maxStackDepth_1,evts_2,expandEvts_2,pruneBacktrackEvts_2,...,k,total_sum,variance,skewness,max_num,min_num,avg_subset_sum,max_to_avg_ratio,range_to_avg_ratio,coef_of_variation
count,690.0,690.0,690.0,690.0,690.0,690.0,690.0,690.0,690.0,690.0,...,690.0,690.0,690.0,690.0,690.0,690.0,690.0,690.0,690.0,690.0
mean,2.326087,4.0,4.0,0.0,0.0,0.0,3.0,1342984000.0,671491800.0,666915700.0,...,4.0,1370.911594,778.641904,-0.016508,94.821739,5.068116,358.045507,0.392159,0.362599,0.563883
std,1.488042,0.0,0.0,0.0,0.0,0.0,0.0,1283705000.0,641852400.0,636833000.0,...,0.817089,677.5101,171.200962,0.341746,6.041352,6.476462,196.169481,0.301732,0.269556,0.097062
min,1.0,4.0,4.0,0.0,0.0,0.0,3.0,11.0,6.0,2.0,...,3.0,167.0,57.2,-1.460793,49.0,1.0,33.4,0.100754,0.089445,0.085944
25%,1.0,4.0,4.0,0.0,0.0,0.0,3.0,10178.5,5089.75,4982.0,...,3.0,787.25,683.707333,-0.201917,94.0,2.0,192.0625,0.199569,0.19266,0.502009
50%,2.0,4.0,4.0,0.0,0.0,0.0,3.0,1895630000.0,947815000.0,947562000.0,...,4.0,1376.0,772.818979,-0.008982,97.0,3.0,349.0,0.279066,0.264916,0.564235
75%,4.0,4.0,4.0,0.0,0.0,0.0,3.0,2617559000.0,1308780000.0,1288496000.0,...,5.0,1895.0,885.024201,0.190216,98.0,6.0,486.291667,0.478214,0.440201,0.621917
max,5.0,4.0,4.0,0.0,0.0,0.0,3.0,3013720000.0,1506860000.0,1480088000.0,...,5.0,2918.0,1497.49,1.111964,99.0,78.0,972.666667,2.964072,2.904192,1.095596
