In [None]:
import pandas as pd
import plotly.express as px
import re
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
# Select the combined "notebook+pdf" plotly renderer as the default, so that
# fig.show() calls that do not name a renderer use it.
pio.renderers.default = "notebook+pdf"

In [None]:
# Regular expressions used to pull values out of the raw result text.
# Configuration header: a space-free token, an integer before " att" and an
# integer before " SUPP" (read as dataset, attribute count and support by
# getInformations). Also used to detect where a new record starts.
reg_config = r'([^ ]*) ([0-9]+) att ([0-9]+) SUPP'
# Integer in "Total : <n> patterns".
reg_size1_patterns = r'Total : ([0-9]+) patterns'
# Integer in every "(<n> elements)" occurrence; there may be several per record.
reg_extented_sequence = r'\(([0-9]+) elements\)'
# Whole seconds in "Execution time size 1 pattern: <n>s".
reg_exec_time_size_1 = r'Execution time size 1 pattern: ([0-9]+)s'
# Whole seconds in "Execution time extension : <n>s".
reg_exec_time_seq = r'Execution time extension : ([0-9]+)s'



In [None]:
# Group the raw result file into one string per configuration: a line that
# matches reg_config starts a new record, and every following non-blank line
# is appended to it (stripped, space-separated).
lines = []
with open("../analyzed_results/support_impact.txt") as file:
    current_line = ""
    for line in file:
        if len(line) <= 1:
            # Blank line (at most a newline character): nothing to collect.
            continue
        if re.match(reg_config, line):
            # A new configuration header closes the record in progress.
            if current_line:
                lines.append(current_line)
            # The header line is kept verbatim (including its newline).
            current_line = line + " "
        elif current_line:
            # Only accumulate once a header has been seen: text that precedes
            # the first header has no configuration and could not be parsed.
            current_line += line.strip() + " "
# Flush the last record, but never emit an empty one (empty file or a file
# without any configuration header).
if current_line:
    lines.append(current_line)
                

In [None]:
# Display the grouped records (one string per configuration header).
lines

In [None]:
# Keep the last grouped record in a variable and display it for inspection.
tmp_l = lines[-1]
tmp_l

In [None]:
def getInformations(line):
    """Turn one grouped result record into a flat dictionary of measurements.

    Parameters
    ----------
    line : str
        Text of a single record: a configuration header followed by the
        result lines that belong to it.

    Returns
    -------
    dict
        Keys 'Dataset', 'Nb Attributes', 'Support',
        'Time Extraction Subgraphs', 'Time Sequence', 'Nb Sequence' and
        'Nb Subgraph'. 'Time Sequence' is -1 when the record contains the
        text 'OOME'.

    Raises
    ------
    IndexError
        If a required pattern (configuration header, size-1 execution time,
        extension execution time for records without 'OOME', or the pattern
        total) does not occur in ``line``.
    """
    # The header yields three captured groups, in this order.
    dataset, attribute_count, support = re.findall(reg_config, line)[0]
    attribute_count = int(attribute_count)
    support = int(support)

    subgraph_seconds = int(re.findall(reg_exec_time_size_1, line)[0])

    # Records flagged with 'OOME' (presumably an out-of-memory failure --
    # confirm) get the sentinel -1 instead of an extension time.
    sequence_seconds = (
        -1 if 'OOME' in line
        else int(re.findall(reg_exec_time_seq, line)[0])
    )

    # Add up every "(<n> elements)" count found in the record; 0 when none.
    sequence_total = 0 + sum(
        int(count) for count in re.findall(reg_extented_sequence, line)
    )
    subgraph_total = int(re.findall(reg_size1_patterns, line)[0])

    return {
        'Dataset': dataset,
        'Nb Attributes': attribute_count,
        'Support': support,
        'Time Extraction Subgraphs': subgraph_seconds,
        'Time Sequence': sequence_seconds,
        'Nb Sequence': sequence_total,
        'Nb Subgraph': subgraph_total,
    }
    


In [None]:
# Parse every grouped record into a dictionary of measurements, then show
# the resulting list.
li_cleaned = [getInformations(record) for record in lines]
li_cleaned

In [None]:
# One row per parsed record.
df = pd.DataFrame(li_cleaned)
# getInformations stores -1 in 'Time Sequence' for records that contain
# 'OOME'. Turn that sentinel into a missing value with a vectorized mask so
# the column stays numeric (NaN) instead of going through a row-wise apply.
df['Time Sequence'] = df['Time Sequence'].mask(df['Time Sequence'] == -1)
df

In [None]:
# Re-type 'Time Extraction Subgraphs' through pd.to_numeric, asking for a
# float downcast. NOTE(review): presumably done so both time columns are
# floating point when they are plotted together below -- confirm.
df['Time Extraction Subgraphs']=pd.to_numeric(df["Time Extraction Subgraphs"], downcast='float', )

In [None]:
def plotFigure(dataset):
    """Plot sequence-extension time and sequence count against support.

    Reads the module-level ``df``, keeps the rows whose 'Dataset' equals
    ``dataset``, orders them by 'Support' and shows a figure with two
    y-axes: 'Time Sequence' on the primary axis and 'Nb Sequence' on the
    secondary axis.

    Parameters
    ----------
    dataset : str
        Value of the 'Dataset' column to plot.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # sort_values returns a new frame; sorting a filtered slice of df in place
    # would raise SettingWithCopyWarning.
    tmp_df = df[df['Dataset'] == dataset].sort_values(by='Support')

    # Add traces
    fig.add_trace(
        go.Scatter(x=tmp_df["Support"], y=tmp_df["Time Sequence"], name="Execution Time for Sequence extension (s)"),
        secondary_y=False,
    )

    fig.add_trace(
        go.Scatter(x=tmp_df["Support"], y=tmp_df["Nb Sequence"], name="Number of sequences extentded"),
        secondary_y=True,
    )

    # Add figure title
    fig.update_layout(
        title_text=f"Dataset : {dataset} Impact of Support on computation time and sequence results"
    )

    # Set axis titles
    fig.update_xaxes(title_text="Support", )
    fig.update_yaxes(title_text="Execution Time for Sequence extension (s)", secondary_y=False, )
    fig.update_yaxes(title_text="Number of sequences extentded", secondary_y=True,)
    fig.show()

In [None]:
# One dual-axis figure per dataset, most frequent dataset first
# (value_counts orders by descending count).
dataset_names = df['Dataset'].value_counts().index
for dataset_name in dataset_names:
    plotFigure(dataset_name)

In [None]:
# One grouped bar chart per dataset comparing the two execution times at each
# support value.
for c in df['Dataset'].value_counts().index:
    # Filter once and let plotly read every column from the same frame.
    dataset_df = df[df.Dataset == c]
    fig = px.bar(
        dataset_df,
        x='Support',
        y=['Time Extraction Subgraphs', 'Time Sequence'],
        barmode='group',
        # Without a title the per-dataset charts cannot be told apart.
        title=f"Dataset : {c} Execution times by Support",
    )
    fig.show()