In [84]:
import re
import pandas as pd
import os

In [85]:
# Special case: I had not considered that there are different Tini values, so the label is derived from the log file name.
def findTini(file):
    """Return the Tini label encoded in a log file name.

    ``file`` is searched for one of three known numeric tags; the label of
    the first tag found (in the order listed below) is returned. When none
    of the tags occurs in ``file`` the function returns ``None``.
    """
    # Order matters only if a name contains more than one tag: first hit wins.
    known_tags = (
        ("55627", "Tini-2"),
        ("55571", "Tini-1"),
        ("55496", "Tini0"),
    )
    for tag, label in known_tags:
        if tag in file:
            return label
    return None


In [86]:
def format_data_name(data_name,file):
    """Build the short instance label stored in the "Data Name" column.

    Parameters
    ----------
    data_name : str
        '/'-separated path of the ``.dat`` file as captured from the log.
    file : str
        Path of the log file. It is only forwarded to ``findTini`` when the
        data file sits directly inside an ``AP`` or ``NC`` folder.

    Returns
    -------
    str
        ``{type}{AP|NC}{Fixed}{rest of base name}{tini}`` when the parent
        folder is ``AP`` or ``NC``; otherwise ``{base name}{Fixed}``.
        ``Fixed`` is included only when "fixed" occurs anywhere in
        ``data_name`` (case-insensitive).
    """
    parts = data_name.split('/')
    # File name with every '.dat' occurrence removed.
    base_name = parts[-1].replace('.dat', '')

    # A bare file name has no parent folder; guard the lookup so that
    # parts[-2] cannot raise IndexError on such input.
    parent = parts[-2] if len(parts) >= 2 else ''
    subfolder = parent if parent in ('AP', 'NC') else ''

    # First known instance family mentioned in the base name ('' if none).
    file_type = next(
        (kind for kind in ('grid', 'airport', 'metroplex') if kind in base_name),
        '',
    )

    fixed = 'Fixed' if 'fixed' in data_name.lower() else ''

    if subfolder:
        tini = findTini(file)
        # The slice drops len(file_type) leading characters from the base
        # name; this presumes the base name starts with the family name.
        return f"{file_type}{subfolder}{fixed}{base_name[len(file_type):]}{tini}"
    return f"{base_name}{fixed}"

In [87]:
def parse_variable_counts(text,file):
    """Extract one row of variable counts per data file mentioned in a log.

    Parameters
    ----------
    text : str
        Full content of a log file.
    file : str
        Path of the log file, forwarded to ``format_data_name``.

    Returns
    -------
    list[list]
        Rows of ``[formatted name, total, binary, integer, linear]``, in the
        order in which the matches appear in ``text``.

    Raises
    ------
    Exception
        When the number of ``data_name = '...'`` occurrences differs from the
        number of variable-count blocks found.
    """
    name_re = re.compile(r"data_name\s*=\s*['\"](.+?)['\"]", re.DOTALL)
    counts_re = re.compile(r"(\d+)\s*variables:.*?(\d+)\s*binary\s*variables.*?(\d+)\s*integer\s*variables.*?(\d+)\s*linear\s*variables", re.DOTALL)
    found_names = name_re.findall(text)
    found_counts = counts_re.findall(text)

    # Names and count blocks are paired purely by position, so the two
    # lists must have the same length.
    if len(found_names) != len(found_counts):
        print ("Error")
        raise Exception("Mismatch between data names and variable counts")

    return [
        [format_data_name(name, file), *map(int, counts)]
        for name, counts in zip(found_names, found_counts)
    ]

In [88]:
# Folder containing the log files (replace with the correct path for your setup).
folder_path = "/home/magi/UAMdeconflictionMasterThesis/modelli/out/pelegrin"
prefix = "ampl_output"  # only files whose name starts with this prefix are parsed

# Every regular file in folder_path whose name starts with the prefix.
# os.listdir returns entries in arbitrary order, so the list is sorted to keep
# the row order of `df` reproducible between runs.
files = sorted(
    os.path.join(folder_path, f)
    for f in os.listdir(folder_path)
    if f.startswith(prefix) and os.path.isfile(os.path.join(folder_path, f))
)

# Rows collected from all log files.
all_data = []

for file in files:
    with open(file, "r") as f:
        content = f.read()
    all_data.extend(parse_variable_counts(content,file))

# One row per (log file, data file) pair.
df = pd.DataFrame(all_data, columns=["Data Name", "Total", "Binary", "Integer", "Linear"])

# Split the rows by whether their name carries the "Fixed" tag.
is_fixed = df["Data Name"].str.contains("Fixed")
# Explicit copy: the tag is stripped on an independent frame instead of on a
# slice of `df`, which avoids pandas' SettingWithCopyWarning.
df_fixed = df[is_fixed].copy()
df_fixed["Data Name"] = df_fixed["Data Name"].str.replace("Fixed", "", regex=False)
df_non_fixed = df[~is_fixed]

# Sum the variable counts of rows sharing the same Data Name.
df_fixed_grouped = df_fixed.groupby("Data Name").sum().reset_index()
df_non_fixed_grouped = df_non_fixed.groupby("Data Name").sum().reset_index()



In [89]:
# Write both grouped frames as LaTeX longtables.
# NOTE: to_latex returns None when it is given a target path, so the two
# names assigned here are bound to None; the tables live only on disk.
latex_table_fixed = df_fixed_grouped.to_latex(
    buf="/home/magi/UAMdeconflictionMasterThesis/risultati/mercedes/variables/fixed_table.tex",
    index=False,
    longtable=True,
)
latex_table_non_fixed = df_non_fixed_grouped.to_latex(
    buf="/home/magi/UAMdeconflictionMasterThesis/risultati/mercedes/variables/non_fixed_table.tex",
    index=False,
    longtable=True,
)

# Save the same frames as CSV.
df_fixed_grouped.to_csv(
    path_or_buf="/home/magi/UAMdeconflictionMasterThesis/risultati/mercedes/variables/fixed_table.csv",
    index=False,
)
df_non_fixed_grouped.to_csv(
    path_or_buf="/home/magi/UAMdeconflictionMasterThesis/risultati/mercedes/variables/non_fixed_table.csv",
    index=False,
)

### Drift and Delay

In [90]:
# Airport instances, one nDr/nDe tag at a time. Each selection is split into
# the rows whose name contains "Fixed" (suffix F) and the rest (suffix NF).
temp_df = df[df["Data Name"].str.contains(r"airport\d+nDr0nDe1")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_air01F, df_air01NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = df[df["Data Name"].str.contains(r"airport\d+nDr0nDe2")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_air02F, df_air02NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = df[df["Data Name"].str.contains(r"airport\d+nDr0nDe3")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_air03F, df_air03NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = df[df["Data Name"].str.contains(r"airport\d+nDr1nDe0")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_air10F, df_air10NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = df[df["Data Name"].str.contains(r"airport\d+nDr1nDe1")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_air11F, df_air11NF = temp_df[fixed_rows], temp_df[~fixed_rows]



In [91]:
# Grid instances, one nDr/nDe tag at a time. Each selection is split into
# the rows whose name contains "Fixed" (suffix F) and the rest (suffix NF).
temp_df = df[df["Data Name"].str.contains(r"grid\d+nDr0nDe1")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_grid01F, df_grid01NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = df[df["Data Name"].str.contains(r"grid\d+nDr0nDe2")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_grid02F, df_grid02NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = df[df["Data Name"].str.contains(r"grid\d+nDr0nDe3")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_grid03F, df_grid03NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = df[df["Data Name"].str.contains(r"grid\d+nDr1nDe0")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_grid10F, df_grid10NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = df[df["Data Name"].str.contains(r"grid\d+nDr1nDe1")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_grid11F, df_grid11NF = temp_df[fixed_rows], temp_df[~fixed_rows]



In [92]:
# Metroplex instances, one nDr/nDe tag at a time. Each selection is split into
# the rows whose name contains "Fixed" (suffix F) and the rest (suffix NF).
temp_df = df[df["Data Name"].str.contains(r"metroplex\d+nDr0nDe1")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_mp01F, df_mp01NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = df[df["Data Name"].str.contains(r"metroplex\d+nDr0nDe2")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_mp02F, df_mp02NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = df[df["Data Name"].str.contains(r"metroplex\d+nDr0nDe3")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_mp03F, df_mp03NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = df[df["Data Name"].str.contains(r"metroplex\d+nDr1nDe0")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_mp10F, df_mp10NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = df[df["Data Name"].str.contains(r"metroplex\d+nDr1nDe1")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_mp11F, df_mp11NF = temp_df[fixed_rows], temp_df[~fixed_rows]



### AP

In [93]:
# Airport instances from the AP folder, split by Tini label and then into
# rows whose name contains "Fixed" (suffix F) and the rest (suffix NF).
airport_df = df[df["Data Name"].str.contains(r"airportAP")]

temp_df = airport_df[airport_df["Data Name"].str.contains(r"Tini0")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_airAP0F, df_airAP0NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = airport_df[airport_df["Data Name"].str.contains(r"Tini-1")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_airAP1F, df_airAP1NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = airport_df[airport_df["Data Name"].str.contains(r"Tini-2")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_airAP2F, df_airAP2NF = temp_df[fixed_rows], temp_df[~fixed_rows]

In [94]:
# Grid instances from the AP folder, split by Tini label and then into
# rows whose name contains "Fixed" (suffix F) and the rest (suffix NF).
grid_df = df[df["Data Name"].str.contains(r"gridAP")]

temp_df = grid_df[grid_df["Data Name"].str.contains(r"Tini0")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_gridAP0F, df_gridAP0NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = grid_df[grid_df["Data Name"].str.contains(r"Tini-1")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_gridAP1F, df_gridAP1NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = grid_df[grid_df["Data Name"].str.contains(r"Tini-2")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_gridAP2F, df_gridAP2NF = temp_df[fixed_rows], temp_df[~fixed_rows]

In [95]:
# Metroplex instances from the AP folder, split by Tini label and then into
# rows whose name contains "Fixed" (suffix F) and the rest (suffix NF).
metroplex_df = df[df["Data Name"].str.contains(r"metroplexAP")]

temp_df = metroplex_df[metroplex_df["Data Name"].str.contains(r"Tini0")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_mpAP0F, df_mpAP0NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = metroplex_df[metroplex_df["Data Name"].str.contains(r"Tini-1")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_mpAP1F, df_mpAP1NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = metroplex_df[metroplex_df["Data Name"].str.contains(r"Tini-2")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_mpAP2F, df_mpAP2NF = temp_df[fixed_rows], temp_df[~fixed_rows]

### NC

In [96]:
# Airport instances from the NC folder, split by Tini label and then into
# rows whose name contains "Fixed" (suffix F) and the rest (suffix NF).
airport_df = df[df["Data Name"].str.contains(r"airportNC")]

temp_df = airport_df[airport_df["Data Name"].str.contains(r"Tini0")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_airNC0F, df_airNC0NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = airport_df[airport_df["Data Name"].str.contains(r"Tini-1")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_airNC1F, df_airNC1NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = airport_df[airport_df["Data Name"].str.contains(r"Tini-2")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_airNC2F, df_airNC2NF = temp_df[fixed_rows], temp_df[~fixed_rows]

In [97]:
# Grid instances from the NC folder, split by Tini label and then into
# rows whose name contains "Fixed" (suffix F) and the rest (suffix NF).
grid_df = df[df["Data Name"].str.contains(r"gridNC")]

temp_df = grid_df[grid_df["Data Name"].str.contains(r"Tini0")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_gridNC0F, df_gridNC0NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = grid_df[grid_df["Data Name"].str.contains(r"Tini-1")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_gridNC1F, df_gridNC1NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = grid_df[grid_df["Data Name"].str.contains(r"Tini-2")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_gridNC2F, df_gridNC2NF = temp_df[fixed_rows], temp_df[~fixed_rows]

In [98]:
# Metroplex instances from the NC folder, split by Tini label and then into
# rows whose name contains "Fixed" (suffix F) and the rest (suffix NF).
metroplex_df = df[df["Data Name"].str.contains(r"metroplexNC")]

temp_df = metroplex_df[metroplex_df["Data Name"].str.contains(r"Tini0")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_mpNC0F, df_mpNC0NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = metroplex_df[metroplex_df["Data Name"].str.contains(r"Tini-1")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_mpNC1F, df_mpNC1NF = temp_df[fixed_rows], temp_df[~fixed_rows]

temp_df = metroplex_df[metroplex_df["Data Name"].str.contains(r"Tini-2")]
fixed_rows = temp_df["Data Name"].str.contains("Fixed")
df_mpNC2F, df_mpNC2NF = temp_df[fixed_rows], temp_df[~fixed_rows]

In [99]:
def get_formatted_name(data_name):
    """Return the typology label of an instance name.

    Every "Fixed" substring is removed first. If the remaining name starts
    with letters, followed by digits, followed by at least one character
    from ``[a-zA-Z0-9-]``, the digits captured by the pattern are dropped and
    the leading letters are joined with the remainder. Any other name is
    returned with only "Fixed" removed.
    """
    stripped = data_name.replace("Fixed", "")
    found = re.match(r"([a-zA-Z]+)(\d+)([a-zA-Z0-9-]+)", stripped)
    if found is None:
        # No digits in the expected position: nothing more to strip.
        return stripped
    head, _digits, tail = found.groups()
    return head + tail

### LaTeX creation

In [101]:
# Every "Fixed" group, in the row order used by the summary tables.
dfsF = [df_air11F,df_air10F,df_air01F,df_air02F,df_air03F,df_grid11F,df_grid10F,df_grid01F,df_grid02F,df_grid03F,df_mp11F,df_mp10F,df_mp01F,df_mp02F,df_mp03F]
dfsF += [df_airAP0F,df_airAP1F,df_airAP2F,df_gridAP0F,df_gridAP1F,df_gridAP2F,df_mpAP0F,df_mpAP1F,df_mpAP2F,df_airNC0F,df_airNC1F,df_airNC2F,df_gridNC0F,df_gridNC1F,df_gridNC2F,df_mpNC0F,df_mpNC1F,df_mpNC2F]

# One (column, caption, label) entry per table, in the order they are written:
# total, binary, integer and linear variable counts.
summary_specs_fixed = [
    ("Total", "Statistic about \\textbf{total number} of variable of Mercedes instances with all path fixed", "table:mercedes:totalVar:fixed"),
    ("Binary", "Statistic about total number of \\textbf{binary} variable of Mercedes instances with all path fixed", "table:mercedes:binaryVar:fixed"),
    ("Integer", "Statistic about total number of \\textbf{integer} variable of Mercedes instances with all path fixed", "table:mercedes:integerVar:fixed"),
    ("Linear", "Statistic about total number of \\textbf{linear} variable of Mercedes instances with all path fixed", "table:mercedes:linearVar:fixed"),
]

with open("/home/magi/UAMdeconflictionMasterThesis/risultati/mercedes/variables/SummaryTablesFixed.tex", "w") as f:
    f.write("\\begin{document}\n")

    for column_name, caption, label in summary_specs_fixed:
        results = []
        # The loop variable is deliberately NOT called `df`: doing so would
        # overwrite the notebook-level `df` that holds all parsed rows.
        for group_df in dfsF:
            column = group_df[column_name]
            results.append({
                # .iloc[0] requires every frame in dfsF to contain at least one row.
                "Typology": get_formatted_name(group_df["Data Name"].iloc[0]),
                "Mean":     column.mean(),
                "STD":      column.std(),
                "Min":      column.min(),
                "Max":      column.max(),
                "Count":    column.count(),
            })

        latex_table = pd.DataFrame(results).to_latex(index=False, column_format="|l|r|r|r|r|r|", caption=caption, label=label, longtable=True)
        # Replace the booktabs rules with \hline and append an \hline after every '\\'.
        latex_table = latex_table.replace(r'\toprule', r'\hline').replace(r'\midrule', r'\hline').replace(r'\bottomrule', r'\hline').replace(r'\\',r'\\ \hline')
        f.write(latex_table)

    f.write("\\end{document}")


In [102]:
# Every non-"Fixed" group, in the row order used by the summary tables.
dfsNF = [df_air11NF,df_air10NF,df_air01NF,df_air02NF,df_air03NF,df_grid11NF,df_grid10NF,df_grid01NF,df_grid02NF,df_grid03NF,df_mp11NF,df_mp10NF,df_mp01NF,df_mp02NF,df_mp03NF]
dfsNF += [df_airAP0NF,df_airAP1NF,df_airAP2NF,df_gridAP0NF,df_gridAP1NF,df_gridAP2NF,df_mpAP0NF,df_mpAP1NF,df_mpAP2NF,df_airNC0NF,df_airNC1NF,df_airNC2NF,df_gridNC0NF,df_gridNC1NF,df_gridNC2NF,df_mpNC0NF,df_mpNC1NF,df_mpNC2NF]

# One (column, caption, label) entry per table, in the order they are written:
# total, binary, integer and linear variable counts.
summary_specs_free = [
    ("Total", "Statistic about \\textbf{total number} of variable of Mercedes instances with some free path", "table:mercedes:totalVar:free"),
    ("Binary", "Statistic about total number of \\textbf{binary} variable of Mercedes instances with some free path", "table:mercedes:binaryVar:free"),
    ("Integer", "Statistic about total number of \\textbf{integer} variable of Mercedes instances with some path free", "table:mercedes:integerVar:free"),
    ("Linear", "Statistic about total number of \\textbf{linear} variable of Mercedes instances with some free path", "table:mercedes:linearVar:free"),
]

# Create the .tex file and write the tables with their label and caption.
with open("/home/magi/UAMdeconflictionMasterThesis/risultati/mercedes/variables/SummaryTablesFree.tex", "w") as f:
    f.write("\\begin{document}\n")

    for column_name, caption, label in summary_specs_free:
        results = []
        # The loop variable is deliberately NOT called `df`: doing so would
        # overwrite the notebook-level `df` that holds all parsed rows.
        for group_df in dfsNF:
            column = group_df[column_name]
            results.append({
                # .iloc[0] requires every frame in dfsNF to contain at least one row.
                "Typology": get_formatted_name(group_df["Data Name"].iloc[0]),
                "Mean":     column.mean(),
                "STD":      column.std(),
                "Min":      column.min(),
                "Max":      column.max(),
                "Count":    column.count(),
            })

        latex_table = pd.DataFrame(results).to_latex(index=False, column_format="|l|r|r|r|r|r|", caption=caption, label=label, longtable=True)
        # Replace the booktabs rules with \hline and append an \hline after every '\\'.
        latex_table = latex_table.replace(r'\toprule', r'\hline').replace(r'\midrule', r'\hline').replace(r'\bottomrule', r'\hline').replace(r'\\',r'\\ \hline')
        f.write(latex_table)

    f.write("\\end{document}")


In [103]:
# Summary statistics of the "Total" column for every non-fixed group; the rows
# are left in `results` (wrap in pd.DataFrame(results) to inspect them).
results = []
# The loop variable is deliberately NOT called `df`, so the notebook-level
# `df` holding all parsed rows is not overwritten.
for group_df in dfsNF:
    column = group_df["Total"]
    results.append({
        # .iloc[0] requires every frame in dfsNF to contain at least one row.
        "Typology": get_formatted_name(group_df["Data Name"].iloc[0]),
        "Mean": column.mean(),
        "STD": column.std(),
        "Min": column.min(),
        "Max": column.max(),
        "Count": column.count(),
    })

## Add comparison

### Define variables

In [104]:
#DD
# Join every non-fixed frame with its fixed counterpart on "Data Name" (pd.merge's
# default join). "Fixed" is stripped from the fixed side's names so the keys line up;
# overlapping columns coming from the fixed side get the suffix "Fixed".
(
    df_air11, df_air10, df_air01, df_air02, df_air03,
    df_grid11, df_grid10, df_grid01, df_grid02, df_grid03,
    df_mp11, df_mp10, df_mp01, df_mp02, df_mp03,
) = [
    pd.merge(
        free,
        fixed.assign(**{"Data Name": fixed["Data Name"].str.replace("Fixed", "", regex=False)}),
        on="Data Name",
        suffixes=("", "Fixed"),
    )
    for free, fixed in (
        (df_air11NF, df_air11F),
        (df_air10NF, df_air10F),
        (df_air01NF, df_air01F),
        (df_air02NF, df_air02F),
        (df_air03NF, df_air03F),
        (df_grid11NF, df_grid11F),
        (df_grid10NF, df_grid10F),
        (df_grid01NF, df_grid01F),
        (df_grid02NF, df_grid02F),
        (df_grid03NF, df_grid03F),
        (df_mp11NF, df_mp11F),
        (df_mp10NF, df_mp10F),
        (df_mp01NF, df_mp01F),
        (df_mp02NF, df_mp02F),
        (df_mp03NF, df_mp03F),
    )
]


In [105]:
#AP
# Join every non-fixed frame with its fixed counterpart on "Data Name" (pd.merge's
# default join). "Fixed" is stripped from the fixed side's names so the keys line up;
# overlapping columns coming from the fixed side get the suffix "Fixed".
(
    df_airAP0, df_airAP1, df_airAP2,
    df_gridAP0, df_gridAP1, df_gridAP2,
    df_mpAP0, df_mpAP1, df_mpAP2,
) = [
    pd.merge(
        free,
        fixed.assign(**{"Data Name": fixed["Data Name"].str.replace("Fixed", "", regex=False)}),
        on="Data Name",
        suffixes=("", "Fixed"),
    )
    for free, fixed in (
        (df_airAP0NF, df_airAP0F),
        (df_airAP1NF, df_airAP1F),
        (df_airAP2NF, df_airAP2F),
        (df_gridAP0NF, df_gridAP0F),
        (df_gridAP1NF, df_gridAP1F),
        (df_gridAP2NF, df_gridAP2F),
        (df_mpAP0NF, df_mpAP0F),
        (df_mpAP1NF, df_mpAP1F),
        (df_mpAP2NF, df_mpAP2F),
    )
]


In [106]:
#NC
# Join every non-fixed frame with its fixed counterpart on "Data Name" (pd.merge's
# default join). "Fixed" is stripped from the fixed side's names so the keys line up;
# overlapping columns coming from the fixed side get the suffix "Fixed".
(
    df_airNC0, df_airNC1, df_airNC2,
    df_gridNC0, df_gridNC1, df_gridNC2,
    df_mpNC0, df_mpNC1, df_mpNC2,
) = [
    pd.merge(
        free,
        fixed.assign(**{"Data Name": fixed["Data Name"].str.replace("Fixed", "", regex=False)}),
        on="Data Name",
        suffixes=("", "Fixed"),
    )
    for free, fixed in (
        (df_airNC0NF, df_airNC0F),
        (df_airNC1NF, df_airNC1F),
        (df_airNC2NF, df_airNC2F),
        (df_gridNC0NF, df_gridNC0F),
        (df_gridNC1NF, df_gridNC1F),
        (df_gridNC2NF, df_gridNC2F),
        (df_mpNC0NF, df_mpNC0F),
        (df_mpNC1NF, df_mpNC1F),
        (df_mpNC2NF, df_mpNC2F),
    )
]


In [107]:
def obtainStatsComparison(filtered_merged, columnName):
    """Summarise one count column of a merged frame, non-fixed next to fixed.

    Parameters
    ----------
    filtered_merged : pandas.DataFrame
        Frame with a "Data Name" column plus the columns ``columnName`` and
        ``columnName + "Fixed"``. It must contain at least one row, because
        the typology is read from the first "Data Name".
    columnName : str
        Name of the non-fixed column to summarise.

    Returns
    -------
    dict
        Typology label, then mean / std / min / max for the non-fixed and the
        fixed column, then the non-null count of the non-fixed column.
    """
    typology = get_formatted_name(filtered_merged["Data Name"].iloc[0])
    free_values = filtered_merged[columnName]
    fixed_values = filtered_merged[columnName+"Fixed"]
    return {
        "Typology": typology,
        "MeanNonFixed": free_values.mean(),
        "MeanFixed": fixed_values.mean(),
        "STDNonFixed": free_values.std(),
        "STDFixed": fixed_values.std(),
        "MinNonFixed": free_values.min(),
        "MinFixed": fixed_values.min(),
        "MaxNonFixed": free_values.max(),
        "MaxFixed": fixed_values.max(),
        "TotalCount": free_values.count(),
    }

In [108]:
# Every merged (non-fixed vs fixed) group, in the row order used by the tables.
dfs =  [df_air11,df_air10,df_air01,df_air02,df_air03,df_grid11,df_grid10,df_grid01,df_grid02,df_grid03,df_mp11,df_mp10,df_mp01,df_mp02,df_mp03]
dfs += [df_airAP0,df_airAP1,df_airAP2,df_gridAP0,df_gridAP1,df_gridAP2,df_mpAP0,df_mpAP1,df_mpAP2]
dfs += [df_airNC0,df_airNC1,df_airNC2,df_gridNC0,df_gridNC1,df_gridNC2,df_mpNC0,df_mpNC1,df_mpNC2]

# One (column, caption, label) entry per table, in the order they are written:
# total, binary, integer and linear variable counts.
comparison_specs = [
    ("Total", "Comparison between \\textbf{total number} of variable in Mercedes instances", "table:mercedes:totalVarComparison"),
    ("Binary", "Comparison between total number of \\textbf{binary} variable in Mercedes instances", "table:mercedes:binaryVarComparison"),
    ("Integer", "Comparison between total number of \\textbf{integer} variable in Mercedes instances", "table:mercedes:integerVarComparison"),
    ("Linear", "Comparison between total number of \\textbf{linear} variable in Mercedes instances", "table:mercedes:linearVarComparison"),
]

# Create the .tex file and write the tables with their label and caption.
with open("/home/magi/UAMdeconflictionMasterThesis/risultati/mercedes/variables/ComparisonTables.tex", "w") as f:
    f.write("\\begin{document}\n")

    for column_name, caption, label in comparison_specs:
        # The loop variable is deliberately NOT called `df`, so the
        # notebook-level `df` holding all parsed rows is not overwritten.
        results = [obtainStatsComparison(merged_df, column_name) for merged_df in dfs]

        # Ten columns (typology + nine statistics). All four tables now share the
        # same format; the first one used to lack the closing '|' border.
        latex_table = pd.DataFrame(results).to_latex(index=False, longtable=True, column_format="|l|r|r|r|r|r|r|r|r|r|", caption=caption, label=label)
        # Replace the booktabs rules with \hline.
        latex_table = latex_table.replace(r'\toprule', r'\hline').replace(r'\midrule', r'\hline').replace(r'\bottomrule', r'\hline')
        f.write(latex_table)

    f.write("\\end{document}")
