In [11]:
import pandas as pd
import re

def _field_value(line):
    """Return the stripped text after the first ': ' in *line*.

    An empty string is returned when the line carries no value (nothing
    follows the separator), instead of raising IndexError.
    """
    parts = line.split(': ')
    return parts[1].strip() if len(parts) > 1 else ''


def parse_file(path, name):
    """Parse a benchmark results log into a DataFrame with one row per test.

    The file is read line by line and lines are recognised by their prefix.
    The text after the first ': ' is stored, as a string, under these columns:
    problem data (N, w_avg, w_max, v_avg, v_max, W), solution and time of
    each method (BB_*, FPTAS_*, greedy_*) and the RSS memory figure of the
    method named in the most recent '=== Métricas de Memória' header, with the
    ' KB' unit removed.

    A new row is started when a '* Número de itens: ' line is read after a
    '2-aproximativo-guloso:' result has been seen; whatever is still pending
    at end of file becomes the last row.

    Args:
        path: Path of the UTF-8 text file to parse.
        name: Label written to the 'type' column of every row.

    Returns:
        pandas.DataFrame built from the collected rows. A column exists only
        if at least one row recorded it; an empty file yields an empty frame.
    """
    # Line prefix -> column that receives the text after ': '.
    value_columns = {
        '* Peso médio:': 'w_avg',
        '* Peso máximo:': 'w_max',
        '* Valor médio:': 'v_avg',
        '* Valor máximo:': 'v_max',
        '* Capacidade da Mochila:': 'W',
        'Branch and Bound:': 'BB_sol',
        'Tempo Branch and Bound :': 'BB_time',
        '2-aproximativo-fptas:': 'FPTAS_sol',
        'Tempo Aproximativo FPTAS:': 'FPTAS_time',
        '2-aproximativo-guloso:': 'greedy_sol',
        'Tempo Aproximativo Guloso:': 'greedy_time',
    }
    # Solution-line prefix -> method name remembered as the last one seen.
    method_markers = {
        'Branch and Bound:': 'BB',
        '2-aproximativo-fptas:': '2-aproximativo-fptas',
        '2-aproximativo-guloso:': '2-aproximativo-guloso',
    }
    # Tag looked for in the memory-section header -> column for the RSS value.
    memory_columns = (
        ('[BB]', 'BB_mem'),
        ('[fptas]', 'FPTAS_mem'),
        ('[guloso]', 'greedy_mem'),
    )

    records = []
    current_test = {}
    last_method = None
    # Most recent memory-section header. Kept out of the record so parser
    # state does not end up as a DataFrame column.
    section = ''

    with open(path, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()

            if line.startswith('* Número de itens: '):
                # The greedy result is the last one recorded for a test, so
                # seeing it means the pending record is complete.
                if last_method == '2-aproximativo-guloso':
                    current_test['type'] = name
                    records.append(current_test)
                    current_test = {}
                    section = ''
                current_test['N'] = _field_value(line)
                continue

            if line.startswith('=== Métricas de Memória'):
                section = line
                continue

            if line.startswith('* Memória residente real (RSS):'):
                # The same RSS line is printed for every method; the section
                # header decides which column it belongs to.
                for tag, column in memory_columns:
                    if tag in section:
                        current_test[column] = (
                            _field_value(line).replace(' KB', '').strip()
                        )
                        break
                continue

            for prefix, column in value_columns.items():
                if line.startswith(prefix):
                    current_test[column] = _field_value(line)
                    last_method = method_markers.get(prefix, last_method)
                    break

    # Add the last test if it holds any data.
    if current_test:
        current_test['type'] = name
        records.append(current_test)

    return pd.DataFrame(records)

# Process both files
# Parse each results file in turn, labelling its rows with the data-set name.
df_large, df_low = (
    parse_file(results_path, label)
    for results_path, label in (
        ("large_results.txt", "Large"),
        ("low_results.txt", "Low"),
    )
)

# Stack both frames into one, renumbering the rows from zero.
df_combined = pd.concat((df_large, df_low), ignore_index=True)

# Convert scientific notation to normal numbers and clean data
def clean_value(val):
    """Normalise a single table cell.

    Non-string values are returned unchanged. A string containing 'e+' is
    treated as a number in scientific notation and returned as
    ``str(float(val))``; if it turns out not to be a number it is cleaned like
    any other string. Every other string has the ' KB' unit removed.

    Args:
        val: Cell value of any type.

    Returns:
        The cleaned value: a string for string input, otherwise *val* itself.
    """
    if not isinstance(val, str):
        return val
    if 'e+' in val:
        try:
            return str(float(val))
        except ValueError:
            # 'e+' also occurs in ordinary text (e.g. 'value+1'); such a
            # string is not a number, so fall through to the unit clean-up.
            pass
    return val.replace(' KB', '')

# Clean every cell. DataFrame.applymap was renamed to DataFrame.map in
# pandas 2.1 and the old name now emits a FutureWarning, so use the new name
# whenever the installed pandas provides it.
if hasattr(pd.DataFrame, 'map'):
    df_combined = df_combined.map(clean_value)
else:
    df_combined = df_combined.applymap(clean_value)

# Reorder columns. reindex is used instead of df[columns] so that a column
# that never appeared in the parsed logs is created empty rather than
# raising KeyError.
columns = ['type', 'N', 'W', 'w_avg', 'w_max', 'v_avg', 'v_max',
           'BB_sol', 'BB_time', 'BB_mem',
           'FPTAS_sol', 'FPTAS_time', 'FPTAS_mem',
           'greedy_sol', 'greedy_time', 'greedy_mem']
df_combined = df_combined.reindex(columns=columns)

# Fill empty values with empty string for better CSV output
df_combined = df_combined.fillna('')

# Save to CSV
df_combined.to_csv('resultados_combinados.csv', index=False)

print("DataFrame consolidado:")
print(df_combined)

DataFrame consolidado:
     type      N      W    w_avg    w_max    v_avg    v_max   BB_sol BB_time  \
0   Large    200    997   498.09      997   711.59     1230     5397    1556   
1   Large    500   2543  513.864      997  494.142      998    28857  338508   
2   Large    500   2543  513.864      997  516.634     1074     4566   47974   
3   Large    500   2517  508.612      998  608.612     1098     7117  264675   
4   Large  10000  49877  503.765     1000  497.907     1000                    
5   Large   1000   5002   505.29     1000  486.504      998                    
6   Large    100    995   503.78      995   500.44      997     9147    1013   
7   Large   2000  10011  505.565     1000  488.868     1000                    
8   Large    200   1008   509.14      995   498.09      997    11238    1036   
9   Large   5000  25016  505.329     1000  494.954     1000                    
10  Large    500   2543  513.864      997  494.142      998    28857  510673   
11  Large  10000 