**SCOAP values from .txt files**

In [None]:
import os
import chardet
import pandas as pd
import re

# Guess the text encoding of a file from its raw bytes
def detect_encoding(file_path):
    """Return the encoding name that chardet reports for ``file_path``.

    The whole file is read in binary mode and handed to ``chardet.detect``;
    the value stored under the ``'encoding'`` key of its result is returned.
    """
    with open(file_path, 'rb') as stream:
        detection = chardet.detect(stream.read())
    return detection['encoding']

# Pull the table rows out of a report file with a regular expression
def extract_data_table(file_path, encoding):
    """Return the table rows found in ``file_path`` as lists of fields.

    The file is decoded with ``encoding`` and searched in ``re.MULTILINE``
    mode for text of any of these shapes:

    * a run of non-whitespace followed by four integers,
    * a backslash-prefixed name (word characters, ``/`` and ``-``)
      followed by four integers,
    * anything whose first non-blank text is ``dff``,
    * ``G<digits>`` or ``n<digits>`` followed by four integers.

    Each match is stripped and split on whitespace. Note that the ``dff``
    alternative accepts the rest of the line as-is, so such rows are not
    guaranteed to have five fields.
    """
    # Alternatives are tried in this order; joined with '|' into one pattern.
    row_shapes = (
        r'(?:^\S+\s+\d+\s+\d+\s+\d+\s+\d+$)',
        r'(?:^\\[\w\/-]+\s+\d+\s+\d+\s+\d+\s+\d+$)',
        r'(?:^\s*dff.*$)',
        r'(?:^\s*G\d+\s+\d+\s+\d+\s+\d+\s+\d+$)',
        r'(?:^\s*n\d+\s+\d+\s+\d+\s+\d+\s+\d+$)',
    )
    table_pattern = re.compile('|'.join(row_shapes), re.MULTILINE)

    with open(file_path, 'r', encoding=encoding) as handle:
        content = handle.read()

    # No capturing groups are used, so group(0) is the whole matched row.
    return [
        re.split(r'\s+', match.group(0).strip())
        for match in table_pattern.finditer(content)
    ]

# Directory containing the text files
folder_path = '/content/s35932'  # Adjust this path if necessary

# One DataFrame per successfully parsed file, in processing order
dataframes = []

# Every .txt file in the folder, sorted by name
candidate_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.txt'))

# Names of the files that actually contributed a DataFrame. This list is kept
# aligned with `dataframes`: the code further down in this cell looks up
# f"{col}_{file_list[0]}" and f"{col}_{filename}" columns for the entries of
# `file_list`, which raises KeyError for any file that was skipped here.
file_list = []

# Process each file in the folder, sorted by filename
for filename in candidate_files:
    file_path = os.path.join(folder_path, filename)

    # Detect file encoding
    file_encoding = detect_encoding(file_path)
    print(f'Processing file: {filename}, Detected encoding: {file_encoding}')

    try:
        # Extract data table from the text using regex
        data_table = extract_data_table(file_path, file_encoding)

        if not data_table:
            print(f"No data found in {filename}")
            continue

        # Value columns carry the filename as a suffix so they stay distinct
        # once the per-file tables are merged on 'Signal'.
        value_columns = [f"{col}_{filename}" for col in ("CC0", "CC1", "SC0", "SC1")]
        data = pd.DataFrame(data_table, columns=["Signal"] + value_columns)
    except Exception as e:
        # Best-effort batch: report the failing file and carry on with the rest.
        print(f"Skipping {filename}: {e}")
        continue

    # Record the table and its filename together so the two lists never diverge.
    dataframes.append(data)
    file_list.append(filename)

def highlight_changes(row, col, filename):
    """Return ``row``'s value for ``col``/``filename``, flagged if it changed.

    The value in column ``f"{col}_{filename}"`` is compared with the value in
    the same metric's column for the first entry of the module-level
    ``file_list``. When both values are non-missing and differ, the current
    value is returned as a string with ``" (changed)"`` appended; otherwise
    the current value is returned untouched.
    """
    baseline = row[f"{col}_{file_list[0]}"]
    current = row[f"{col}_{filename}"]

    # A missing value on either side means there is nothing to compare.
    if pd.isna(baseline) or pd.isna(current):
        return current

    if current != baseline:
        return f"{current} (changed)"
    return current

# Combine the per-file tables into one wide table keyed on 'Signal'
if not dataframes:
    print("No data files processed.")
else:
    # Outer joins keep signals that appear in only some of the files
    combined_data = dataframes[0]
    for df in dataframes[1:]:
        combined_data = combined_data.merge(df, on="Signal", how="outer")

    # Mark values that differ from the first file's value for the same metric
    for filename in file_list[1:]:
        for col in ['CC0', 'CC1', 'SC0', 'SC1']:
            combined_data[f"{col}_{filename}"] = combined_data.apply(
                highlight_changes, axis=1, args=(col, filename)
            )

    # Show the merged table so its content can be checked by eye
    print(combined_data)

    # Write the merged table out as CSV
    output_file = '/content/s35932/output.csv'  # Adjust this path if necessary
    combined_data.to_csv(output_file, index=False)
    print(f'All data successfully saved to {output_file}')


Processing file: s35932.txt, Detected encoding: ascii
Processing file: s35932_T000.txt, Detected encoding: ascii
Processing file: s35932_T001.txt, Detected encoding: ascii
Processing file: s35932_T002.txt, Detected encoding: ascii
Processing file: s35932_T003.txt, Detected encoding: ascii
Processing file: s35932_T004.txt, Detected encoding: ascii
Processing file: s35932_T005.txt, Detected encoding: ascii
Processing file: s35932_T006.txt, Detected encoding: ascii
Processing file: s35932_T007.txt, Detected encoding: ascii
Processing file: s35932_T008.txt, Detected encoding: ascii
Processing file: s35932_T009.txt, Detected encoding: ascii
Processing file: s35932_T010.txt, Detected encoding: ascii
Processing file: s35932_T011.txt, Detected encoding: ascii
Processing file: s35932_T012.txt, Detected encoding: ascii
Processing file: s35932_T013.txt, Detected encoding: ascii
Processing file: s35932_T014.txt, Detected encoding: ascii
Processing file: s35932_T015.txt, Detected encoding: ascii
Pr

**Filtering of nodes**

In [None]:
import os
import pandas as pd
import chardet
from xlsxwriter import Workbook  # Make sure xlsxwriter is installed

def detect_encoding(file_path):
    """Return the encoding name that chardet reports for ``file_path``.

    The file is read in full as bytes and passed to ``chardet.detect``; the
    ``'encoding'`` entry of the result is returned.
    """
    with open(file_path, 'rb') as binary_file:
        raw_bytes = binary_file.read()
    detection = chardet.detect(raw_bytes)
    return detection['encoding']

def extract_nodes_from_text(text):
    """Collect node names from the effect and activation sections of ``text``.

    The text is walked line by line. A line containing ``'Effect:'`` opens
    the effect section, ``'Activation Condition:'`` opens the activation
    section, and ``'TROJAN BODY:'`` closes whichever section is open; these
    marker lines themselves are never scanned for nodes.

    Inside a section only lines whose stripped text starts with ``n``, ``N``,
    ``g`` or ``G`` are used: the effect section contributes the first
    whitespace-separated token of the line, the activation section
    contributes every token.

    Returns:
        A tuple ``(effect_nodes, activation_nodes)`` of two lists of strings.
    """
    # Checked in this order; the first marker found on a line wins.
    section_markers = (
        ('Effect:', 'effect'),
        ('Activation Condition:', 'activation'),
        ('TROJAN BODY:', None),
    )
    node_prefixes = ('n', 'N', 'g', 'G')

    collected = {'effect': [], 'activation': []}
    current_section = None

    for raw_line in text.split('\n'):
        for marker, section in section_markers:
            if marker in raw_line:
                current_section = section
                break
        else:
            if current_section is None:
                continue

            stripped = raw_line.strip()
            if not stripped.startswith(node_prefixes):
                continue

            tokens = stripped.split()
            if current_section == 'effect':
                collected['effect'].append(tokens[0])
            else:
                collected['activation'].extend(tokens)

    return collected['effect'], collected['activation']

def extract_values_from_csv(input_csv, nodes, filename):
    """Return the rows of ``input_csv`` whose ``Signal`` is listed in ``nodes``.

    Only the ``Signal`` column and whichever of ``CC0_<filename>``,
    ``CC1_<filename>``, ``SC0_<filename>`` and ``SC1_<filename>`` exist in
    the CSV are kept. Rows stay in CSV order and the result carries a fresh
    0-based index.
    """
    table = pd.read_csv(input_csv)

    # Keep just the metric columns that this CSV actually has for `filename`.
    wanted = [f'{metric}_{filename}' for metric in ('CC0', 'CC1', 'SC0', 'SC1')]
    present = [name for name in wanted if name in table.columns]

    # Boolean mask on 'Signal' selects the requested nodes.
    row_mask = table['Signal'].isin(nodes)
    return table.loc[row_mask, ['Signal'] + present].reset_index(drop=True)

def main():
    """Write one Excel sheet per .txt log with the CSV rows for its nodes.

    For every ``.txt`` file in the log folder (sorted by name) the effect and
    activation nodes are extracted from the log text, their rows are looked
    up in the CSV file, and the two results are stacked (effect rows first)
    into a sheet named after the log file without its extension.
    """
    # NOTE(review): these two paths spell the name 's35392' while the CSV path
    # below spells it 's35932' -- confirm the folder name is intended.
    log_folder = '/content/s35392'
    output_excel_file = '/content/s35392/s35392.xlsx'
    csv_file = '/content/s35932_SCOAP_VALUES.csv'

    # The writer stays open for the whole loop so every sheet lands in one workbook
    with pd.ExcelWriter(output_excel_file, engine='xlsxwriter') as writer:
        for filename in sorted(os.listdir(log_folder)):
            if not filename.endswith('.txt'):
                continue

            input_text_file = os.path.join(log_folder, filename)

            # Decode the log with whatever encoding was detected for it
            encoding = detect_encoding(input_text_file)
            print(f"Processing {filename} with encoding: {encoding}")
            with open(input_text_file, 'r', encoding=encoding) as log_file:
                text = log_file.read()

            # Node names listed in the log's effect / activation sections
            effect_nodes, activation_nodes = extract_nodes_from_text(text)
            print(f"Effect Nodes: {effect_nodes}")
            print(f"Activation Nodes: {activation_nodes}")

            # Look up both node groups in the CSV, effect rows first
            per_group = [
                extract_values_from_csv(csv_file, node_group, filename)
                for node_group in (effect_nodes, activation_nodes)
            ]
            combined_data = pd.concat(per_group, ignore_index=True)

            # Sheet is named after the log file with its extension removed
            sheet_name, _extension = os.path.splitext(filename)
            combined_data.to_excel(writer, sheet_name=sheet_name, index=False)

            print(f"Data from {filename} has been processed and saved to {sheet_name} sheet.")

    print(f"All data has been saved to {output_excel_file}")

if __name__ == "__main__":
    main()
