In [None]:
import csv

def preprocess_files(lookup_table_file, protocol_list_file, log_file):
    """
    Build the lookup dictionaries and count tag / port-protocol matches in a flow log.

    Args:
        lookup_table_file (String): Path to the lookup table CSV file. Must have
            'dstport', 'protocol' and 'tag' columns.
        protocol_list_file (String): Path to the protocol list CSV file. Must have
            'Decimal' and 'Keyword' columns.
        log_file (String): Path to the whitespace-separated .txt log file. The
            7th field is read as the destination port and the 8th as the protocol.

    Returns:
        port_protocol_tags (dict): Dictionary of port-protocol-tags with key being a tuple of
            (port, lowercase protocol) and value being a list of [tag, count].
        port_protocol (dict): Dictionary mapping the 'Decimal' column (as a string) to the
            'Keyword' column of the protocol list.
        tags (dict): Dictionary of tag counts with key being the tag and value being the count.
            Always contains an "Untagged" entry.

    Raises:
        KeyError: If a required column is missing from one of the CSV files.
        ValueError: If a 'dstport' value in the lookup table is not an integer.

    Notes:
        Protocol matching is case-insensitive: protocols from both the lookup table and
        the log are lowercased before comparison.
        Log lines with fewer than 8 fields, or whose destination port is not an integer
        (blank lines, headers, '-' placeholders), are skipped and not counted anywhere.
    """
    port_protocol_tags = {}

    # Read the lookup table CSV and create a dictionary of port-protocol-tags.
    # newline='' lets the csv module handle line endings inside quoted fields itself.
    with open(lookup_table_file, mode='r', newline='') as file:
        for row in csv.DictReader(file):
            dstport = int(row['dstport'])
            # Log protocols are lowercased before the lookup below, so the keys
            # must be lowercase too or an entry such as "TCP" could never match.
            protocol = row['protocol'].strip().lower()
            port_protocol_tags[(dstport, protocol)] = [row['tag'], 0]
    # Debugging
    print(f"Port-Protocol-Tags: {port_protocol_tags}")

    port_protocol = {}

    # Read the protocol list CSV and create a dictionary
    with open(protocol_list_file, mode='r', newline='') as file:
        for row in csv.DictReader(file):
            port_protocol[row['Decimal']] = row['Keyword']
    # Debugging
    print(f"Port-Protocol: {port_protocol}")

    tags = {"Untagged": 0}

    # Read the log file and update dictionaries
    with open(log_file, 'r') as file:
        for line in file:
            parts = line.split()
            # Blank or truncated lines do not carry both fields we need.
            if len(parts) < 8:
                continue
            try:
                vpc_dstport = int(parts[6])
            except ValueError:
                # Header text or a '-' placeholder instead of a port number.
                continue

            # Translate the protocol through the protocol list when possible;
            # otherwise keep the raw value. Lowercase either way.
            vpc_protocol = port_protocol.get(parts[7], parts[7]).lower()

            entry = port_protocol_tags.get((vpc_dstport, vpc_protocol))
            if entry is None:
                tags["Untagged"] += 1
                continue

            entry[1] += 1
            tag = entry[0]
            tags[tag] = tags.get(tag, 0) + 1

    # Debugging
    print(f"Tags: {tags}")

    return port_protocol_tags, port_protocol, tags

def generate_response_files(port_protocol_tags, tags, tag_count_file, port_protocol_count_file):
    """
    Write the tag counts and the port-protocol counts out as two CSV reports.

    Args:
        port_protocol_tags (Dictionary): Maps a (port, protocol) tuple to a two-item
            sequence of tag and count. Only the count is written; the tag is ignored.
        tags (Dictionary): Maps each tag to its count.
        tag_count_file (String): Destination path for the "Tag,Count" CSV.
        port_protocol_count_file (String): Destination path for the "Port,Protocol,Count" CSV.

    Returns:
        None. Both files are created (or overwritten) and a confirmation line is
        printed after each one is written.
    """
    # Report 1: one row per tag, in the dictionary's iteration order
    with open(tag_count_file, mode='w', newline='') as tag_out:
        tag_writer = csv.writer(tag_out)
        tag_writer.writerow(["Tag", "Count"])
        tag_writer.writerows([name, total] for name, total in tags.items())

    print(f"Count of matches for each tag saved in: {tag_count_file}")

    # Report 2: one row per (port, protocol) pair from the lookup dictionary
    with open(port_protocol_count_file, mode='w', newline='') as pair_out:
        pair_writer = csv.writer(pair_out)
        pair_writer.writerow(["Port", "Protocol", "Count"])
        pair_writer.writerows(
            [port, protocol, hits]
            for (port, protocol), (_tag, hits) in port_protocol_tags.items()
        )

    print(f"Count of matches for each port-protocol saved in: {port_protocol_count_file}")

# Script entry point: parse the inputs, then write both reports
if __name__ == "__main__":
    # Input files, expected in the current working directory
    lookup_table_file, protocol_list_file, log_file = (
        'lookup_table.csv',
        'protocol_list.csv',
        'vpc_flow_log.txt',
    )
    # Output files, created or overwritten in the current working directory
    result_tag_count_file, result_port_protocol_count_file = (
        'result_tag_count.csv',
        'result_port_protocol_count.csv',
    )

    port_protocol_tags, port_protocol, tags = preprocess_files(
        lookup_table_file,
        protocol_list_file,
        log_file,
    )
    generate_response_files(
        port_protocol_tags,
        tags,
        result_tag_count_file,
        result_port_protocol_count_file,
    )


Port-Protocol-Tags: {(25, 'tcp'): ['sv_P1', 0], (68, 'udp'): ['sv_P2', 0], (23, 'tcp'): ['sv_P1', 0], (31, 'udp'): ['sv_P3', 0], (443, 'tcp'): ['sv_P2', 0], (22, 'tcp'): ['sv_P4', 0], (3389, 'tcp'): ['sv_P5', 0], (0, 'icmp'): ['sv_P5', 0], (110, 'tcp'): ['email', 0], (993, 'tcp'): ['email', 0], (143, 'tcp'): ['email', 0]}
Port-Protocol: {'0': 'HOPOPT', '1': 'ICMP', '2': 'IGMP', '3': 'GGP', '4': 'IPv4', '5': 'ST', '6': 'TCP', '7': 'CBT', '8': 'EGP', '9': 'IGP', '10': 'BBN-RCC-MON', '11': 'NVP-II', '12': 'PUP', '13': 'ARGUS (deprecated)', '14': 'EMCON', '15': 'XNET', '16': 'CHAOS', '17': 'UDP', '18': 'MUX', '19': 'DCN-MEAS', '20': 'HMP', '21': 'PRM', '22': 'XNS-IDP', '23': 'TRUNK-1', '24': 'TRUNK-2', '25': 'LEAF-1', '26': 'LEAF-2', '27': 'RDP', '28': 'IRTP', '29': 'ISO-TP4', '30': 'NETBLT', '31': 'MFE-NSP', '32': 'MERIT-INP', '33': 'DCCP', '34': '3PC', '35': 'IDPR', '36': 'XTP', '37': 'DDP', '38': 'IDPR-CMTP', '39': 'TP++', '40': 'IL', '41': 'IPv6', '42': 'SDRP', '43': 'IPv6-Route', '44'