# Test in notebook

In [11]:
import csv
from collections import Counter

def read_log_file(file_path):
    """Read a delimited log file into a list of field lists.

    The delimiter is auto-detected from the first non-blank line: if that
    line contains a comma, every line is split on commas; otherwise every
    line is split on runs of whitespace.

    Blank (or whitespace-only) lines are skipped so that callers indexing
    into fixed column positions do not hit empty rows.

    Args:
        file_path: Path of the log file to read.

    Returns:
        A list containing one list of string fields per non-blank line, in
        file order. An empty or all-blank file yields an empty list.
    """
    with open(file_path, 'r') as f:
        # Strip once and drop blank lines up front; leading/trailing
        # whitespace never carries a field in either delimiter mode.
        lines = [stripped for stripped in (line.strip() for line in f) if stripped]

    if not lines:
        return []

    # Sniff the delimiter from the first line that actually has content.
    if ',' in lines[0]:
        return [line.split(',') for line in lines]
    return [line.split() for line in lines]

def read_lookup_file(file_path):
    """Read a CSV lookup file into a list of rows.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list of rows, each a list of string fields as parsed by
        ``csv.reader``. Completely blank lines are omitted.
    """
    # newline='' lets the csv module do its own newline handling, which is
    # required for quoted fields that contain embedded line breaks.
    with open(file_path, 'r', newline='') as f:
        # csv.reader yields [] for blank lines; drop those so callers can
        # index into each row safely.
        return [row for row in csv.reader(f) if row]

def print_port_protocol_table(log_file):
    """Print how many log rows share each (port, protocol) pair.

    The port is read from field index 5 and the protocol from field index 7
    of every row returned by ``read_log_file``. Rows with fewer than eight
    fields are skipped instead of raising ``IndexError``.

    Output is CSV-like text on stdout: a title line, a header line, then one
    ``port,protocol,count`` line per distinct pair in first-seen order.

    Args:
        log_file: Path of the log file to summarize.
    """
    # NOTE(review): if these are default-format AWS VPC flow log (v2) records,
    # index 5 is srcport and index 6 is dstport - confirm which port is meant.
    port_index, protocol_index = 5, 7
    min_fields = max(port_index, protocol_index) + 1

    logs = read_log_file(log_file)
    combinations = Counter(
        (row[port_index], row[protocol_index])
        for row in logs
        if len(row) >= min_fields  # ignore blank/truncated rows
    )

    print("Port/Protocol Counts:")
    print("Port,Protocol,Count")
    for (port, protocol), count in combinations.items():
        print(f"{port},{protocol},{count}")

def print_category_count_table(log_file, lookup_file):
    """Print how many log rows map to each tag in the lookup table.

    Each lookup row supplies ``port, protocol-name, tag`` in its first three
    columns. For every log row, the port (field index 5) and the protocol
    number (field index 7) are translated to a ``(port, protocol-name)`` key
    and looked up; rows without a match are counted as ``Untagged``.

    Counting is per log row, so a (port, protocol) pair that appears several
    times in the log contributes that many times to its tag.

    Output is CSV-like text on stdout: a blank line, a title line, a header
    line, then one ``tag,count`` line per tag in first-seen order.

    Args:
        log_file: Path of the log file to summarize.
        lookup_file: Path of the CSV file mapping port/protocol to a tag.
    """
    # IANA protocol numbers for the protocol names a lookup file is likely to
    # use. Anything else falls through as-is (and so normally ends up
    # 'Untagged') rather than being misreported as udp.
    protocol_names = {'1': 'icmp', '6': 'tcp', '17': 'udp'}

    # Normalize the key parts so stray whitespace or upper-case protocol names
    # in the lookup file still match. Rows with fewer than three columns
    # cannot supply a tag and are ignored.
    lookup = {
        (row[0].strip(), row[1].strip().lower()): row[2]
        for row in read_lookup_file(lookup_file)
        if len(row) >= 3
    }

    categories = Counter()
    for row in read_log_file(log_file):
        if len(row) < 8:
            # Blank or truncated row: no port/protocol fields to read.
            continue
        port = row[5].strip()
        protocol = row[7].strip()
        protocol_name = protocol_names.get(protocol, protocol.lower())
        categories[lookup.get((port, protocol_name), 'Untagged')] += 1

    print("\nTag Counts:")
    print("Tag,Count")
    for category, count in categories.items():
        print(f"{category},{count}")

In [9]:
# Input files for this run. The paths are relative, so they are resolved
# against the kernel's current working directory.
space_log_file = 'flow_logs_space.log'  # presumably the whitespace-delimited sample log - confirm
lookup_file = 'category_lookup.csv'


# Print the count of each (port, protocol) pair found in the log.
print_port_protocol_table(space_log_file)


Port/Protocol Counts:
Port,Protocol,Count
443,6,1
23,6,1
25,6,1
110,6,1
993,6,1
143,6,1
1024,6,1
80,6,1
1030,6,1
56000,6,1
49321,6,1
49152,6,1
49153,6,1
49154,6,1


In [10]:
# Print the per-tag count table, using the lookup CSV to map each
# port/protocol pair to a tag ('Untagged' when there is no match).
print_category_count_table(space_log_file, lookup_file)


Tag Counts:
Tag,Count
sv_P2,1
sv_P1,2
email,3
Untagged,8
