In [None]:
!pip install scapy pandas

Collecting scapy
  Downloading scapy-2.7.0-py3-none-any.whl.metadata (5.8 kB)
Downloading scapy-2.7.0-py3-none-any.whl (2.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scapy
Successfully installed scapy-2.7.0


In [None]:
### Breeze-v0.0.1 Test

#===Imports
from scapy.all import rdpcap, IP, TCP, UDP, DNS, DNSQR
import pandas as pd
from collections import Counter
#===

#===Globals
path = './ping_dns_test.pcapng'
#===

## Testing PCAP access
print(f'[-] Accessing PCAP at path: {"{"}{path}{"}"}`... ', end='')
try:
  packets = rdpcap(path)
  print(f'DONE.')
except Exception as e:
  print(f'\n\t[!] Could not access PCAP at path: `{path}`. Aborting...')
  exit(0)

### Getting information about PCAP
# rows, start/end time, date

[-] Accessing PCAP at path: {./ping_dns_test.pcapng}`... DONE.


In [None]:
for pkt in packets:
  print(pkt)

Ether / IP / TCP 20.69.136.49:443 > 66.71.101.209:54089 FA
Ether / IP / TCP 66.71.101.209:54089 > 20.69.136.49:443 A
Ether / IP / TCP 162.159.130.234:443 > 66.71.101.209:53941 PA / Raw
Ether / IP / TCP 162.159.130.234:443 > 66.71.101.209:53941 PA / Raw
Ether / IP / TCP 66.71.101.209:53941 > 162.159.130.234:443 A
Ether / IP / TCP 162.159.130.234:443 > 66.71.101.209:53941 PA / Raw
Ether / IP / TCP 66.71.101.209:53941 > 162.159.130.234:443 A
Ether / IP / TCP 162.159.130.234:443 > 66.71.101.209:53941 PA / Raw
Ether / IP / TCP 66.71.101.209:53941 > 162.159.130.234:443 A
Ether / IP / TCP 162.159.130.234:443 > 66.71.101.209:53941 PA / Raw
Ether / IP / TCP 66.71.101.209:53941 > 162.159.130.234:443 A
Ether / IP / TCP 162.159.130.234:443 > 66.71.101.209:53941 PA / Raw
Ether / IP / TCP 66.71.101.209:53941 > 162.159.130.234:443 A
Ether / IP / TCP 162.159.130.234:443 > 66.71.101.209:53941 PA / Raw
Ether / IP / TCP 66.71.101.209:53941 > 162.159.130.234:443 A
Ether / IP / TCP 66.71.101.209:53958 > 17

In [None]:
from datetime import datetime

timestamp = packets[0].time
converted_timestamp = datetime.fromtimestamp(timestamp)
print(f'original : {timestamp}')
print(f'converted: {converted_timestamp}')

TypeError: 'EDecimal' object cannot be interpreted as an integer

In [None]:
from datetime import datetime

datetime.fromtimestamp(1770132796.3301687)

datetime.datetime(2026, 2, 3, 15, 33, 16, 330169)

# working time stamp comparison
but without seconds conversion. need to continue with UTC date timestamp.

In [None]:
from datetime import datetime

p0 = packets[0]
pLAST = packets[len(packets) - 1]
p0_t = p0.time
pLAST_t = pLAST.time

delta = pLAST_t - p0_t
print(f'delta p0, pLAST: {float(delta):.6f} seconds')

delta p0, pLAST: 77.720585 seconds


In [None]:
# sample

from scapy.all import rdpcap, IP, TCP, UDP, DNS, DNSQR
import pandas as pd
from collections import Counter

PCAP_FILE = "cap.pcapng"

# ----------------------------
# Load PCAP
# ----------------------------
packets = rdpcap(PCAP_FILE)

rows = []
iocs = {
    "ips": set(),
    "domains": set(),
    "ports": set()
}

# ----------------------------
# Packet parsing
# ----------------------------
for pkt in packets:
    if IP in pkt:
        src_ip = pkt[IP].src
        dst_ip = pkt[IP].dst
        proto = pkt[IP].proto

        src_port = None
        dst_port = None
        dns_query = None

        # TCP
        if TCP in pkt:
            src_port = pkt[TCP].sport
            dst_port = pkt[TCP].dport
            iocs["ports"].add(dst_port)

        # UDP
        elif UDP in pkt:
            src_port = pkt[UDP].sport
            dst_port = pkt[UDP].dport
            iocs["ports"].add(dst_port)

        # DNS
        if pkt.haslayer(DNS) and pkt.haslayer(DNSQR):
            dns_query = pkt[DNSQR].qname.decode(errors="ignore")
            iocs["domains"].add(dns_query)

        # Track IP IOCs
        iocs["ips"].update([src_ip, dst_ip])

        rows.append({
            "src_ip": src_ip,
            "dst_ip": dst_ip,
            "src_port": src_port,
            "dst_port": dst_port,
            "protocol": proto,
            "dns_query": dns_query,
            "packet_len": len(pkt)
        })

for pkt in packets:
  print(pkt)

# ----------------------------
# Build DataFrame
# ----------------------------
df = pd.DataFrame(rows)

# ----------------------------
# Feature analysis
# ----------------------------
top_src_ips = Counter(df["src_ip"]).most_common(10)
top_dst_ips = Counter(df["dst_ip"]).most_common(10)
top_ports = Counter(df["dst_port"].dropna()).most_common(10)
top_domains = Counter(df["dns_query"].dropna()).most_common(10)

# ----------------------------
# Output summary
# ----------------------------
print("\n=== IOC SUMMARY ===")
print(f"Unique IPs: {len(iocs['ips'])}")
print(f"Unique Domains: {len(iocs['domains'])}")
print(f"Unique Ports: {len(iocs['ports'])}")

print("\nTop Source IPs:")
for ip, count in top_src_ips:
    print(f"{ip}: {count}")

print("\nTop Destination IPs:")
for ip, count in top_dst_ips:
    print(f"{ip}: {count}")

print("\nTop Destination Ports:")
for port, count in top_ports:
    print(f"{port}: {count}")

print("\nTop Queried Domains:")
for domain, count in top_domains:
    print(f"{domain}: {count}")

# ----------------------------
# Save for analysis / ML
# ----------------------------
df.to_csv("flow_packet_features.csv", index=False)
```

---

In [3]:
!pip install scapy pandas

Collecting scapy
  Downloading scapy-2.7.0-py3-none-any.whl.metadata (5.8 kB)
Downloading scapy-2.7.0-py3-none-any.whl (2.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m30.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scapy
Successfully installed scapy-2.7.0


In [5]:
### Breeze-v0.0.1 Test

#===Imports
from scapy.all import rdpcap, IP, TCP, UDP, DNS, DNSQR
import pandas as pd
from datetime import datetime

from collections import Counter
#===

#===Globals
path = './ping_dns_test.pcapng'
#===

## Testing PCAP access
print(f'[-] Accessing PCAP at path: {"{"}{path}{"}"}`... ', end='')
try:
  packets = rdpcap(path)
  print(f"typeof packets: {type(packets)}")
  print(f'DONE.')
except Exception as e:
  print(f'\n\t[!] Could not access PCAP at path: `{path}`. Aborting...')
  exit(0)

### Getting information about PCAP
# rows, start/end time, date

number_of_packets = len(packets)
first_packet = packets[0]
last_packet = packets[number_of_packets - 1]

print(datetime.fromtimestamp(1770132796.3301687))

p0 = packets[0]
print(f'type of p0: {type(p0)}')
pLAST = packets[len(packets) - 1]
p0_t = p0.time
pLAST_t = pLAST.time

print(f'timestamp first_packet: {first_packet.time}')
print(f'timestamp last_packet: {last_packet.time}')
# float() for EDecimal conversion. EDecimal is for delta time, float is for datetime.
print(f'UTC converted time first_packet: {datetime.fromtimestamp(float(first_packet.time))}')
print(f'UTC converted time last_packet: {datetime.fromtimestamp(float(last_packet.time))}')
#delta_time_elapsed =
#print(f'delta p0, pLAST: {float(delta):.6f} seconds')

[-] Accessing PCAP at path: {./ping_dns_test.pcapng}`... typeof packets: <class 'scapy.plist.PacketList'>
DONE.
2026-02-03 15:33:16.330169
type of p0: <class 'scapy.layers.l2.Ether'>
timestamp first_packet: 1770132796.3301687
timestamp last_packet: 1770132874.0507542
UTC converted time first_packet: 2026-02-03 15:33:16.330169
UTC converted time last_packet: 2026-02-03 15:34:34.050754


In [6]:
from scapy.all import Packet

class Handler():
  # Packet handler functions
  def __init__(self):
    pass

  def utc_stamp(self, p: Packet):
    return float(p.time) # Return UTC time

In [15]:
from packet_handler import utc_stamp

print(utc_stamp(p0))
print(packets)
print(count(packets))


1770132796.3301687
<ping_dns_test.pcapng: TCP:1758 UDP:52 ICMP:3 Other:0>
1813


In [67]:
from scapy.all import Packet, PacketList
from pandas import DataFrame

def count(pcap: PacketList) -> int:
  return len(pcap)

def utc_stamp(p: Packet):
  return float(p.time) # Return UTC time

### Packet Extraction

def get_ips(pcap: PacketList) -> dict:
  ips = []
  for pkt in pcap:
    if IP not in pkt: # Non-IP packets (VLAN, link layer, etc)
      continue

    pkt_dict = {
        'src_ip': pkt[IP].src,
        'dst_ip': pkt[IP].dst
    }
    ips.append(pkt_dict)

  return ips

# input pcap/ip_dict?
def get_unique_packets(pcap: PacketList, target_feature: str) -> DataFrame:
  if target_feature not in ['src_ip', 'dst_ip']:
    print(f'\n[!] ERROR:\n- `get_unique_packets`:\n-- target_feature: {target_feature} NOT [\'src_ip\', \'dst_ip\']')
    return 0

  ips = get_ips(pcap) # get ips
  df_ips = pd.DataFrame(ips) # convert to df
  unique_ips = df_ips[target_feature].value_counts().to_dict() # store unique ips and freqs in dict
  df_unique_ips = pd.DataFrame.from_dict(unique_ips, orient='index', columns=['freq']) # organize by index and freq
  df_unique_ips.index.name = target_feature # name index (above ips)
  df_unique_ips = df_unique_ips.reset_index() # move `index` to its own column, numerical idx's
  df_unique_ips.index = range(1, len(df_unique_ips) + 1) # idx start 1 not 0

  return df_unique_ips


### Utilities

def hr(char: str = None, size: int = None) -> str:
  if char is None:
    char = '='
  if size is None:
    size = 30
  print(f'{char*size}')


# Try to pull SRC, DST, ULT (ultimate, total) IPS

# test unique ip function, WORKING

``` python
pcap1 = packets
src_df = get_unique_packets(pcap1, 'src_ip')
dst_df = get_unique_packets(pcap1, 'dst_ip')

print(src_df)
hr()
print(dst_df)
```

# Try to pull gen uniq, WORKING

> Knowing this is important identify unexpectedly quite hosts being chattier than expected

``` python
pcap3 = packets
ips = get_ips(pcap3)
src_uniq = get_unique_packets(pcap3, target_feature='src_ip')
dst_uniq = get_unique_packets(pcap3, target_feature='dst_ip')

# to combine, we need to have identical comlumn names:
src_uniq_ren = src_uniq.rename(columns={'src_ip': 'ip', 'freq': 'freq'})
dst_uniq_ren = dst_uniq.rename(columns={'dst_ip': 'ip', 'freq': 'freq'})

gen_uniq_df = pd.concat([src_uniq_ren, dst_uniq_ren], ignore_index=True) # Aligns them linearly -- if src len = 10 and dst len = 14, col1 len now=24
gen_uniq_df = gen_uniq_df.groupby('ip', as_index=False)['freq'].sum()
gen_uniq_df = gen_uniq_df.rename(columns={'freq': 'gen_ip_freq'})

# sort by ascending for loudest endpoint
gen_uniq_df = gen_uniq_df.sort_values(by='gen_ip_freq', ascending=False).reset_index(drop=True)

gen_uniq_df
```

In [None]:
# Try to pull specific unique and freqs of conversations
pcap2 = packets



In [None]:
# Try to combine src and dst dfs for (`src_ip`, `src_ip_freq`, `dst_ip`, `dst_ip_freq`)
how would i do something like union to achieve this?

src_ip_df = (`src_ip`, `freq`) # data inside
dst_ip_df = (`dst_ip`, `freq`) # data inside
all_ip_df = (`src_ip`, `src_ip_freq`, `dst_ip`, `dst_ip_freq`) # combined data inside

In [52]:
from scapy.all import IP, TCP, UDP, ICMP

pcap = packets
ips = []

for p in pcap:
  # Non-IP packets -- ARP, Link-layer, Ethernet control, VLAN tags
  if IP not in p:
    continue

  pkt = {
      'src_ip': p[IP].src,
      'dst_ip': p[IP].dst,
  }

  ips.append(pkt)

df_ips = pd.DataFrame(ips)
print(df_ips)

hr()

# iteration
# for index, row in df_ips.iterrows():
#   print(row['src_ip', row'dst_ip'])
# unique_src_ips = {}
# unique_src_ips_count = 0
# for idx, row in df_ips.iterrows():
#   if row['src_ip'] not in unique_src_ips.keys():
#     unique_src_ips.update({
#         'ip': row['src_ip'],
#         'freq': 1
#     })
#     unique_src_ips_count += 1
#   else:
#     old_freq = unique_src_ips['src_ip']['freq']
#     unique_src_ips.update({
#         'ip': row['src_ip'],
#         'freq': (old_freq + 1)
#     })
#   # print(f'pkt {idx}\n- src_ip: {row['src_ip']}\n- dst_ip: {row['dst_ip']}')
# print(f'unique_src_ips_count: {unique_src_ips_count}')
# ult_ips = {
#     'src_ips': df_ips['src_ip'].unique().tolist(),
#     'dst_ips': df_ips['dst_ip'].unique().tolist(),
#     'test': 'test_val'
# }
unique_src_ips = df_ips['src_ip'].value_counts().to_dict()
df_unique_src_ips = pd.DataFrame.from_dict(unique_src_ips, orient='index', columns=['freq'])
df_unique_src_ips.index.name = 'src_ip'
df_unique_src_ips = df_unique_src_ips.reset_index() # move `index` to its own column, numerical idx's
df_unique_src_ips.index = range(1, len(df_unique_src_ips) + 1) # idx start 1 not 0
print(df_unique_src_ips)
hr(char='*')


for key in ult_ips:
  print(f'Key: {key}')
  for val in ult_ips[key]:
    print(f'- {val}')

               src_ip           dst_ip
0        20.69.136.49    66.71.101.209
1       66.71.101.209     20.69.136.49
2     162.159.130.234    66.71.101.209
3     162.159.130.234    66.71.101.209
4       66.71.101.209  162.159.130.234
...               ...              ...
1808    66.71.101.209    66.71.127.255
1809  162.159.130.234    66.71.101.209
1810  162.159.130.234    66.71.101.209
1811    66.71.101.209  162.159.130.234
1812   150.171.109.71    66.71.101.209

[1813 rows x 2 columns]
             src_ip  freq
1     66.71.101.209   869
2   162.159.130.234   174
3    142.251.179.97   118
4   192.178.218.106   109
5       172.64.41.4   109
6     216.239.36.21    66
7   172.253.139.101    56
8    172.253.63.113    34
9   142.251.179.113    26
10   54.246.206.151    18
11  142.251.163.154    17
12    172.253.63.95    17
13  142.251.167.139    16
14   104.70.250.213    14
15     3.162.112.51    13
16     146.75.37.91    12
17   172.66.168.139    12
18    34.144.254.29    11
19    104.18.