<a href="https://colab.research.google.com/github/Atharv24-Atreus/Cyber_Security-/blob/main/cyberlab2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
pip install scapy

Collecting scapy
  Downloading scapy-2.6.1-py3-none-any.whl.metadata (5.6 kB)
Downloading scapy-2.6.1-py3-none-any.whl (2.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m23.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scapy
Successfully installed scapy-2.6.1


1.	Write a Python program that extracts and analyzes TCP flows from a network capture file. The program should:
•	Identify all unique TCP flows (defined as a unique combination of source IP, source port, destination IP, and destination port).
•	Count the number of packets and the total byte size for each TCP flow.
•	Print the summary of TCP flows, including the source IP, destination IP, source port, destination port, packet count, and total byte size.


Filters packets that contain both TCP and IP layers.
Extracts source and destination IPs/ports.
This script analyzes a PCAP file and extracts TCP flows. It identifies the top 10 most active connections based on packet count and data transfer. This helps in detecting network activity patterns, high-traffic flows, and potential anomalies.

In [None]:
from scapy.all import rdpcap, IP, TCP
from collections import defaultdict

# Path to the pcap file (update if necessary)
pcap_file_path = "Sample.pcap"

# Read packets from the pcap file
packets = rdpcap(pcap_file_path)

# Per-flow counters. A flow is the directional 4-tuple
# (src_ip, src_port, dst_ip, dst_port), so the two directions of one
# connection are counted as two separate flows.
tcp_flows = defaultdict(lambda: {"packet_count": 0, "total_bytes": 0})

for packet in packets:
    # Only packets that have both an IP and a TCP layer belong to a flow
    if not (packet.haslayer(TCP) and packet.haslayer(IP)):
        continue

    flow_key = (packet[IP].src, packet[TCP].sport, packet[IP].dst, packet[TCP].dport)
    flow_stats = tcp_flows[flow_key]
    flow_stats["packet_count"] += 1
    flow_stats["total_bytes"] += len(packet)

# The 10 busiest flows, ranked by packet count (highest first)
top_tcp_flows = sorted(tcp_flows.items(), key=lambda x: x[1]["packet_count"], reverse=True)[:10]


def _print_flow_table(flows):
    """Print a header followed by one row per (flow_key, stats) pair.

    The IP columns are 16 wide so that a full 15-character dotted quad
    (e.g. 123.123.123.123) is still separated from the port column.
    """
    print(f"{'Source IP':<16}{'Source Port':<12}{'Destination IP':<16}{'Dest Port':<10}{'Packets':<10}{'Bytes'}")
    print("=" * 70)
    for (src_ip, src_port, dst_ip, dst_port), stats in flows:
        print(f"{src_ip:<16}{src_port:<12}{dst_ip:<16}{dst_port:<10}{stats['packet_count']:<10}{stats['total_bytes']}")


# Summary of every unique TCP flow
_print_flow_table(tcp_flows.items())

# The ranking computed above, reported separately
print()
print("Top 10 TCP flows by packet count")
_print_flow_table(top_tcp_flows)



Source IP      Source Port Destination IP Dest Port Packets   Bytes
10.11.12.101   49157       104.124.60.139 80        5         379
104.124.60.139 80          10.11.12.101   49157     5         453
10.11.12.101   49158       80.85.159.236  80        78        4556
80.85.159.236  80          10.11.12.101   49158     168       246994
10.11.12.101   49159       172.217.2.238  80        6         1069
172.217.2.238  80          10.11.12.101   49159     4         2116
10.11.12.101   49161       172.217.1.132  80        7         1228
10.11.12.101   49162       172.217.1.132  80        7         1192
172.217.1.132  80          10.11.12.101   49161     5         3765
172.217.1.132  80          10.11.12.101   49162     7         7042
10.11.12.101   49163       72.21.81.200   443       11        1341
10.11.12.101   49164       72.21.81.200   443       15        1381
10.11.12.101   49165       72.21.81.200   443       10        884
10.11.12.101   49166       72.21.81.200   443       13        

2.	Create a Python program that extracts IP addresses from a network capture file and determines their geolocation. The program should:
•	Parse the packets to extract source IP addresses.
•	Use an IP geolocation API (such as ipinfo.io, ipapi, or a similar service) to get the country, region, and city of each IP address.
•	List the IP addresses along with their corresponding geolocation information and count the number of unique IPs for each location.


This script analyzes a PCAP file to extract unique external IP addresses and determines their geographical location using an IP geolocation API. It helps in identifying the origin of network traffic, which is useful for security analysis, threat detection, and monitoring suspicious connections.

In [None]:
import requests
from scapy.all import rdpcap, IP
from collections import defaultdict

# Accumulator for the distinct external source addresses found in the capture
ip_addresses = set()

# Capture file to analyze, and the packets loaded from it
pcap_file_path = "Sample.pcap"
packets = rdpcap(pcap_file_path)

# Function to check if an IP is private/internal
def is_private_ip(ip):
    """Return True when the dotted-quad string *ip* lies in an RFC 1918 range.

    The check is purely textual: 10.x.x.x, 172.16.x.x through 172.31.x.x,
    and 192.168.x.x are recognised by their leading octets.
    """
    # The trailing dot in each prefix keeps e.g. "172.21." from matching "172.217."
    second_octet_block = tuple(f"172.{octet}." for octet in range(16, 32))
    rfc1918_prefixes = ("10.",) + second_octet_block + ("192.168.",)
    return ip.startswith(rfc1918_prefixes)

# Gather the source address of every IP packet, skipping private senders
ip_addresses.update(
    pkt[IP].src
    for pkt in packets
    if pkt.haslayer(IP) and not is_private_ip(pkt[IP].src)
)

# Tally of how many unique IPs map to each location string
geo_data = defaultdict(int)

# Function to get geolocation of an IP using ip-api.com
def get_geolocation(ip):
    """Look up *ip* on ip-api.com and return it as "country, region, city".

    Returns "Unknown Location" when the API does not report success, or
    when anything fails during the lookup (the error is printed first).
    """
    fallback = "Unknown Location"
    try:
        payload = requests.get(f"http://ip-api.com/json/{ip}", timeout=5).json()
        if payload["status"] != "success":
            return fallback
        return f"{payload['country']}, {payload['regionName']}, {payload['city']}"
    except Exception as e:
        # Best effort: report the problem and fall through to the placeholder
        print(f"Error fetching geolocation for {ip}: {e}")
    return fallback

# Resolve every IP exactly once and remember the answer. Looking each IP
# up a second time while printing would double the number of API requests
# and could make the table disagree with the per-location counts.
ip_locations = {ip: get_geolocation(ip) for ip in ip_addresses}

# Count how many unique IPs fall into each location
for location in ip_locations.values():
    geo_data[location] += 1

# Print results
print(f"{'IP Address':<20}{'Location':<40}")
print("=" * 60)
for ip, location in ip_locations.items():
    print(f"{ip:<20}{location:<40}")

# Print summary of unique locations
print("\nUnique Locations Count:")
print("=" * 40)
for location, count in geo_data.items():
    print(f"{location:<30} {count} IPs")


IP Address          Location                                
94.140.114.6        Latvia, Rīga, Riga                      
172.217.6.132       United States, Texas, Dallas            
5.61.34.51          Germany, Hesse, Frankfurt am Main       
194.87.147.244      Russia, Moscow Oblast, Korolyov         
124.217.255.96      Malaysia, Kuala Lumpur, Kuala Lumpur    
216.58.194.78       United States, California, Mountain View
72.21.81.200        United States, California, Los Angeles  
104.124.60.139      United States, Massachusetts, Cambridge 
85.143.219.95       Russia, St.-Petersburg, St Petersburg   
216.58.194.142      United States, California, Mountain View
80.85.159.236       Russia, Chelyabinsk Oblast, Chelyabinsk 
172.217.12.69       United States, California, Mountain View
208.67.222.222      United States, California, San Francisco
172.217.1.132       United States, Georgia, Atlanta         
172.217.2.238       United States, Texas, Dallas            

Unique Locations Count:

3.	Develop a Python program to monitor and report suspicious port scanning activity based on a network capture file. The program should:
•	Detect when a single source IP address attempts to connect to multiple ports on a destination IP address in a short time frame (e.g., within 5 seconds).
•	Flag such activity as potential port scanning and print a report that includes the source IP, destination IP, the range of ports scanned, and the timestamp of the first and last detected scan.


In [10]:
from scapy.all import rdpcap, IP, TCP
from collections import defaultdict
import datetime

# Path to the PCAP file
pcap_file_path = "Sample.pcap"

try:
    # Read packets from the PCAP file
    packets = rdpcap(pcap_file_path)
except FileNotFoundError:
    print("Error: PCAP file not found.")
    exit()

# Dictionary to store scanning activity: { (src_ip, dst_ip) : [(timestamp, dst_port)] }
scan_attempts = defaultdict(list)

# Time threshold for detecting port scans (5 seconds)
TIME_WINDOW = 5

# TCP header flag bits used to recognise a connection attempt
TCP_SYN = 0x02
TCP_ACK = 0x10

# Process each packet
for packet in packets:
    if not (packet.haslayer(IP) and packet.haslayer(TCP)):
        continue

    # A connection attempt has SYN set and ACK clear. Comparing the whole
    # flags field to 2 would miss SYNs that also carry other bits (for
    # example the ECN bits ECE/CWR), so only SYN and ACK are inspected.
    flags = int(packet[TCP].flags)
    if flags & (TCP_SYN | TCP_ACK) != TCP_SYN:
        continue

    # Store scanning attempt as (capture timestamp, destination port)
    scan_attempts[(packet[IP].src, packet[IP].dst)].append((packet.time, packet[TCP].dport))

# A burst is reported only when it touches MORE than this many distinct ports
PORT_THRESHOLD = 5


def _format_scan_row(src_ip, dst_ip, ports, first_time, last_time):
    """Return one report line for a detected scan burst.

    ports is the collection of distinct destination ports in the burst;
    first_time / last_time are the capture timestamps of its first and
    last SYN.
    """
    # Capture timestamps are not necessarily plain floats (recent Scapy
    # versions use a Decimal subclass), and datetime.fromtimestamp needs a
    # real number, so convert explicitly.
    time_format = '%Y-%m-%d %H:%M:%S'
    first_time_str = datetime.datetime.fromtimestamp(float(first_time)).strftime(time_format)
    last_time_str = datetime.datetime.fromtimestamp(float(last_time)).strftime(time_format)
    return f"{src_ip:<18}{dst_ip:<18}{str(sorted(ports)):<30}{first_time_str:<20}{last_time_str}"


# Store results in a list (for Colab output); the first two entries are the header
results = []
results.append(f"{'Source IP':<18}{'Destination IP':<18}{'Ports Scanned':<30}{'First Scan':<20}{'Last Scan'}")
results.append("=" * 100)

# Detect potential port scanning
for (src_ip, dst_ip), attempts in scan_attempts.items():
    # Distinct ports seen in the current burst. A set is used so repeated
    # SYNs to one port (retries, several connections to the same service)
    # are not mistaken for a scan across many ports.
    burst_ports = set()
    burst_start = None
    burst_end = None

    for timestamp, dst_port in sorted(attempts):
        # A burst covers TIME_WINDOW seconds measured from its first SYN
        if burst_start is None or timestamp - burst_start > TIME_WINDOW:
            # Window expired: report the finished burst, then start a new one
            if len(burst_ports) > PORT_THRESHOLD:
                results.append(_format_scan_row(src_ip, dst_ip, burst_ports, burst_start, burst_end))
            burst_ports = set()
            burst_start = timestamp

        burst_ports.add(dst_port)
        burst_end = timestamp

    # Report the burst that was still open when the attempts ran out
    if len(burst_ports) > PORT_THRESHOLD:
        results.append(_format_scan_row(src_ip, dst_ip, burst_ports, burst_start, burst_end))

# Print all results at once (more than the two header lines means a detection)
if len(results) > 2:
    for line in results:
        print(line)
else:
    print("No port scanning activity detected.")


No port scanning activity detected.


4.	Write a Python script to extract the originating IP address from an email header and use an IP geolocation API to find the approximate location of the email sender. The program should handle cases where the sender's IP is masked or hidden.

In [9]:
from scapy.all import rdpcap, IP
import requests

def extract_ip_from_pcap(file_path):
    """Return the distinct source IP addresses found in a PCAP file.

    Packets without an IP layer are ignored. The result is a list with
    one entry per unique source address.
    """
    unique_sources = {pkt[IP].src for pkt in rdpcap(file_path) if IP in pkt}
    return list(unique_sources)

def is_private_ip(ip):
    """Return True when *ip* is not a publicly routable address.

    Uses the standard-library ``ipaddress`` classification, so besides the
    RFC 1918 ranges (10/8, 172.16/12, 192.168/16) it also covers loopback
    and link-local addresses, which are pointless to geolocate.

    A string that is not a valid IP address returns False instead of
    raising, so callers can filter arbitrary input safely.
    """
    # Imported here so the function stays self-contained within its cell
    import ipaddress

    try:
        address = ipaddress.ip_address(ip)
    except ValueError:
        # Not parseable as an IP address at all
        return False
    return address.is_private

def get_geolocation(ip):
    """Fetch geolocation data for an IP address from ip-api.com.

    Returns a dict with the keys IP, Country, Region, City, ISP, Latitude
    and Longitude when the API reports success. Otherwise returns a dict
    with a single "Error" key:

    * "API request failed" - the request itself failed or timed out, or
      the response body was not valid JSON;
    * "Could not retrieve location" - the API answered but did not
      report success for this address.
    """
    try:
        # Without a timeout a stalled connection would block forever
        response = requests.get(f'http://ip-api.com/json/{ip}', timeout=5)
        data = response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers a non-JSON body on Requests versions whose
        # JSON decode error is not a RequestException subclass
        return {"Error": "API request failed"}

    # .get() so a response without a "status" field is treated as a failure
    if data.get('status') != 'success':
        return {"Error": "Could not retrieve location"}

    return {
        "IP": ip,
        "Country": data.get("country"),
        "Region": data.get("regionName"),
        "City": data.get("city"),
        "ISP": data.get("isp"),
        "Latitude": data.get("lat"),
        "Longitude": data.get("lon"),
    }

if __name__ == "__main__":
    # 'Sample.pcap' must exist next to this script/notebook; otherwise
    # point pcap_file at the correct location.
    pcap_file = "Sample.pcap"

    ip_list = extract_ip_from_pcap(pcap_file)
    public_ips = [candidate for candidate in ip_list if not is_private_ip(candidate)]

    if not public_ips:
        print("No public IP addresses found in the PCAP file.")

    # Report each public sender together with whatever the lookup returned
    for ip in public_ips:
        print(f"Extracted Public IP: {ip}")
        location_info = get_geolocation(ip)
        print("Geolocation Data:", location_info)

Extracted Public IP: 94.140.114.6
Geolocation Data: {'IP': '94.140.114.6', 'Country': 'Latvia', 'Region': 'Rīga', 'City': 'Riga', 'ISP': 'Sia Nano IT', 'Latitude': 56.921, 'Longitude': 24.1698}
Extracted Public IP: 172.217.1.132
Geolocation Data: {'IP': '172.217.1.132', 'Country': 'United States', 'Region': 'Georgia', 'City': 'Atlanta', 'ISP': 'Google LLC', 'Latitude': 33.7488, 'Longitude': -84.3877}
Extracted Public IP: 172.217.12.69
Geolocation Data: {'IP': '172.217.12.69', 'Country': 'United States', 'Region': 'California', 'City': 'Mountain View', 'ISP': 'Google LLC', 'Latitude': 37.4225, 'Longitude': -122.085}
Extracted Public IP: 208.67.222.222
Geolocation Data: {'IP': '208.67.222.222', 'Country': 'United States', 'Region': 'California', 'City': 'San Francisco', 'ISP': 'Cisco OpenDNS, LLC', 'Latitude': 37.7642, 'Longitude': -122.3993}
Extracted Public IP: 172.217.6.132
Geolocation Data: {'IP': '172.217.6.132', 'Country': 'United States', 'Region': 'Texas', 'City': 'Dallas', 'ISP'