In [None]:
import gzip

def extract_packets(filename):
    """
    Extract packet data from a text hexdump file and return a list of hex strings.

    A line is treated as a dump row when splitting it on two consecutive
    spaces yields at least three fields; the first field is the offset and
    the second field holds the space-separated hex bytes (any later fields,
    presumably the ASCII column, are ignored). A row whose offset starts with
    '0000' begins a new packet. A line that contains no double space (for
    example a blank line) ends the current packet.

    Args:
        filename: Path of the hexdump text file to read.

    Returns:
        list[str]: One contiguous hex string per packet, in file order.
        Each packet appears exactly once, even if the file has repeated
        separator lines, no separator between two packets, or no separator
        after the final packet.
    """
    allpackets = []
    packet = ''
    # True while `packet` holds rows that have not been appended yet. Without
    # this flag a run of separator lines would append the same packet twice
    # and a leading separator would append an empty packet.
    pending = False
    with open(filename, 'r') as file:
        for line in file:
            temp = line.split('  ')
            if len(temp) >= 3:
                if temp[0].startswith('0000'):
                    # A new packet started without a separator line:
                    # keep the previous one instead of discarding it.
                    if pending:
                        allpackets.append(packet)
                    packet = ''
                packet += ''.join(temp[1].split(' '))
                pending = True
            elif len(temp) == 1 and pending:
                allpackets.append(packet)
                pending = False
    # The file may end right after the last dump row (no trailing blank line).
    if pending:
        allpackets.append(packet)
    return allpackets

def parse_packet(data):
    """
    Parse Ethernet, IP, and TCP headers to extract HTTP payload from raw packet data.

    The frame is assumed to be an untagged Ethernet II frame (14-byte header)
    carrying IPv4 and TCP; this is not validated here.

    Args:
        data: Hex string of a single captured frame.

    Returns:
        bytes: The bytes that follow the TCP header, limited to the end of
        the IP datagram so that any Ethernet trailer padding on short frames
        is not returned as payload.

    Raises:
        ValueError: If `data` is not a valid hex string (from bytes.fromhex).
        IndexError: If the frame is too short to contain the IP or TCP header
            bytes that are read.
    """
    hexdata = bytes.fromhex(data)

    # Frame header: fixed length, nothing in it is inspected.
    ethernet_header_len = 14

    # IP header: the low nibble of the first byte is the header length in
    # 32-bit words; bytes 2-3 are the total datagram length (big-endian).
    ipheader_offset = ethernet_header_len
    ipheader = hexdata[ipheader_offset: ipheader_offset + 20]
    ipheader_len = (ipheader[0] & 0x0F) * 4
    ip_total_len = int.from_bytes(ipheader[2:4], 'big')

    # TCP header: the high nibble of byte 12 is the header length in
    # 32-bit words.
    tcpheader_offset = ethernet_header_len + ipheader_len
    tcpheader = hexdata[tcpheader_offset: tcpheader_offset + 20]
    tcplen = ((tcpheader[12] >> 4) & 0x0F) * 4

    httpheader_offset = tcpheader_offset + tcplen

    # Stop at the end of the IP datagram rather than the end of the frame,
    # otherwise trailer padding (e.g. on a bare ACK) is mistaken for payload.
    # A total length of 0 is treated as "unknown" and falls back to the end
    # of the captured frame (some captures reportedly record 0 when
    # segmentation offload is in use).
    if ip_total_len:
        payload_end = ipheader_offset + ip_total_len
    else:
        payload_end = len(hexdata)

    httpdata = hexdata[httpheader_offset:payload_end]
    return httpdata


def extract_http_payload(packets):
    """
    Extract and return the HTTP payload by concatenating packet data and removing headers.

    Each packet is passed through parse_packet and the results are joined in
    the order given (no reordering or de-duplication is attempted). The body
    is everything after the first blank line (CRLF CRLF) in the joined data.

    Args:
        packets: Iterable of per-packet hex strings accepted by parse_packet.

    Returns:
        bytes: The bytes following the first CRLF CRLF, or b'' when the
        joined data contains no CRLF CRLF (no header/body boundary found).
    """
    # Join once instead of growing a bytes object inside the loop.
    httpdata = b''.join(parse_packet(packet) for packet in packets)

    header_end = httpdata.find(b'\r\n\r\n')
    if header_end == -1:
        # find() returned -1: adding 4 would give offset 3 and silently
        # slice off the first three bytes of unrelated data.
        return b''

    mydata = httpdata[header_end + 4:]
    return mydata

def decompress_http_data(compressed_data):
    """
    Gunzip the HTTP payload and decode the result as UTF-8 text.

    Any exception raised while decompressing or decoding is reported on
    standard output and turned into a None return value.

    Returns:
        str on success, None on failure.
    """
    try:
        raw_bytes = gzip.decompress(compressed_data)  # gzip container -> raw bytes
        text = raw_bytes.decode('utf-8')
    except Exception as e:
        print(f"Decompression failed: {e}")
        return None
    return text

def calculate_compression_ratio(compressed_data, decompressed_data):
    """
    Calculate and return the compression ratio (decompressed size / compressed size).

    Sizes are measured in bytes. A str argument is measured by its UTF-8
    encoded length, so decoded text containing multi-byte characters is not
    under-counted by len() counting characters.

    Args:
        compressed_data: Compressed payload (bytes-like, or str).
        decompressed_data: Decompressed payload (bytes-like, or str).

    Returns:
        The ratio as a float, or 0 when the compressed size is 0.
    """
    def _size_in_bytes(data):
        # len() of a str counts characters, not bytes.
        if isinstance(data, str):
            return len(data.encode('utf-8'))
        return len(data)

    compressed_size = _size_in_bytes(compressed_data)
    decompressed_size = _size_in_bytes(decompressed_data)
    ratio = decompressed_size / compressed_size if compressed_size > 0 else 0
    return ratio

In [19]:

# Step 1: Read hexdump file
file_name = "htmlhexdump.txt"
packets = extract_packets(file_name)

# Step 2: Extract HTTP payloads
mydata_reassemble = extract_http_payload(packets)
print(f"Total size of HTTP response data (compressed): {len(mydata_reassemble)} bytes")

# Step 3: Decompress the HTTP payload data
decompressed_data = decompress_http_data(mydata_reassemble)

if decompressed_data:
    # decompressed_data is text decoded from UTF-8; len() on it counts
    # characters. Re-encode to get the real size in bytes for reporting.
    decompressed_bytes = decompressed_data.encode('utf-8')
    print(f"Total size of uncompressed HTML content: {len(decompressed_bytes)} bytes")

    # Step 4: Calculate and print the compression ratio (bytes vs bytes)
    compression_ratio = calculate_compression_ratio(mydata_reassemble, decompressed_bytes)
    print(f"Compression Ratio: {compression_ratio:.2f}")
else:
    print("Decompression failed.")

Total size of HTTP response data (compressed): 31348 bytes
Total size of uncompressed HTML content: 153899 bytes
Compression Ratio: 4.91


In [20]:
# Show the decompressed content under a heading line.
print(f"Decompressed Data:\n {decompressed_data}")

Decompressed Data:
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta name="description" content="Sanchar Nigam Executives' Association , India. This Website contains information about the happenings in SNEA (BSNL Executives Association of India). Major executives of BSNL are the members of this association. This association works for the betterment of BSNL executives" />
<meta name="description" content="" />
<meta name="google" content="notranslate" />
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<meta name="keywords" content="bsnl,snea,india,sneaindia,muthu kumar, rajan, jogi, sebastin,mtnl, association, executives, telecommunication,its, repatriation, its repatriation, its absorbtion, absorbtion, DOT, Indian Telecom company, Government of India telecom company, Indian Telecom Service, Telecom Engineers, Junior Telecom Officer, JTO, Sub divisi