In [1]:
from socket import *
# import the "regular expressions" module
import re

In [2]:
def get_http_resource(url, file_name):
    """
    Get an HTTP resource from a server
           Parse the URL and call function to actually make the request.

    :param url: full URL of the resource to get
    :param file_name: name of file in which to store the retrieved resource

    (do not modify this function)
    """

    # Parse the URL into its component parts using a regular expression.
    url_match = re.search('http://([^/:]*)(:\d*)?(/.*)', url)
    url_match_groups = url_match.groups() if url_match else []
    #    print 'url_match_groups=',url_match_groups
    if len(url_match_groups) == 3:
        host_name = url_match_groups[0]
        host_port = int(url_match_groups[1][1:]) if url_match_groups[1] else 80
        host_resource = url_match_groups[2]
        print('host name = {0}, port = {1}, resource = {2}'.format(host_name, host_port, host_resource))
        status_string = do_http_exchange(host_name.encode(), host_port, host_resource.encode(), file_name)
        print('get_http_resource: URL="{0}", status="{1}"'.format(url, status_string))
    else:
        print('get_http_resource: URL parse failed, request not sent')

In [3]:
def extract_message_code(header):
    splitHeader = header.split()
    return splitHeader[1]

In [4]:
def write_to_file_bytes(msg, file_name):
    written = False
    with open(file_name, 'wb') as file:
        file.write(msg)
        written = True
    return written

In [5]:
def write_to_file_txt(msg, file_name):
    written = False
    with open(file_name, 'w') as file:
        file.write(msg)
        written = True
    return written

In [6]:
def do_http_exchange(host, port, resource, file_name):
    """
    Get an HTTP resource from a server

    :param bytes host: the ASCII domain name or IP address of the server machine (i.e., host) to connect to
    :param int port: port number to connect to on server host
    :param bytes resource: the ASCII path/name of resource to get. This is everything in the URL after the domain name,
           including the first /.
    :param file_name: string (str) containing name of file in which to store the retrieved resource
    :return: the status code
    :rtype: int
    """
    # Open a tcp socket
    # Connect the socket to the host on the given port
    tcp_socket = socket(AF_INET, SOCK_STREAM)
    tcp_socket.connect((host, port))

    # Create a request as a bytes object    
    requestLine = b'GET ' + resource + b' HTTP/1.1\r\nHost: ' + host + b'\r\n\r\n'
    print(requestLine)
    
    #This is from stackoverflow. Why do they have it like this?
    stackOverFlowLine = b"GET / HTTP/1.1\nHost: stackoverflow.com\n\n"
    requestalicious = f"GET / HTTP/1.1\r\nHost: {host}:{port}\r\n\r\n".encode()
    
    # Send the request to the host
    tcp_socket.send(requestLine)
    
    # Receive the response for the host
    full_header = ''
    while True:
        if '\r\n' in full_header:
            break
        data = tcp_socket.recv(1)
        
        full_header += data.decode('utf-8')
        
    print(f'full thing: {full_header}')
    
    ## Get the first line of the header first
    ## Extract the message code (e.g. 404, 200)
    
    msg_code = extract_message_code(full_header)
    print(f'message code:  {msg_code}')
        
    
        ## If 200 proceeed to read the rest of the header lines
    flag = True
    if int(msg_code) == 200:
        all_headers = ''
        
        while True:
            if '\r\n\r\n' in all_headers:
                break
            #For each header
            header = ''
            while True:
                
                if '\r\n' in header:
                    break
                data = tcp_socket.recv(1)

                header += data.decode()
                    
                    
            print(f'all header: {header}')
            
            if 'Content-Length' in header:
                contentLen = header
                contentLen = contentLen.split(' ')[1]
                print(contentLen)
                
            
            if 'Transfer-Encoding' in header:
                contentLen = header
                contentLen = contentLen.split( )[1]
                print(contentLen)
            
            all_headers += header
            
            
        # If the header contains the Content-Length, then
        ## Read the number of bytes given by the content length value
        ## save the bytes to a file given by file_name
        if 'Content-Length' in all_headers:
    
            
            message = b''
            while True:
                data = tcp_socket.recv(int(contentLen))
                if len(data) < 1:
                    break
                message = message + data
            print(message)
            write_to_file_bytes(message, file_name)
        
        
        # Else if the header contains the Transfer-Encoding with value chunks
        ## Read each chunk in 
        ## Combine the chunks
        ## Decode the chunks as ASCII
        ## Write the ASCII to a file given by file_name
        if 'Transfer-Encoding' in all_headers:
            msg = ''
            while True:
                
                size = ''
                while True:
                    if '\r\n' in size:
                        break
                    data = tcp_socket.recv(1)
                    size += data.decode()

                if size == '\r\n':
                    tcp_socket.recv(1)
                    break
                chunkSize = int(size, 16)
                print(chunkSize)
                if chunkSize == 0:
                    break
                else:
                    data = tcp_socket.recv(chunkSize)
                    print(data.decode())
                    msg = msg + data.decode()
            write_to_file_txt(msg, file_name)
                    
    return int(msg_code)  # Replace this "server error" with the actual status code

In [7]:
"""
Tests the client on a variety of resources
"""

# These resource request should result in "Content-Length" data transfer
get_http_resource('http://www.httpvshttps.com/check.png', 'check.png')

# this resource request should result in "chunked" data transfer
get_http_resource('http://www.httpvshttps.com/','index.html')

# If you find fun examples of chunked or Content-Length pages, please share them with us!

host name = www.httpvshttps.com, port = 80, resource = /check.png
b'GET /check.png HTTP/1.1\r\nHost: www.httpvshttps.com\r\n\r\n'
full thing: HTTP/1.1 200 OK

message code:  200
all header: Server: nginx

all header: Date: Wed, 06 Oct 2021 13:19:16 GMT

all header: Content-Type: image/png

all header: Content-Length: 1719

1719

all header: Last-Modified: Mon, 30 May 2016 17:51:56 GMT

all header: Connection: keep-alive

all header: ETag: "574c7dbc-6b7"

all header: x-instance: rocket-dallas

all header: x-powered-by: anthum.com

all header: Accept-Ranges: bytes

all header: 

b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x14\x00\x00\x00\x14\x08\x02\x00\x00\x00\x02\xeb\x8aZ\x00\x00\x00\x19tEXtSoftware\x00Adobe ImageReadyq\xc9e<\x00\x00\x03$iTXtXML:com.adobe.xmp\x00\x00\x00\x00\x00<?xpacket begin="\xef\xbb\xbf" id="W5M0MpCehiHzreSzNTczkc9d"?> <x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="Adobe XMP Core 5.3-c011 66.145661, 2012/02/06-14:56:27        "> <rdf:RDF xmlns:rdf="http://www.w