In [1]:
import datetime
import glob
import os
import re
import selectors
import socket
from email.utils import formatdate
from multiprocessing import Process

In [2]:
# Helpful links
# https://gitlab.com/wireshark/wireshark/-/wikis/Hyper_Text_Transfer_Protocol
# https://babelstone.co.uk/Unicode/whatisit.html
# https://docs.python.org/3/library/re.html
# https://regex101.com/
# https://realpython.com/python-sockets/#handling-multiple-connections

In [3]:
# HTTP requires every header line to end in \r\n, and the header section to be closed by an empty line.
# This means the last header line is followed by \r\n\r\n; the body that follows is delimited by Content-Length instead.
class HttpResponse:
    """A minimal HTTP/1.x response: status line, headers and an optional body.

    Args:
        response_line: Full status line without the trailing CRLF,
            e.g. ``'HTTP/1.1 200 OK'``.
        headers: Optional mapping of header name -> value. It is copied, so
            the caller's mapping is never modified by ``add_headers``.
        data: Optional body. ``bytes`` are sent as-is; a ``str`` is encoded
            as UTF-8 so that Content-Length counts bytes, not characters.
    """

    def __init__(self, response_line='HTTP/1.1 200 OK', headers=None, data=b''):
        self.response_line = response_line
        # Always hold a private dict: the old '' default had neither .update()
        # nor .items(), so a default-constructed response could not be used.
        self.headers = dict(headers) if headers else {}
        if isinstance(data, str):
            data = data.encode('utf8')
        self.data = data or b''

    def add_headers(self, headers):
        """Merge ``headers`` (a mapping) into this response, overriding duplicates."""
        self.headers.update(headers)

    def get_response(self):
        """Return the complete response (header block followed by body) as bytes."""
        return self.get_header() + self.data

    def get_header(self):
        """Return the status line and headers, terminated by an empty line, as ASCII bytes.

        Content-Length is generated automatically. It is emitted whenever there
        is a body, and also as ``Content-Length: 0`` for an empty body unless
        the status code forbids or makes it misleading (1xx, 204, 304), so a
        client on a persistent connection can tell where the message ends.
        """
        lines = [self.response_line]
        lines.extend(f'{key}: {value}' for key, value in self.headers.items())
        if self.data or self._body_allowed():
            lines.append('Content-Length: %d' % len(self.data))
        # Each line ends in CRLF and the block is closed by one empty line.
        return ('\r\n'.join(lines) + '\r\n\r\n').encode('ascii')

    def _body_allowed(self):
        """Return False for status codes whose responses never carry a body."""
        parts = self.response_line.split()
        status = parts[1] if len(parts) > 1 else ''
        return not (status.startswith('1') or status in ('204', '304'))

# Extension (lower-case, without the dot) -> MIME type for the files this server hands out.
_MIME_BY_EXTENSION = {
    'png': 'image/png',
    'jpg': 'image/jpeg',
    'jpeg': 'image/jpeg',
    'gif': 'image/gif',
    'html': 'text/html',
    'htm': 'text/html',
    'css': 'text/css',
    'js': 'text/javascript',
}


def guess_mimetype(path):
    """Return a MIME type for ``path`` based on its file extension.

    Only the extension of the final path component is considered, compared
    case-insensitively. Paths without an extension, or with an unknown one,
    fall back to ``'text/plain'``.
    """
    # splitext looks at the last dot of the last component only, so dots in
    # directory names or in the middle of a file name are not mistaken for the
    # extension, and a path without any dot yields '' instead of raising.
    extension = os.path.splitext(path)[1][1:].lower()
    return _MIME_BY_EXTENSION.get(extension, 'text/plain')

In [4]:
# Smoke test: build a response for a tiny HTML document and show exactly what goes on the wire.
testdata = b"""<!DOCTYPE HTML>
<html lang="en">
</html>"""

mime = guess_mimetype('blabla/subfolder/index.html')
test_headers = {'Server': 'WindowsLaptopCustom', 'Content-Type': mime}
response = HttpResponse(data=testdata, headers=test_headers)
raw_response = response.get_response()
print(raw_response.decode('ascii'))

HTTP/1.1 200 OK
Server: WindowsLaptopCustom
Content-Type: text/html
Content-Length: 40

<!DOCTYPE HTML>
<html lang="en">
</html>


In [5]:
# Headers attached to every response this server sends.
STOCK_HEADERS = {
    'Server': 'WindowsLaptopCustom',
}

def get_stock_headers():
    """Return a fresh dict with the stock headers plus a current ``Date`` header.

    A copy is returned, so callers may add to it without touching STOCK_HEADERS.
    """
    headers = STOCK_HEADERS.copy()
    # HTTP wants the Date header as an RFC-style GMT timestamp
    # ("Sun, 06 Nov 1994 08:49:37 GMT"); str(datetime.now()) produced a
    # local-time, ISO-like string instead.
    headers['Date'] = formatdate(usegmt=True)
    return headers


def get_static_response(path, mime=None):
    """Build an HttpResponse that serves the file at ``path``.

    The file is read in binary mode. When ``mime`` is not given (or is falsy)
    the content type is derived from ``path`` via guess_mimetype(). The
    response carries the stock headers plus ``Content-Type``; Content-Length
    is generated by HttpResponse itself whenever the body is non-empty.
    """
    with open(os.path.relpath(path), 'rb') as static_file:
        body = static_file.read()

    content_type = mime if mime else guess_mimetype(path)

    all_headers = get_stock_headers()
    all_headers['Content-Type'] = content_type
    return HttpResponse(data=body, headers=all_headers)


def _log_and_serialize(response):
    """Print the response's header block and return the full response as bytes."""
    print(response.get_header().decode('ascii'))
    return response.get_response()


def process_request(raw_data):
    """Turn the raw bytes of one HTTP request into the raw bytes of a response.

    * POST: the whole request is appended to ``post.txt`` and a bodiless 204
      is returned, since the client still expects a status after posting.
    * GET for a path containing ``favicon``: serves ``favicon.png``.
    * GET ``/``: serves ``index.html``.
    * GET for any other path: serves the file if the path (query string
      removed) names a regular file found by a recursive glob of the
      working directory.
    * Anything else (unknown path, other methods, unparsable request):
      falls back to ``index.html``.

    Args:
        raw_data: Request bytes as received from the socket.

    Returns:
        bytes: The serialized response, ready to be sent.
    """
    # errors='replace': a binary POST body (e.g. a file upload) must not abort
    # the whole request with UnicodeDecodeError.
    request = raw_data.decode('utf8', errors='replace')
    post = re.match(r'POST', request)
    if post:
        print('POST logged to post.txt')
        # Explicit encoding: the platform default (e.g. cp1252) cannot encode
        # every character a UTF-8 request may contain.
        with open('post.txt', 'a', encoding='utf8') as f:
            dashes = '-'*20
            f.write(f"\n{dashes}{str(datetime.datetime.now())}{dashes}\n")
            f.write(request)
        # after a POST, the client still expects a response that it succeeded in some form
        # https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/201
        # https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/200
        # https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/204
        response = HttpResponse(response_line='HTTP/1.1 204 FORM RECEIVED', headers=get_stock_headers())
        return _log_and_serialize(response)

    match = re.match(r'GET ([^ ]+)', request)
    request_path = '/ERROR'
    if match:
        # The query string is not part of the file name.
        request_path = match.group(1).split('?', 1)[0]

    print(request.split('\n')[0]) # show the GET line

    if 'favicon' in request_path:
        return _log_and_serialize(get_static_response('favicon.png'))

    if '/' == request_path:
        return _log_and_serialize(get_static_response('index.html'))

    # check if specified request path exists
    requested = request_path[1:]
    if requested:  # os.path.relpath('') raises ValueError
        requested = os.path.relpath(requested)
        for path in glob.glob('**', recursive=True):
            path = os.path.relpath(path)
            # Exact match on the normalized path: a substring test would serve
            # an unrelated file whose name merely contains the request.
            # isfile() skips the directories glob also returns, which open()
            # cannot read.
            if path == requested and os.path.isfile(path):
                return _log_and_serialize(get_static_response(path))

    # send default, or could send a 404 error technically if a specific resource is requested that doesn't exist
    return _log_and_serialize(get_static_response('index.html'))

In [None]:
# https://docs.python.org/3/library/selectors.html#examples

# WEB BROWSER SPAWNS MULTIPLE REQUESTS WHEN READING AN HTML PAGE FOR ALL ITS RESOURCES
# THE SOURCE PORTS ARE DIFFERENT SO THEY ARE SENT OVER DIFFERENT SOCKETS
# THEREFORE, IT IS ESSENTIAL THAT AN HTTP SERVER BE ABLE TO HANDLE MULTIPLE REQUESTS AND CONNECTIONS AT THE SAME TIME!!!

# One selector shared by the accept/read callbacks and the event loop.
sel = selectors.DefaultSelector()

def accept(sock, mask):
    """Selector callback for the listening socket: take one pending connection.

    The new client socket is switched to non-blocking mode and registered for
    read events, with ``read`` as the callback stored in the selector key.
    """
    client, client_addr = sock.accept()  # Should be ready
    print('accepted', client, 'from', client_addr)
    client.setblocking(False)
    sel.register(client, selectors.EVENT_READ, data=read)

def read(conn, mask, send_timeout=10.0):
    """Selector callback for a client socket: read one request and answer it.

    Up to 4096 bytes are read and passed to process_request(); a non-empty
    result is sent back in full. An empty read means the peer closed the
    connection, so the socket is unregistered and closed. Any exception has
    the same effect after being printed.

    Args:
        conn: The readable client socket.
        mask: Event mask supplied by the selector (unused).
        send_timeout: Maximum number of seconds to spend sending the response.
    """
    try:
        data = conn.recv(4096)  # Should be ready
        if data:
            response = process_request(data)
            if response:
                # On a non-blocking socket sendall() raises BlockingIOError as
                # soon as the OS send buffer is full, which truncated any
                # response larger than that buffer. Send in timeout mode
                # instead, then restore non-blocking mode for the selector.
                # NOTE: this stalls the event loop while a large response is
                # being sent; the timeout bounds that stall.
                conn.settimeout(send_timeout)
                try:
                    conn.sendall(response)
                finally:
                    conn.setblocking(False)
        else:
            print('closing', conn)
            sel.unregister(conn)
            conn.close()
    except Exception as e:
        print(f"Exception {e}")
        sel.unregister(conn)
        conn.close()

# Listen on every interface, port 80, and dispatch selector events until interrupted.
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
    sock.bind(('', 80))
    sock.listen(100) # backlog: number of not-yet-accepted connections the OS may queue
    sock.setblocking(False)
    sel.register(sock, selectors.EVENT_READ, accept)

    while True:
        events = sel.select() # block until registered objects are ready, return list of (key, events) where key is SelectorKey
        for key, mask in events:
            # key.data is the callback given at registration time (accept or read)
            callback = key.data
            callback(key.fileobj, mask)
except KeyboardInterrupt:
    # Interrupting the kernel is the normal way to stop this cell.
    print('server stopped')
finally:
    # Release every socket so the port is free again and the cell can be
    # re-run; otherwise the next bind() fails with "address already in use".
    for key in list(sel.get_map().values()):
        sel.unregister(key.fileobj)
        key.fileobj.close()
    sock.close()  # also covers the case where bind() failed before registration