# REGEX

In [None]:
# # Previous method
# hand = open('data/mbox-short.txt')
# for line in hand:
#     line = line.rstrip()
#     if line.find('From:') >= 0:
#         print(line)

# Regex method
import re

# Print every line of the mailbox file that contains the text 'From:'.
# The `with` block closes the file as soon as the loop ends, even on error.
with open('data/mbox-short.txt') as hand:
    for line in hand:
        line = line.rstrip()  # drop the trailing newline so print() does not double-space
        if re.search('From:', line):
            print(line)

In [None]:
# Matching and extracting data
x = 'My 2 favourite numbers are 19 and 42'
# [0-9]+ matches each run of decimal digits. The class must include 0,
# otherwise a number such as 10 or 2008 would be truncated or split.
numbers = re.findall(r'[0-9]+', x)
print(numbers)
# [a-h] matches one lowercase letter from a to h at a time.
print(re.findall('[a-h]', x))

In [None]:

# Greedy vs. non-greedy matching on the same input string
x = 'From: Using the : character'

# Greedy:     ^F = an 'F' at the start, .+ = one or more characters
#             (as many as possible), : = colon -> stops at the LAST colon.
greedy_pattern = '^F.+:'
# Non-greedy: the extra ? makes .+ take as few characters as possible,
#             so the match stops at the FIRST colon.
lazy_pattern = '^F.+?:'

for pattern in (greedy_pattern, lazy_pattern):
    print(re.findall(pattern, x))


In [None]:
# Fine tuning string extraction
x = 'From stephen.marquard@uct.ac.za Sat Jan  5 09:14:16 2008'

extraction_patterns = (
    # \S+ = one or more non-whitespace characters on each side of the '@'
    r'\S+@\S+',
    # ^ = start of line; the parentheses make findall return only the address
    r'^From (\S+@\S+)',
    # .* = any characters up to the '@'; ([^ ]*) captures the non-blank
    # characters after it, i.e. only the domain name
    r'^From .*@([^ ]*)',
    # \S+? = non-greedy run of non-whitespace; the match still has to reach
    # the '@', so the whole address is returned, as with the first pattern
    r'\S+?@\S+',
)
for pattern in extraction_patterns:
    print(re.findall(pattern, x))


In [None]:
# Collect every X-DSPAM-Confidence value from the mailbox file and report
# the largest one.
numlist = list()
# The `with` block closes the file even if a line fails to parse.
with open('data/mbox-short.txt') as hand:
    for line in hand:
        line = line.rstrip()
        # The pattern is anchored with ^, so a line yields at most one match.
        stuff = re.findall('^X-DSPAM-Confidence: ([0-9.]+)', line)
        if len(stuff) != 1:
            continue
        numlist.append(float(stuff[0]))

# max() raises ValueError on an empty list, so handle "no matching lines".
if numlist:
    print('Maximum:', max(numlist))
else:
    print('No X-DSPAM-Confidence values found')

In [None]:
# Regex assignment
import re
# Sum every run of digits found anywhere in the assignment file.
numlist = list()
# The `with` block closes the file when the loop ends, even on error.
with open('data/regex_assignment.txt') as text:
    for line in text:
        # No rstrip() needed: the trailing newline is not a digit, so it can
        # never be part of a match.
        numlist.extend(int(number) for number in re.findall('[0-9]+', line))
print(sum(numlist))

# Networks and sockets

In [18]:
import socket
# Fetch romeo.txt by sending a hand-written HTTP/1.0 request over a TCP socket.
# The `with` block closes the socket even if connect/send/recv raises.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as mysock:
    mysock.connect(('data.pr4e.org', 80))
    cmd = 'GET http://data.pr4e.org/romeo.txt HTTP/1.0\r\n\r\n'.encode() # encode() converts string to bytes
    # sendall() keeps writing until the whole request is sent; send() may
    # transmit only part of the buffer.
    mysock.sendall(cmd)

    chunks = []
    while True:
        data = mysock.recv(512)
        if len(data) < 1:  # empty read: the server closed the connection
            break
        chunks.append(data)

# Decode once after joining the chunks, so a multi-byte character that is
# split across two recv() calls cannot raise UnicodeDecodeError.
print(b''.join(chunks).decode(), end='') # decode() converts bytes to string

HTTP/1.1 200 OK
Date: Tue, 18 Mar 2025 22:01:02 GMT
Server: Apache/2.4.52 (Ubuntu)
Last-Modified: Sat, 13 May 2017 11:22:22 GMT
ETag: "a7-54f6609245537"
Accept-Ranges: bytes
Content-Length: 167
Cache-Control: max-age=0, no-cache, no-store, must-revalidate
Pragma: no-cache
Expires: Wed, 11 Jan 1984 05:00:00 GMT
Connection: close
Content-Type: text/plain

But soft what light through yonder window breaks
It is the east and Juliet is the sun
Arise fair sun and kill the envious moon
Who is already sick and pale with grief


In [21]:
import socket
# Request /intro-short.txt with a hand-written HTTP/1.0 request over a TCP socket.
# The `with` block closes the socket even if connect/send/recv raises.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as mysock:
    mysock.connect(('data.pr4e.org', 80))
    cmd = 'GET /intro-short.txt HTTP/1.0\r\n\r\n'.encode()
    # sendall() keeps writing until the whole request is sent; send() may
    # transmit only part of the buffer.
    mysock.sendall(cmd)

    chunks = []
    while True:
        data = mysock.recv(512)
        if len(data) < 1:  # empty read: the server closed the connection
            break
        chunks.append(data)

# Decode once after joining the chunks: this avoids a UnicodeDecodeError when
# a multi-byte character straddles two recv() calls. end='' keeps print()
# from adding a newline that is not part of the response.
print(b''.join(chunks).decode(), end='') # decode() converts bytes to string

HTTP/1.1 404 Not Found
Date: Tue, 18 Mar 2025 22:04:40 GMT
Server: Apache/2.4.52 (Ubuntu)
Content-Length: 278
Connection: close
Content-Type: text/html; charset=iso-8859-1

<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
<html><head>
<title>404 Not Found</title>
</head><body>
<h1>Not Found</h1>
<p>The requested URL was not found on this server.</p>
<hr>
<address>Apache/2.4.52 (Ubuntu) Server at do1.dr-chuck.com Port 80</address>
</body></html>



In [None]:
# Anatomy of a URL such as http://www.dr-chuck.com/page1.html:
# http:// is the protocol
# www.dr-chuck.com is the host
# /page1.html is the document



### Real-life example

In [None]:
# Example of connecting to an API and extracting data from it

import websocket
import json

# Callback that receives each message and displays its data
def on_message(ws, message):
    """Print the price carried by one incoming message.

    Args:
        ws: Connection object supplied by the caller; not used here.
        message: JSON text; after decoding, its "p" entry is read as the price.
    """
    # Decode the JSON text and pull out the price in one step, falling back
    # to the placeholder "No data" when there is no "p" entry.
    price = json.loads(message).get("p", "No data")
    print(f"Precio de Bitcoin: {price} USD")

# Binance stream endpoint for BTC/USDT trades (real-time prices)
url = "wss://stream.binance.com:9443/ws/btcusdt@trade"

# Build the WebSocket client and register on_message as the handler that is
# called for every incoming message.
ws = websocket.WebSocketApp(url=url, on_message=on_message)
# Left disabled so that running this cell only creates the client;
# uncomment the next line to open the connection and start receiving trades.
# ws.run_forever()
