In [1]:
from io import StringIO

import pandas as pd
import requests

# Scrape the formula1.com practice-session result tables for the 2022 and 2023
# seasons and combine them into a single DataFrame, `all_races_df`.
#
# Each map goes from the race slug to the numeric race ID; both appear in the
# results URL: .../results.html/<year>/races/<race_id>/<race>/practice-<n>.html
id_circuit_map_2022 = {
    'bahrain': 1124,
    'saudi-arabia': 1125,
    'australia': 1108,
    'emilia-romagna': 1109,
    'miami': 1110,
    'spain': 1111,
    'monaco': 1112,
    'azerbaijan': 1126,
    'canada': 1113,
    'great-britain': 1114,
    'austria': 1115,
    'france': 1116,
    'hungary': 1117,
    'belgium': 1118,
    'netherlands': 1119,
    'italy': 1120,
    'singapore': 1133,
    'japan': 1134,
    'united-states': 1135,
    'mexico': 1136,
    'brazil': 1137,
    'abu-dhabi': 1138,
}

id_circuit_map_2023 = {
    'bahrain': 1141,
    'saudi-arabia': 1142,
    'australia': 1143,
    'azerbaijan': 1207,
    'miami': 1208,
    'monaco': 1210,
    'spain': 1211,
    'canada': 1212,
    'austria': 1213,
    'great-britain': 1214,
    'hungary': 1215,
    'belgium': 1216,
    # Later 2023 rounds are disabled for now; uncomment to include them.
    # 'netherlands': 1217,
    # 'italy': 1218,
    # 'singapore': 1219,
    # 'japan': 1220,
    # 'qatar': 1221,
    # 'united-states': 1222,
    # 'mexico': 1223,
    # 'brazil': 1224,
    # 'las-vegas': 1225,
    # 'abu-dhabi': 1136,  # NOTE(review): 1136 is 'mexico' in the 2022 map - confirm the ID before enabling
}

# Race IDs of the sprint weekends in each season (these run fewer practice sessions).
# Every ID here must exist in the matching id_circuit_map_*; 1115 is Austria
# (the earlier value 1151 matched no race, so Austria was never flagged as a sprint).
sprint_races_2022 = [1109, 1115, 1137]
sprint_races_2023 = [1207, 1213, 1216]

# Number of practice sessions to request per weekend type.
REGULAR_PRACTICE_SESSIONS = 3
SPRINT_PRACTICE_SESSIONS = {'2022': 2, '2023': 1}

# Seconds to wait for formula1.com before giving up on a request.
REQUEST_TIMEOUT_SECONDS = 30

# Maps the years to the corresponding dictionaries and sprint races
id_circuit_maps = {
    '2022': (id_circuit_map_2022, sprint_races_2022),
    '2023': (id_circuit_map_2023, sprint_races_2023),
}

all_races = []

# Loop through the years and their respective races
for year, (id_circuit_map, sprint_races) in id_circuit_maps.items():
    for race, race_id in id_circuit_map.items():
        # Sprint weekends have 2 practice sessions in 2022 and 1 in 2023;
        # every other weekend has 3.
        if race_id in sprint_races:
            num_practice_sessions = SPRINT_PRACTICE_SESSIONS.get(year, REGULAR_PRACTICE_SESSIONS)
        else:
            num_practice_sessions = REGULAR_PRACTICE_SESSIONS

        for p in range(1, num_practice_sessions + 1):
            # Build the URL using the race_id and practice number
            url = f"https://www.formula1.com/en/results.html/{year}/races/{race_id}/{race}/practice-{p}.html"

            response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)

            # A missing page (404) means there are no further sessions for this race
            if response.status_code == 404:
                break

            # Any other HTTP error should fail loudly instead of being parsed as results
            response.raise_for_status()

            # read_html raises ValueError when the page contains no <table>;
            # wrap the markup in StringIO because passing literal HTML is deprecated.
            try:
                tables = pd.read_html(StringIO(response.text))
            except ValueError:
                print(f"No results table found at {url}; skipping")
                continue

            # The first table on the page is taken as the session classification
            race_tables = tables[0]
            race_tables['Race'] = race  # Add a column for the race name
            race_tables['Year'] = year  # Add a column for the year
            race_tables['Practice Session'] = p  # Add a column for the practice session number
            all_races.append(race_tables)

# Concatenate all the individual DataFrames into one
# (pd.concat raises on an empty list, so fall back to an empty frame).
if all_races:
    all_races_df = pd.concat(all_races, ignore_index=True)
else:
    all_races_df = pd.DataFrame()

# You can now manipulate the all_races_df DataFrame or save it to a file
# For example, to save to a CSV file:
# all_races_df.to_csv('F1_Practice_Sessions_2022_2023.csv', index=False)


In [2]:
# Display the combined practice-session results (the notebook shows a truncated preview).
all_races_df

Unnamed: 0.1,Unnamed: 0,Pos,No,Driver,Car,Time,Gap,Laps,Unnamed: 8,Race,Year,Practice Session,Unnamed: 7
0,,1,10,Pierre Gasly GAS,AlphaTauri RBPT,1:34.193,,23.0,,bahrain,2022,1,
1,,2,16,Charles Leclerc LEC,Ferrari,1:34.557,+0.364s,22.0,,bahrain,2022,1,
2,,3,55,Carlos Sainz SAI,Ferrari,1:34.611,+0.418s,23.0,,bahrain,2022,1,
3,,4,63,George Russell RUS,Mercedes,1:34.629,+0.436s,23.0,,bahrain,2022,1,
4,,5,1,Max Verstappen VER,Red Bull Racing RBPT,1:34.742,+0.549s,22.0,,bahrain,2022,1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1843,,16,2,Logan Sargeant SAR,Williams Mercedes,,,4.0,,belgium,2023,1,
1844,,17,18,Lance Stroll STR,Aston Martin Aramco Mercedes,,,5.0,,belgium,2023,1,
1845,,18,10,Pierre Gasly GAS,Alpine Renault,,,2.0,,belgium,2023,1,
1846,,19,31,Esteban Ocon OCO,Alpine Renault,,,2.0,,belgium,2023,1,


In [14]:
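# pip install pytesseract
# NOTE: the cells below import `pytesseract`; the PyPI package named `tesseract` appears to be a different project.
# pytesseract also needs the Tesseract-OCR executable installed separately (see `tesseract_cmd` below).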

Collecting tesseract
  Downloading tesseract-0.1.3.tar.gz (45.6 MB)
                                              0.0/45.6 MB ? eta -:--:--
                                              0.4/45.6 MB 8.9 MB/s eta 0:00:06
                                              0.9/45.6 MB 9.2 MB/s eta 0:00:05
     -                                        1.3/45.6 MB 9.4 MB/s eta 0:00:05
     -                                        1.8/45.6 MB 9.6 MB/s eta 0:00:05
     -                                        2.2/45.6 MB 10.1 MB/s eta 0:00:05
     --                                       2.6/45.6 MB 9.9 MB/s eta 0:00:05
     --                                       3.1/45.6 MB 9.8 MB/s eta 0:00:05
     ---                                      3.5/45.6 MB 9.7 MB/s eta 0:00:05
     ---                                      3.9/45.6 MB 9.5 MB/s eta 0:00:05
     ---                                      4.3/45.6 MB 9.7 MB/s eta 0:00:05
     ----                                     4.8/45.6 MB 9.8 MB/s et

In [5]:
import pytesseract
from PIL import Image

# Tell pytesseract where the Tesseract executable lives; edit this if it is installed elsewhere.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Location of the PNG file to read
png_image_path = 'C:\\Users\\Afonso Cadete\\Pictures\\silverstone.png'

# Open the PNG first, then hand the image object to Tesseract for OCR
source_image = Image.open(png_image_path)
extracted_text = pytesseract.image_to_string(source_image)

# Show the OCR result
print(extracted_text)


nhl Lire FORMULA 1 ARAMCO BRITISH GRAND PRIX 2023
SILVERSTONE PREVIEW @R2A\7 ARITA

COMPOUNDS CIRCUIT INFORMATION SILVERSTONE CIRCUIT

cl = Qorenoraes SS 52
'& >) Giceosmnce Sy 506.198, It | — |
=

LATERAL ENERGY

FAI HL |

SS Tarcummeninn SD 5.8910.

C HA [CY (1:27.097.

“6 Jui

Az

LONGITUDINAL ENERGY

wae SECTOR 3 pos, TRACTION. TYRE STRESS
MEDIUM Fea 2 say

BRAKING
afl 123 4 5

WTR
Bm 12345 5
TRACK EVOLUTION. DOWNFORCE

wii2s4as

cz €: ASPHALT GRIP

ot a
“@ 18” TYRE ASPHALT ABRASION

bl 6 x Taal) ro SIT peri ig

<= COE




In [4]:
import requests
from PIL import Image
import pytesseract
from io import BytesIO

# Point pytesseract at the Tesseract executable explicitly (no PATH lookup needed);
# update this line with the correct path if necessary.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# URL of the image
image_url = 'https://media.formula1.com/image/upload/f_auto/q_auto/v1698927994/fom-website/2023/Brazil/19-br23-preview-en.jpg.transform/9col/image.jpg'

# Download the image. The timeout stops the cell hanging on a stalled connection,
# and raise_for_status() keeps an HTTP error page from being handed to Pillow.
response = requests.get(image_url, timeout=30)
response.raise_for_status()
img = Image.open(BytesIO(response.content))

# Get basic details about the image
image_details = {
    'format': img.format,
    'mode': img.mode,
    'size': img.size,
}

print("Image Details:", image_details)

# Use Tesseract to do OCR on the whole image
text = pytesseract.image_to_string(img)

print("Extracted Text:", text)


Image Details: {'format': 'JPEG', 'mode': 'RGB', 'size': (997, 561)}
Extracted Text: MDT | i= FORMULA 1 ROLEX GRANDE PREMIO DE SAO PAULO 2023
INTERLAGOS PREVIE E ik,

=

COMPOUNDS CIRCUIT INFORMATION AUTODROMO JOSE CARLOS PACE
c2 i LATERAL ENERGY
ry Cs i, SSSI A
€ 2) TREE Sy 505.879 yy F— J — 3
SS TREES, 4.309., fae be SS |

TC, (1:10.54.

\
sy
con ce wax>

LONGITUDINAL ENERGY

SECTORS TRACTION Tyme srREss
Te oma [xa 25g
ASPHALT GRIP v4 BRAKING
SECTOR, 2) i TSI,
[s a
( ‘ a? TYRE ASPHALT ABRASION y LATERAL
TOF CAMBER LRT HN STATING PRESSURES jozsaese aa] ) (pov 2a)
>>| a ° M4
TRACK EVOLUTION. DOWNFORCE

F123 45




In [6]:
import cv2
import pytesseract
import numpy as np
import requests
from io import BytesIO

# Download an image, split it into contour regions on a thresholded copy, and
# OCR each region separately with Tesseract.

# URL of the image
image_url = 'https://media.formula1.com/image/upload/f_auto/q_auto/v1698927994/fom-website/2023/Brazil/19-br23-preview-en.jpg.transform/9col/image.jpg'

# Download the image using requests; fail early on a stalled connection or HTTP error
response = requests.get(image_url, timeout=30)
response.raise_for_status()
image_bytes = BytesIO(response.content)

# Convert to a numpy array
nparr = np.frombuffer(image_bytes.getvalue(), np.uint8)

# Load the image using OpenCV; imdecode returns None when the bytes are not a valid image
original_image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
if original_image is None:
    raise ValueError(f"Could not decode an image from {image_url}")

# Convert the image to grayscale
gray_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)

# Apply inverted thresholding: pixels above 150 become 0, all others become 255
_, binary_image = cv2.threshold(gray_image, 150, 255, cv2.THRESH_BINARY_INV)

# Find the outer contours, used here as candidate text regions
contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Order regions by x + y * image_width (top-to-bottom, then left-to-right),
# computing each bounding box only once.
image_width = binary_image.shape[1]
boxed_contours = sorted(
    ((cv2.boundingRect(ctr), ctr) for ctr in contours),
    key=lambda pair: pair[0][0] + pair[0][1] * image_width,
)
contours = [ctr for _box, ctr in boxed_contours]

# Initialize an empty list to hold extracted text
extracted_text = []

# Iterate over the ordered regions and extract text from each one
for (x, y, w, h), _ctr in boxed_contours:
    # Crop the region of interest from the binary image
    roi = binary_image[y:y+h, x:x+w]

    # Use pytesseract to extract text from the region
    text = pytesseract.image_to_string(roi, config='--psm 6').strip()

    # Keep only regions where some text was recognised
    if text:
        extracted_text.append(text)

# Now you can interpret the extracted information based on its known structure
# This part is highly specific to the structure of the data in your image

# Print the extracted text
for text in extracted_text:
    print(text)


Poke A fe Pe ey gut REE ATE BAG Ba tent
| Frere | INTERLAGOS PREVIEW
COMPOUNDS CIRCUIT INFORMATION ALISO. HEE CaN OS Bare
c2 Pad Funan oF LAPs —s . LATERAL ENERGY,
_ TNC NSIT
Gaceosrance Sp —_— — —_—
SHARD We nccoro MS “mee -1 1 oom :
ox en se _— baat —
— ent — FIL SERLEODIND. aK —
a se ind TRACTION___rvne sraess
MEDIUM Lias@s as ae
sspuurone__*8_aranng
. seta Liases Tf Wires as
Ww" VEL ASPHALT ABRASION _ LATERAL
rT a ree ae Oe SO
(3) ee eee rueceruren—** _oawnoee
# “ou “ al “ue - L128 4 St wy y23 4 5.


In [7]:
import cv2
import pytesseract
import numpy as np
import requests
from io import BytesIO

# Download an image, pre-process it (grayscale -> CLAHE -> adaptive threshold ->
# invert) and run Tesseract once over the whole result.

# URL of the image
image_url = 'https://media.formula1.com/image/upload/f_auto/q_auto/v1698927994/fom-website/2023/Brazil/19-br23-preview-en.jpg.transform/9col/image.jpg'

# Download the image using requests; fail early on a stalled connection or HTTP error
response = requests.get(image_url, timeout=30)
response.raise_for_status()
image_bytes = BytesIO(response.content)

# Convert to a numpy array and decode; imdecode returns None for invalid image bytes
nparr = np.frombuffer(image_bytes.getvalue(), np.uint8)
original_image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
if original_image is None:
    raise ValueError(f"Could not decode an image from {image_url}")

# Pre-processing steps
# Convert to grayscale
gray_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
# Improve local contrast with CLAHE
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
clahe_image = clahe.apply(gray_image)
# Apply adaptive (Gaussian) thresholding with block size 11 and constant 2
binary_image = cv2.adaptiveThreshold(clahe_image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)

# Inverting the image
binary_image = cv2.bitwise_not(binary_image)

# Now, we can apply OCR on the pre-processed image
custom_config = r'--oem 3 --psm 6'
text = pytesseract.image_to_string(binary_image, config=custom_config)

print("Extracted Text:", text)


Extracted Text: Fd eed oa ee SVS RAN DEE REM IDET SAGE BAU LON 025)
Lomede ll Se NER PANG O)S BP REIT EW; BRAZIE,
aimee
COMPOUNDS’) LS CIRCUIT. INFORMATION MMU SSE SUSTSEE CAR OEDEE
©) Sess CONT iste Ba Gee Ey
SZ). Gare ras i ol ee
Lu res en nearer — Atel af} aaa ‘2
ASF HARD wall a ATI Seen | Be:
(65) FS peatlaatiana ‘ Co A eee SLO
\C3Z a ea ERR OEE —) tel tome Fe
@) SIE Sg ree) army NM ore eos
t ie eed ET Or oe ee SGN ot LONGITUDINAL ENERGY SPREE oe oon
nar pena Satis eaatest ) atartertanpa tamara TON FT ses een
Bee MEDIUM SR) ish CAST ea a pie RRS eoare aera 4
nag 7 = See fy eset on ii cacmzsaxeasma ne (0) ¢ fo ES ES
Re (QV | ee ORR
Aa RRR ES SO ae AC se
ASSN JRC NEESER cams | eran
Rea CHS ee
ni, sinisorrii))) Wag esl Bed) Veeascan Go marsa a
_* A lM elf LL ag gE NTE SRS



In [9]:
import cv2
import pytesseract
import numpy as np

# Read a local image, split it into contour regions on a thresholded copy, and
# OCR each region separately with Tesseract.

# Load the image using OpenCV
image_path = 'C:\\Users\\Afonso Cadete\\Pictures\\bahrain.jpeg'
original_image = cv2.imread(image_path)

# imread returns None instead of raising when the file is missing or unreadable
if original_image is None:
    raise FileNotFoundError(f"Could not read an image from {image_path}")

# Convert the image to grayscale
gray_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)

# Apply inverted thresholding: pixels above 150 become 0, all others become 255
_, binary_image = cv2.threshold(gray_image, 150, 255, cv2.THRESH_BINARY_INV)

# Find the outer contours, used here as candidate text regions
contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Order regions by x + y * image_width (top-to-bottom, then left-to-right),
# computing each bounding box only once.
image_width = binary_image.shape[1]
boxed_contours = sorted(
    ((cv2.boundingRect(ctr), ctr) for ctr in contours),
    key=lambda pair: pair[0][0] + pair[0][1] * image_width,
)
contours = [ctr for _box, ctr in boxed_contours]

# Initialize an empty list to hold extracted text
extracted_text = []

# Iterate over the ordered regions and extract text from each one
for (x, y, w, h), _ctr in boxed_contours:
    # Crop the region of interest from the binary image
    roi = binary_image[y:y+h, x:x+w]

    # Use pytesseract to extract text from the region
    text = pytesseract.image_to_string(roi, config='--psm 6').strip()

    # Keep only regions where some text was recognised
    if text:
        extracted_text.append(text)

# Now you can interpret the extracted information based on its known structure
# This part is highly specific to the structure of the data in your image

# Print the extracted text
for text in extracted_text:
    print(text)


— Gees sakvire PREVIEW ;

CIRCUIT INFORMATION | ps > a .
; ° ef -
6 meee 5 preven evqution LF cd “4 rl
mm 1 SM Z vo pe:
6 eras sence \ id
cb? 84s, fi 2545]
+a” 6 sgurasagor_ ses “Sa. famagers ) (esnune  }
wes as, bea a es) {ere } \ { }
CM) Ce a fot ferme) (os)
COMPOUNDS
(er (e2 . veeconacrnt se sane

= = 200° e) stipe O

HARD MEDIUM MR an NT :
