In [1]:
# Imports
import requests
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt

%matplotlib inline

In [7]:
# XCBrasil homepage
xcbrasil_url = 'http://www.xcbrasil.com.br'

# Retrieve the page. The explicit timeout keeps the cell from hanging forever
# when the server never answers (requests applies no timeout by default).
xcbrasil_response = requests.get(xcbrasil_url, timeout=30)

# Fail loudly on an HTTP error status instead of silently parsing an error page
xcbrasil_response.raise_for_status()

# Parse the whole HTML page using BeautifulSoup
xcbrasil_soup = BeautifulSoup(xcbrasil_response.text, 'html.parser')

# Title of the parsed page
xcbrasil_soup.title

<title>www.xcbrasil.com.br - LEONARDO</title>

In [9]:
# `.string` returns only the text inside the <title> element, without the HTML tags
xcbrasil_soup.title.string

'www.xcbrasil.com.br - LEONARDO'

# Collecting Data from the Table

In [75]:
# Grab the first <table> element whose CSS class is "listTable"
flights = xcbrasil_soup.find('table', class_='listTable')

We will use the `.find_all()` method to search the HTML tree for particular tags and get a `list` with all the relevant objects.

In [472]:
# One Series per table row, holding the text of each <td> cell.
# The [1:-1] slice leaves out the first and the last <tr> of the table.
lst = [
    pd.Series([cell.text for cell in tr.find_all('td')])
    for tr in flights.find_all('tr')[1:-1]
]

In [473]:
# Place the Series side by side as columns, then transpose so that
# every scraped row becomes one DataFrame row
data = pd.concat(lst, axis='columns').transpose()

In [474]:
# Preview the first rows of the raw table; columns still carry positional labels
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1,11/06/2022,Guilherme Brasil\nParanã Brasilia - BR [~0.8]\n,1:12,15.9 km,20.5 km,30.7,,,
1,2,11/06/2022,Anderson Xavier de Almeida\nParanã Brasilia -...,0:58,15.1 km,19.4 km,29.03,,,
2,3,11/06/2022,CELIO DELCOR\nBassano - IT\n,1:46,5.5 km,13.4 km,23.47,,,
3,4,11/06/2022,Helton Kraus\nSanto Amaro da Imperatriz - BR\n,1:05,5.5 km,13.8 km,20.75,,,
4,5,11/06/2022,Italo Manasses\nRampa do Camilo Pansini - Al...,0:43,5.1 km,9.8 km,14.73,,,


In [475]:
# Discard the row-number column (0) and the trailing columns (7-9),
# which show no values in the preview
unused_columns = [0, 7, 8, 9]
data = data.drop(unused_columns, axis='columns')
data.head()

Unnamed: 0,1,2,3,4,5,6
0,11/06/2022,Guilherme Brasil\nParanã Brasilia - BR [~0.8]\n,1:12,15.9 km,20.5 km,30.7
1,11/06/2022,Anderson Xavier de Almeida\nParanã Brasilia -...,0:58,15.1 km,19.4 km,29.03
2,11/06/2022,CELIO DELCOR\nBassano - IT\n,1:46,5.5 km,13.4 km,23.47
3,11/06/2022,Helton Kraus\nSanto Amaro da Imperatriz - BR\n,1:05,5.5 km,13.8 km,20.75
4,11/06/2022,Italo Manasses\nRampa do Camilo Pansini - Al...,0:43,5.1 km,9.8 km,14.73


In [476]:
# Replace the positional labels with descriptive column names
column_names = [
    'Date',
    'Pilot and Takeoff',
    'Duration',
    'Straight Distance',
    'OLC KM',
    'OLC Points',
]
data.columns = column_names

In [477]:
# Preview the first rows to confirm the descriptive column names
data.head()

Unnamed: 0,Date,Pilot and Takeoff,Duration,Straight Distance,OLC KM,OLC Points
0,11/06/2022,Guilherme Brasil\nParanã Brasilia - BR [~0.8]\n,1:12,15.9 km,20.5 km,30.7
1,11/06/2022,Anderson Xavier de Almeida\nParanã Brasilia -...,0:58,15.1 km,19.4 km,29.03
2,11/06/2022,CELIO DELCOR\nBassano - IT\n,1:46,5.5 km,13.4 km,23.47
3,11/06/2022,Helton Kraus\nSanto Amaro da Imperatriz - BR\n,1:05,5.5 km,13.8 km,20.75
4,11/06/2022,Italo Manasses\nRampa do Camilo Pansini - Al...,0:43,5.1 km,9.8 km,14.73


In [478]:
# Each 'Pilot and Takeoff' cell looks like "<pilot>\n<takeoff>\n".
# Strip the surrounding newlines and split only at the first remaining one, so
# each row yields a pilot part and a location part. This avoids dropping a
# hard-coded third column in place, which fails whenever the split does not
# produce exactly three columns.
df2 = (
    data['Pilot and Takeoff']
    .str.strip('\n')
    .str.split('\n', n=1, expand=True)
)
df2.columns = ['Pilot', 'Location']
df2.head()

Unnamed: 0,Pilot,Location
0,Guilherme Brasil,Paranã Brasilia - BR [~0.8]
1,Anderson Xavier de Almeida,Paranã Brasilia - BR [~0.8]
2,CELIO DELCOR,Bassano - IT
3,Helton Kraus,Santo Amaro da Imperatriz - BR
4,Italo Manasses,Rampa do Camilo Pansini - Alto São Vice... - BR


In [479]:
# Attach the Pilot and Location columns to the flights table, aligned on the index
data = data.join(other=df2, how='left')

In [480]:
# Preview the table after joining the Pilot and Location columns
data.head()

Unnamed: 0,Date,Pilot and Takeoff,Duration,Straight Distance,OLC KM,OLC Points,Pilot,Location
0,11/06/2022,Guilherme Brasil\nParanã Brasilia - BR [~0.8]\n,1:12,15.9 km,20.5 km,30.7,Guilherme Brasil,Paranã Brasilia - BR [~0.8]
1,11/06/2022,Anderson Xavier de Almeida\nParanã Brasilia -...,0:58,15.1 km,19.4 km,29.03,Anderson Xavier de Almeida,Paranã Brasilia - BR [~0.8]
2,11/06/2022,CELIO DELCOR\nBassano - IT\n,1:46,5.5 km,13.4 km,23.47,CELIO DELCOR,Bassano - IT
3,11/06/2022,Helton Kraus\nSanto Amaro da Imperatriz - BR\n,1:05,5.5 km,13.8 km,20.75,Helton Kraus,Santo Amaro da Imperatriz - BR
4,11/06/2022,Italo Manasses\nRampa do Camilo Pansini - Al...,0:43,5.1 km,9.8 km,14.73,Italo Manasses,Rampa do Camilo Pansini - Alto São Vice... - BR


In [481]:
# The combined column is redundant once Pilot and Location are separate columns
data = data.drop('Pilot and Takeoff', axis='columns')
data.head()

Unnamed: 0,Date,Duration,Straight Distance,OLC KM,OLC Points,Pilot,Location
0,11/06/2022,1:12,15.9 km,20.5 km,30.7,Guilherme Brasil,Paranã Brasilia - BR [~0.8]
1,11/06/2022,0:58,15.1 km,19.4 km,29.03,Anderson Xavier de Almeida,Paranã Brasilia - BR [~0.8]
2,11/06/2022,1:46,5.5 km,13.4 km,23.47,CELIO DELCOR,Bassano - IT
3,11/06/2022,1:05,5.5 km,13.8 km,20.75,Helton Kraus,Santo Amaro da Imperatriz - BR
4,11/06/2022,0:43,5.1 km,9.8 km,14.73,Italo Manasses,Rampa do Camilo Pansini - Alto São Vice... - BR


In [482]:
# Put the identifying columns first, followed by the flight metrics
column_order = [
    'Date', 'Pilot', 'Location',
    'Duration', 'Straight Distance', 'OLC KM', 'OLC Points',
]
data = data[column_order]
data.head()

Unnamed: 0,Date,Pilot,Location,Duration,Straight Distance,OLC KM,OLC Points
0,11/06/2022,Guilherme Brasil,Paranã Brasilia - BR [~0.8],1:12,15.9 km,20.5 km,30.7
1,11/06/2022,Anderson Xavier de Almeida,Paranã Brasilia - BR [~0.8],0:58,15.1 km,19.4 km,29.03
2,11/06/2022,CELIO DELCOR,Bassano - IT,1:46,5.5 km,13.4 km,23.47
3,11/06/2022,Helton Kraus,Santo Amaro da Imperatriz - BR,1:05,5.5 km,13.8 km,20.75
4,11/06/2022,Italo Manasses,Rampa do Camilo Pansini - Alto São Vice... - BR,0:43,5.1 km,9.8 km,14.73


In [483]:
# Display the cleaned table itself rather than only its first rows
data

Unnamed: 0,Date,Pilot,Location,Duration,Straight Distance,OLC KM,OLC Points
0,11/06/2022,Guilherme Brasil,Paranã Brasilia - BR [~0.8],1:12,15.9 km,20.5 km,30.7
1,11/06/2022,Anderson Xavier de Almeida,Paranã Brasilia - BR [~0.8],0:58,15.1 km,19.4 km,29.03
2,11/06/2022,CELIO DELCOR,Bassano - IT,1:46,5.5 km,13.4 km,23.47
3,11/06/2022,Helton Kraus,Santo Amaro da Imperatriz - BR,1:05,5.5 km,13.8 km,20.75
4,11/06/2022,Italo Manasses,Rampa do Camilo Pansini - Alto São Vice... - BR,0:43,5.1 km,9.8 km,14.73
5,11/06/2022,Marcelo Coelho Lobianco,Lousã - PT,0:42,2.6 km,6.7 km,10.05
6,11/06/2022,Jean Pierre Acabado,Mieussy - FR [~17.1],0:39,3.7 km,5.9 km,8.9
7,11/06/2022,Italo Manasses,Rampa do Camilo Pansini - Alto São Vice... - BR,0:21,3.4 km,5.7 km,8.59
8,11/06/2022,Aloisio Cunha,Moedao BH - BR,0:16,2.4 km,5.6 km,8.35
9,11/06/2022,Aloisio Cunha,Moedao BH - BR,0:12,2.5 km,5.0 km,7.54
