In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

### Pulling down the Data

In [2]:
r = requests.get('https://www.datasciencenigeria.org/2020-bootcamp-attendees/')
r.content

b'<!DOCTYPE html>\n<html lang="en-US">\n<head>\n\t<meta charset="UTF-8">\n\t\t\n\t<!-- This site is optimized with the Yoast SEO plugin v14.4.1 - https://yoast.com/wordpress/plugins/seo/ -->\n\t<title>2020 Bootcamp Attendees - Data Science Nigeria</title>\n\t<meta name="description" content="The Data Science Nigeria Artificial Intelligence (AI) bootcamp, an all-expenses-paid learning bootcamp that builds Nigeria\xe2\x80\x99s capacity in the use of advanced machine learning and deep learning concepts and drives the application of AI for socio-economic development. The bootcamp is driven by a broader strategic intent to accelerate Nigeria\xe2\x80\x99s development through the solution-oriented application of machine learning to solve social and business problems, and to galvanize a data science knowledge revolution improving employability, technological innovations and sustainable socio-economic development. This reinforces the DSN\xe2\x80\x99s vision to create one million new jobs in the

In [3]:
from bs4 import BeautifulSoup4

soup = BeautifulSoup(r.content, 'html5lib')

In [7]:
table = soup.select('tr[class*=row]')
table

[<tr class="row-1 odd">
 	<th class="column-1">S/N</th><th class="column-2">Bootcamp ID</th><th class="column-3">Name</th><th class="column-4">Gender</th><th class="column-5">Community</th>
 </tr>,
 <tr class="row-2 even">
 	<td class="column-1">1</td><td class="column-2">DSNAIB001</td><td class="column-3">Alexander Ifenaike</td><td class="column-4">Male</td><td class="column-5">Campus- AI+ UI</td>
 </tr>,
 <tr class="row-3 odd">
 	<td class="column-1">2</td><td class="column-2">DSNAIB002</td><td class="column-3">Busayo Awobade</td><td class="column-4">Male</td><td class="column-5">Campus- AI+ FUNAAB</td>
 </tr>,
 <tr class="row-4 even">
 	<td class="column-1">3</td><td class="column-2">DSNAIB003</td><td class="column-3">Ahmed Adegunle</td><td class="column-4">Male</td><td class="column-5">City- AI+ Ado-Ekiti</td>
 </tr>,
 <tr class="row-5 odd">
 	<td class="column-1">4</td><td class="column-2">DSNAIB004</td><td class="column-3">Adepoju Oluwadara</td><td class="column-4">Female</td><td

In [6]:
len(table)

480

In [8]:
table[0]

<tr class="row-1 odd">
	<th class="column-1">S/N</th><th class="column-2">Bootcamp ID</th><th class="column-3">Name</th><th class="column-4">Gender</th><th class="column-5">Community</th>
</tr>

In [21]:
table[1]

<tr class="row-2 even">
	<td class="column-1">1</td><td class="column-2">Alexander Ifenaike</td><td class="column-3">Male</td><td class="column-4">Campus- AI+ UI</td>
</tr>

In [33]:
table[1].select('td[class=column-2]')

[<td class="column-2">Alexander Ifenaike</td>]

In [32]:
table[1].select('td[class=column-2]')[0]

<td class="column-2">Alexander Ifenaike</td>

In [9]:
table[1].select('td[class=column-1]')[0].string

'1'

In [13]:
print(table[1].select('td[class=column-2]')[0].string)
print(table[1].select('td[class=column-3]')[0].string)
print(table[1].select('td[class=column-4]')[0].string)
print(table[1].select('td[class=column-5]')[0].string)

DSNAIB001
Alexander Ifenaike
Male
Campus- AI+ UI


Putting it all together;

In [19]:
def parse_data(table):
    table_list = []
    
    for index in range(1, len(table)):
        current_row = table[index]
        
        position = current_row.select('td[class=column-1]')[0].string
        bootcamp_id = current_row.select('td[class=column-2]')[0].string
        name = current_row.select('td[class=column-3]')[0].string
        gender = current_row.select('td[class=column-4]')[0].string
        community = current_row.select('td[class=column-5]')[0].string

        table_list.append([position, bootcamp_id, name, gender, community])    
    return table_list

In [20]:
data = parse_data(table)
data

[['1', 'DSNAIB001', 'Alexander Ifenaike', 'Male', 'Campus- AI+ UI'],
 ['2', 'DSNAIB002', 'Busayo Awobade', 'Male', 'Campus- AI+ FUNAAB'],
 ['3', 'DSNAIB003', 'Ahmed Adegunle', 'Male', 'City- AI+ Ado-Ekiti'],
 ['4', 'DSNAIB004', 'Adepoju Oluwadara', 'Female', 'City- AI+ Abeokuta'],
 ['5', 'DSNAIB005', 'Imonmion Emmanuel Bright', 'Male', 'Campus- AI+ UNIBEN'],
 ['6', 'DSNAIB006', 'Emelike Caleb', 'Male', None],
 ['7', 'DSNAIB007', 'Cadeton Precious', 'Female', 'Campus- AI+ AAU'],
 ['8', 'DSNAIB008', 'Ayobami Akomolafe', 'Male', 'City- AI+ Ado-Ekiti'],
 ['9', 'DSNAIB009', 'Adejumobi Joshua', 'Female', 'Campus- AI+ FUNAAB'],
 ['10', 'DSNAIB010', 'Moyosore Oduwole', 'Female', 'City- AI+ Abeokuta'],
 ['11', 'DSNAIB011', 'Olamilekan Omotosho', 'Male', 'Campus- AI+ FUTA'],
 ['12', 'DSNAIB012', 'Matthew Oke', 'Male', 'Campus- AI+ FUTA'],
 ['13', 'DSNAIB013', 'Abdulqadri Afolabi', 'Male', 'Campus- AI+ FUT'],
 ['14', 'DSNAIB014', 'Gladens Popoola', 'Female', 'Campus- AI+ FUNAAB'],
 ['15', 'DSNAIB

## Converting the Data to Pandas DataFrame

### Getting the Headers

In [22]:
index_no = table[0].select('th[class=column-1]')[0].string
bootcamp_id = table[0].select('th[class=column-2]')[0].string
name = table[0].select('th[class=column-3]')[0].string
gender = table[0].select('th[class=column-4]')[0].string
community = table[0].select('th[class=column-5]')[0].string

header = [index_no, bootcamp_id, name, gender, community]
header

['S/N', 'Bootcamp ID', 'Name', 'Gender', 'Community']

In [26]:
bootcamp_participants = pd.DataFrame(data, columns=header)
bootcamp_participants.head()

Unnamed: 0,S/N,Bootcamp ID,Name,Gender,Community
0,1,DSNAIB001,Alexander Ifenaike,Male,Campus- AI+ UI
1,2,DSNAIB002,Busayo Awobade,Male,Campus- AI+ FUNAAB
2,3,DSNAIB003,Ahmed Adegunle,Male,City- AI+ Ado-Ekiti
3,4,DSNAIB004,Adepoju Oluwadara,Female,City- AI+ Abeokuta
4,5,DSNAIB005,Imonmion Emmanuel Bright,Male,Campus- AI+ UNIBEN


### Converting to CSV file

In [24]:
bootcamp_participants.to_csv("bootcamp_participants.csv", index=False)