In [1]:
import requests

In [2]:
# Landing page of the statistics site that is fetched and scraped below.
covid_stats = 'https://epidemic-stats.com/'

In [3]:
# requests applies no timeout by default, so an unresponsive server would
# block this cell forever; cap the wait at 30 seconds.
response = requests.get(covid_stats, timeout=30)
# Fail loudly on a 4xx/5xx reply instead of handing an error page to the parser.
response.raise_for_status()

In [4]:
# HTTP status code of the fetch (200 means the request succeeded).
response.status_code

200

In [5]:
# Size of the decoded response body, in characters.
len(response.text)

1316352

In [6]:
# Keep the decoded HTML under its own name; it is what gets parsed later.
page_contents = response.text

In [7]:
# Preview only the first 500 characters rather than dumping the whole page.
page_contents[:500]

'\n<!DOCTYPE html>\n<html lang="en">\n<head>\n    <meta charset="UTF-8">\n    <meta name="viewport" content="width=device-width, initial-scale=1.0">\n    <meta name="keywords"\n          content="epidemic, pandemic, coronavirus, COVID-19, covid, 19, 2019-nCoV, SARS-nCOV2, news, live, realtime, stats, statistics, charts, diagrams, countires">\n\n    \n    <meta name="description"\n          content="Monitoring of epidemic. Real-time coronavirus statistics. Live COVID-19 stats with charts for many countries."'

In [8]:
from bs4 import BeautifulSoup

In [9]:
# Parse the downloaded HTML with Python's built-in parser.
doc = BeautifulSoup(page_contents, features='html.parser')

In [10]:
# Collect every <a> element carrying the "text-primary" CSS class and
# show how many were found.
country_selection = 'text-primary'
countries_a_tag = doc.find_all('a', class_=country_selection)
len(countries_a_tag)

210

In [11]:
# Inspect the first five matched links to confirm the selector hits country entries.
countries_a_tag[:5]

[<a class="text-primary" href="/coronavirus/usa">
 <img src="https://www.countryflags.io/US/flat/16.png"/>
                     USA</a>,
 <a class="text-primary" href="/coronavirus/india">
 <img src="https://www.countryflags.io/IN/flat/16.png"/>
                     India</a>,
 <a class="text-primary" href="/coronavirus/brazil">
 <img src="https://www.countryflags.io/BR/flat/16.png"/>
                     Brazil</a>,
 <a class="text-primary" href="/coronavirus/russia">
 <img src="https://www.countryflags.io/RU/flat/16.png"/>
                     Russia</a>,
 <a class="text-primary" href="/coronavirus/france">
 <img src="https://www.countryflags.io/FR/flat/16.png"/>
                     France</a>]

In [12]:
infected_selection = 'infected-badges'

# Collect every <span> carrying the "infected-badges" CSS class.
infected_span_tag = doc.find_all('span', class_=infected_selection)
# The per-column lists built from these tags are later combined positionally,
# so there must be exactly one infected badge per country link. A bare
# len(...) in the middle of a cell is never displayed, so check it explicitly.
assert len(infected_span_tag) == len(countries_a_tag), (
    f"expected {len(countries_a_tag)} infected badges, "
    f"found {len(infected_span_tag)}"
)

infected_span_tag[:5]

[<span class="infected-badges">37365716</span>,
 <span class="infected-badges">32190846</span>,
 <span class="infected-badges">20319000</span>,
 <span class="infected-badges">6579212</span>,
 <span class="infected-badges">6425436</span>]

In [13]:
# The link text is padded with whitespace from the markup; strip it to get
# the bare country name.
first_country_label = countries_a_tag[0].text

first_country_label.strip()

'USA'

In [14]:
# Collect every <span> carrying the "deaths-badges" CSS class and count them.
deaths_selection = 'deaths-badges'
death_span_tag = doc.find_all('span', class_=deaths_selection)
len(death_span_tag)

210

In [15]:
# Check that the badge text converts cleanly to an integer.
first_death_count = death_span_tag[0].text

int(first_death_count)

637170

In [16]:
# Collect every <span> carrying the "recovered-badges" CSS class and count them.
recovered_selection = 'recovered-badges'
recovered_span_tag = doc.find_all('span', class_=recovered_selection)
len(recovered_span_tag)

210

In [17]:
# Text of the first recovered badge with surrounding whitespace removed.
recovered_span_tag[0].text.strip()

'30133691'

In [18]:
# Collect every <span> carrying the "deaths-badges-outline" CSS class.
death_percent_selection = 'deaths-badges-outline'
death_percent_span_tag = doc.find_all('span', class_=death_percent_selection)
# These values are later combined positionally with the country names, so
# there must be one per country link. A bare len(...) in the middle of a cell
# is never displayed, so check it explicitly.
assert len(death_percent_span_tag) == len(countries_a_tag), (
    f"expected {len(countries_a_tag)} death-percent badges, "
    f"found {len(death_percent_span_tag)}"
)

death_percent_span_tag[0].text

'1.7%'

In [19]:
# Drop the '%' sign so the badge text can be converted to a float.
float(death_percent_span_tag[0].text.strip('%'))

1.7

In [20]:
# Collect every <span> carrying the "recovered-badges-outline" CSS class.
recovered_percent_selection = 'recovered-badges-outline'
recovered_percent_span_tag = doc.find_all('span', class_=recovered_percent_selection)
# These values are later combined positionally with the country names, so
# there must be one per country link. A bare len(...) in the middle of a cell
# is never displayed, so check it explicitly.
assert len(recovered_percent_span_tag) == len(countries_a_tag), (
    f"expected {len(countries_a_tag)} recovered-percent badges, "
    f"found {len(recovered_percent_span_tag)}"
)

# Drop the '%' sign so the badge text can be converted to a float.
float(recovered_percent_span_tag[0].text.strip('%'))

80.6

In [21]:
# Whitespace-trimmed text of every country link, in page order.
country_name = [link.text.strip() for link in countries_a_tag]

print(country_name[:10])

['USA', 'India', 'Brazil', 'Russia', 'France', 'UK', 'Turkey', 'Argentina', 'Colombia', 'Spain']


In [22]:
# Infected badge texts converted to integers, in page order.
infected = [int(badge.text) for badge in infected_span_tag]
print(infected[:10])

[37365716, 32190846, 20319000, 6579212, 6425436, 6241011, 6039857, 5074725, 4860622, 4693540]


In [23]:
# Death badge texts converted to integers, in page order.
deaths = [int(badge.text) for badge in death_span_tag]
print(deaths[:10])

[637170, 431240, 567914, 169683, 112561, 130894, 52860, 108815, 123221, 82470]


In [24]:
# Recovered badge texts converted to integers, in page order.
recovered = [int(badge.text) for badge in recovered_span_tag]
print(recovered[:10])

[30133691, 31365316, 19173917, 5867890, 5866208, 4796774, 5575214, 4725426, 4681231, 3888717]


In [25]:
# Death percentages as floats, with the '%' sign stripped from each badge text.
death_percent = [float(badge.text.strip('%')) for badge in death_percent_span_tag]
print(death_percent[:10])

[1.7, 1.3, 2.8, 2.6, 1.8, 2.1, 0.9, 2.1, 2.5, 1.8]


In [26]:
# Recovered percentages as floats, with the '%' sign stripped from each badge text.
recovered_percent = [float(badge.text.strip('%')) for badge in recovered_percent_span_tag]
print(recovered_percent[:10])

[80.6, 97.4, 94.4, 89.2, 91.3, 76.9, 92.3, 93.1, 96.3, 82.9]


In [27]:
import pandas as pd 

# Map each output column name to the list scraped for it; the lists are
# combined positionally, one row per list index.
covid_dict = dict(
    country=country_name,
    infected=infected,
    deaths=deaths,
    recovered=recovered,
    death_percent=death_percent,
    recovered_percent=recovered_percent,
)

# Assemble the table and preview its first rows.
covid_data = pd.DataFrame(data=covid_dict)
covid_data.head()

Unnamed: 0,country,infected,deaths,recovered,death_percent,recovered_percent
0,USA,37365716,637170,30133691,1.7,80.6
1,India,32190846,431240,31365316,1.3,97.4
2,Brazil,20319000,567914,19173917,2.8,94.4
3,Russia,6579212,169683,5867890,2.6,89.2
4,France,6425436,112561,5866208,1.8,91.3


In [28]:
# Write the table to disk without the positional index column. `index` is a
# boolean option, so pass False explicitly rather than relying on None being falsy.
covid_data.to_csv('covidData.csv', index=False)