In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
def get_soup(url, timeout=30):
    """Download a page and parse it into a BeautifulSoup tree.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout ``requests.get`` may block indefinitely.

    Returns
    -------
    BeautifulSoup
        Parsed document built from the response body.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    requests.RequestException
        On connection failures or when the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error responses instead of parsing an error page as if
    # it were the expected document.
    response.raise_for_status()
    # Name the parser explicitly: the generic 'html' feature lets bs4 pick
    # whichever parser happens to be installed, so the resulting tree could
    # differ from one environment to another.
    return BeautifulSoup(response.text, 'html.parser')

def get_price_with_country(soup):
    """Pull country names and price strings out of a parsed page.

    Country names come from ``div`` elements with class
    ``outsideTitleElement``: surrounding whitespace is trimmed and every
    ``*`` character is removed. Prices come from ``div`` elements whose
    inline ``style`` attribute equals the style string defined below, with
    surrounding whitespace trimmed.

    Returns
    -------
    tuple
        ``(country, price)`` -- two lists of strings. The lists are built
        independently of each other; nothing here checks that they have
        the same length.
    """
    price_style = "position: absolute; top: 2px; left: 7px; height: 15px; color: #000000;"

    country = []
    for name_div in soup.find_all('div', class_='outsideTitleElement'):
        # Trim first, then drop asterisks (same order as the text cleanup
        # has always been applied).
        country.append(name_div.text.strip().replace('*', ''))

    price = []
    for value_div in soup.find_all('div', style=price_style):
        price.append(value_div.text.strip())

    return country, price

def get_dataframe(country, price):
    """Build a two-column DataFrame from parallel lists.

    Parameters
    ----------
    country : list
        Values for the ``country`` column.
    price : list
        Values for the ``price`` column.

    Returns
    -------
    pandas.DataFrame
        Frame with columns ``country`` and ``price``, in that order.
    """
    # assign() adds the columns one after another to a copy of the empty
    # frame, i.e. the same column-by-column assignment done in one call.
    return pd.DataFrame().assign(country=country, price=price)

In [3]:
energy_sources = ['gasoline','diesel','lpg','electricity','natural_gas']

combined_df = None

# Scrape one page per energy source and fold the results into a single
# table keyed by country; each page's 'price' column is renamed to the
# name of its energy source.
for energy_source in energy_sources:
    url = f'https://www.globalpetrolprices.com/{energy_source}_prices/'
    soup = get_soup(url)
    country, price = get_price_with_country(soup)
    df = get_dataframe(country, price).rename(columns={'price': energy_source})
    if combined_df is None:
        # First source: nothing to merge with yet.
        combined_df = df
    else:
        # Outer join keeps countries that are listed for only some sources.
        combined_df = combined_df.merge(df, on='country', how='outer')



In [4]:
# Preview the first rows of the merged per-country price table.
combined_df.head()

Unnamed: 0,country,gasoline,diesel,lpg,electricity,natural_gas
0,Iran,0.029,0.006,,0.002,0.001
1,Libya,0.031,0.031,,0.008,
2,Venezuela,0.035,0.004,,0.046,
3,Algeria,0.338,0.214,0.066,0.039,0.003
4,Kuwait,0.34,0.372,,0.029,


In [5]:
# Write the merged table to a CSV file; index=False leaves the row index
# out of the file.
combined_df.to_csv('energy-source-price.csv', index=False)