##### Using yfinance

In [1]:
import yfinance as yf

# Download Brent oil historical data
brent = yf.download("BZ=F", start="1987-01-01", end="2023-12-31")
brent.to_csv('../data/brent_oil_prices_yf.csv')
print("Data saved to brent_oil_prices_yf.csv")


[*********************100%***********************]  1 of 1 completed

Data saved to brent_oil_prices_yf.csv





##### To scrape relevant events from a Wikipedia page (e.g., "Timeline of the oil industry" or a specific list of global events) for the years 1987-2023.

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL of the Wikipedia page with oil industry-related events
url = 'https://en.wikipedia.org/wiki/Price_of_oil'  # Replace with a relevant page

# Send a GET request to fetch page content
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

# Find the relevant sections for events
events = []

# Example: looking for events in <li> tags under certain headers
for li in soup.find_all('li'):
    # This may require adjustment based on the structure of the page
    text = li.get_text()
    year = text[:4]
    # Filter only events within the date range
    if year.isdigit() and 1987 <= int(year) <= 2023:
        events.append({
            'Year': int(year),
            'Event': text[5:]
        })

# Convert to DataFrame
events_df = pd.DataFrame(events)

# Save to CSV
events_df.to_csv('brent_oil_related_events.csv', index=False)
print("Events data saved to oil_related_events.csv")
display(events_df.head())


Events data saved to oil_related_events.csv


Unnamed: 0,Year,Event
0,2007,2008 world food price crisis
1,2011,2013 world oil market chronology
2,2014,2016 world oil market chronology
3,2017,2019 world oil market chronology
4,2020,2022 world oil market chronology


#### Data retrival from World bank

In [2]:
import requests
import pandas as pd

# Fetch country data from REST Countries API
response = requests.get("https://restcountries.com/v3.1/all")
country_data = response.json()

# Create lists for country metadata
countries = []
regions = []

# Extract relevant data
for country in country_data:
    countries.append(country.get('name', {}).get('common', ''))
    regions.append(country.get('region', ''))

# Create DataFrame
country_metadata = pd.DataFrame({
    'Country': countries,
    'Region': regions
})

# Save to CSV
country_metadata.to_csv('country_metadata.csv', index=False)
print("Country metadata saved to 'country_metadata.csv'")
display(country_metadata.head())

Country metadata saved to 'country_metadata.csv'


Unnamed: 0,Country,Region
0,South Georgia,Antarctic
1,Grenada,Americas
2,Switzerland,Europe
3,Sierra Leone,Africa
4,Hungary,Europe


In [4]:
import wbdata
import pandas as pd
from datetime import datetime

# Define indicators and date range
indicators = {'NY.GDP.MKTP.CD': 'GDP'}
date_range = (datetime(1987, 1, 1), datetime(2022, 12, 31))

# Fetch GDP data for all available countries
economic_data = wbdata.get_dataframe(indicators)

# Reset index to have 'Country' and 'Date' as columns
economic_data.reset_index(inplace=True)

# Check the columns of economic_data to ensure 'Country' exists
print("Columns in economic_data:", economic_data.columns)

# Load country metadata from CSV
country_df = pd.read_csv('C:/Users/elbet/OneDrive/Desktop/Ten/week-10/Change-point-analysis-and-statistical-modelling-of-time-series-data/Notebook/country_metadata.csv')

# Check the columns of country_df to ensure 'Country' exists
print("Columns in country_df:", country_df.columns)

# Rename columns if necessary to match
if 'name' in country_df.columns:
    country_df.rename(columns={'name': 'Country'}, inplace=True)

# Perform the merge
try:
    economic_data = pd.merge(economic_data, country_df, on='Country', how='left')
except KeyError as e:
    print(f"KeyError during merge: {e}")
    print("Available columns in economic_data:", economic_data.columns)
    print("Available columns in country_df:", country_df.columns)

# Save the resulting DataFrame to CSV
economic_data.to_csv('C:/Users/elbet/OneDrive/Desktop/Ten/week-10/Change-point-analysis-and-statistical-modelling-of-time-series-data/data/All_economic_data.csv', index=False)

# Display the first few rows of the final DataFrame
# print(economic_data)
display(economic_data.head())

Columns in economic_data: Index(['country', 'date', 'GDP'], dtype='object')
Columns in country_df: Index(['Country', 'Region'], dtype='object')
KeyError during merge: 'Country'
Available columns in economic_data: Index(['country', 'date', 'GDP'], dtype='object')
Available columns in country_df: Index(['Country', 'Region'], dtype='object')


Unnamed: 0,country,date,GDP
0,Africa Eastern and Southern,2023,1236163000000.0
1,Africa Eastern and Southern,2022,1183962000000.0
2,Africa Eastern and Southern,2021,1086772000000.0
3,Africa Eastern and Southern,2020,929074100000.0
4,Africa Eastern and Southern,2019,1006527000000.0


In [32]:
import requests
import pandas as pd

# Your EIA API key
api_key = "DkCvZcDDJGlhhRj59orlwJUmTtACzClEbRPnaS7q"

# The series ID for Brent Crude oil prices (you might need to confirm the exact Series ID)
series_id = "PET.RBRTE.M"  # This is typically for Brent Crude Oil Europe Spot Price, Monthly

# Define the API endpoint
url = f"https://api.eia.gov/API_route?{api_key}"

# Send the GET request to the API
response = requests.get(url)
data = response.json()

# Check if data retrieval was successful
if 'series' in data:
    # Extract the data points
    records = data['series'][0]['data']
    
    # Convert to DataFrame and set up columns
    df = pd.DataFrame(records, columns=["Date", "Price"])
    
    # Convert the date column to datetime format and sort by date
    df['Date'] = pd.to_datetime(df['Date'], format='%Y%m')
    df = df.sort_values(by="Date").reset_index(drop=True)
    
    # Display the first few rows of the dataframe
    print(df.head())
    
    # Save to CSV
    df.to_csv("brent_crude_oil_prices.csv", index=False)
    print("Data saved to brent_crude_oil_prices.csv")
else:
    print("Failed to retrieve data. Please check the series ID or API key.")


Failed to retrieve data. Please check the series ID or API key.
