In [1]:
from datetime import datetime, timedelta
from pathlib import Path
from random import randint
from time import time, sleep

import numpy as np
import pandas as pd
import requests
import schedule
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Function to scrape data from the website
def scrape_forex_data():
    # This section opens the browser using selenium webdriver and navigates to the this week tab
    URL = "https://www.investing.com/economic-calendar/"

    #open the browser
    browser = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

    #load page
    browser.get(URL)

    #navigate to this week's tab
    this_week_tab = browser.find_element(by=By.XPATH,value="//a[@id='timeFrame_thisWeek']")

    #click to this week's tab
    this_week_tab.click()

    sleep(2)

    #print(browser.page_source.encode('utf-8'))

    # create the Beautiful Soup object
    soup = BeautifulSoup(browser.page_source, 'html.parser')

    # find the table with class 'js-economic-calendar-data'
    table = soup.find('table', id='economicCalendarData')

    d = {"Date":[], "Time":[], "Currency":[], "Impact":[],"Description":[]}

    # iterate through each row in the table and extract the data for this week
    for row in table.find_all('tr'):
        # check if the row has a data-event-datetime attribute
        if row.has_attr('data-event-datetime'):
            # extract the date and time from the data-event-datetime attribute
            event_datetime = row['data-event-datetime']
            #parse into a Python datetime object and store as date into dictionary
            d['Date'].append((datetime.strptime(event_datetime, '%Y/%m/%d %H:%M:%S')).date())
            # extract time from the row and store into dictionary
            d['Time'].append(row.find('td', class_='time').text.strip())
            # extract currency from the row and store into dictionary
            d['Currency'].append(row.find('td', class_='flagCur').text.strip())
            # find all the i tags with class grayFullBullishIcon- they have impact rating
            impact_list = row.find_all('i', class_='grayFullBullishIcon')
            impact_level = ''
            if len(impact_list) == 1:
                impact_level = 'Low Impact'
            elif len(impact_list) == 2:
                impact_level = 'Medium Impact'
            elif len(impact_list) == 3:
                impact_level = 'High Impact'

            d['Impact'].append(impact_level)
            # extract impact from the row and store into dictionary
            d['Description'].append( row.find('td', class_='event').text.strip())
        
    investing_df = pd.DataFrame.from_dict(d) 

    # Save the data to a CSV file
    reload_date = datetime.today().strftime('%Y-%m-%d')
    week_num = datetime.today().strftime('%U')
    year_num = datetime.today().strftime('%Y')
    filename = f'investing_df{reload_date}_week{week_num}_year{year_num}.csv'
     #insert path of folder you want the csv to be in between r''
    investing_df.to_csv(r'' + '\\' + filename, header=True, index=False)
    
        
    # Close the web driver
    browser.quit()


In [3]:
# Run the scraper once: this launches a Chrome session, reads this week's
# economic calendar and writes the resulting CSV file.
scrape_forex_data()