In [None]:
# Import the necessary libraries
import time  # For adding delays

import pandas as pd  # For working with dataframes
import undetected_chromedriver as uc  # For using Chrome browser
from selenium.common.exceptions import (  # For handling lookup/click failures
    NoSuchElementException,
    StaleElementReferenceException,
    WebDriverException,
)
from selenium.webdriver.common.by import By  # For locating elements
from tqdm.notebook import tqdm  # For progress bar

# One accumulator list per output column; the row-scraping loop appends to all
# five on every iteration, so they stay the same length.
release_dates = []
times = []
actuals = []
forecasts = []
previouss = []

# Page to scrape, plus the XPath prefix shared by the two elements looked up below
CALENDAR_URL = 'https://in.investing.com/economic-calendar/api-weekly-crude-stock-656'
HISTORY_XPATH = '//*[@id="js-main-container"]/section[1]/div/section[2]/div[2]'

# Start a Chrome session via undetected_chromedriver, maximize it, and load the page
driver = uc.Chrome()
driver.maximize_window()
driver.get(CALENDAR_URL)

# The "Show More" button on the page
show_more_btn = driver.find_element(By.XPATH, HISTORY_XPATH + '/div/button')

# The <tbody> element that holds the table rows
container = driver.find_element(By.XPATH, HISTORY_XPATH + '/section/div[1]/div/table/tbody')

# Flag controlling the pagination loop; cleared once 2012 (or older) data is visible
not_2012 = True

# Click "Show More" until the last table row is dated 2012 or earlier
while not_2012:
    # Pause before each click (original pacing; presumably gives the page time
    # to append the newly requested rows -- confirm before shortening)
    time.sleep(1)

    try:
        show_more_btn.click()

        # Read the last row's date cell once and reuse the text below
        last_line_time = container.find_element(By.XPATH, './tr[last()]/td[1]/span').text
    except WebDriverException as exc:
        # The button can no longer be clicked or the last row cannot be read.
        # Stop paging instead of aborting the run, so the rows loaded so far
        # can still be scraped.
        print(f"\nStopped loading more rows ({type(exc).__name__})")
        break

    print(last_line_time, end='\r')

    # The year is expected right after the ', ' separator in the date text;
    # only its first four characters are considered, and an unexpected format
    # yields an empty string rather than an IndexError.
    date_parts = last_line_time.split(', ')
    last_line_time_year = date_parts[1][:4] if len(date_parts) > 1 else ''

    # Compare numerically with <= so the loop still terminates if the last
    # loaded row skips past 2012 instead of landing exactly on it.
    if (
        len(last_line_time_year) == 4
        and last_line_time_year.isdecimal()
        and int(last_line_time_year) <= 2012
    ):
        not_2012 = False

def _cell_text(row, column):
    """Return the text of the ``column``-th ``<td>`` (1-based) of ``row``.

    Returns ``None`` when the cell does not exist or the row element has gone
    stale. Only these Selenium lookup failures are suppressed; anything else
    (including ``KeyboardInterrupt``) propagates so the loop can be stopped.
    """
    try:
        return row.find_element(By.XPATH, f'./td[{column}]').text
    except (NoSuchElementException, StaleElementReferenceException):
        return None


# Find all the rows in the table
rows = container.find_elements(By.XPATH, './tr')

# Loop through each row and extract the data.
# No per-row sleep: this loop only reads cells of rows already fetched above
# and triggers no page load, so a delay here would only slow the loop down.
for row in tqdm(rows):
    release_date = _cell_text(row, 1)
    time_ = _cell_text(row, 2)
    actual = _cell_text(row, 3)
    forecast = _cell_text(row, 4)
    previous = _cell_text(row, 5)

    # Append to every column list on every row so the lists stay aligned
    release_dates.append(release_date)
    times.append(time_)
    actuals.append(actual)
    forecasts.append(forecast)
    previouss.append(previous)

    print(release_date, ":", time_, ":", actual, ":", forecast, ":", previous)

try:
    # Create a pandas DataFrame from the scraped data (one tuple per table row)
    df = pd.DataFrame(list(zip(release_dates, times, actuals, forecasts, previouss)),
                      columns=["Release Date", "Time", "Actual", "Forecast", "Previous"])

    # Save the DataFrame to a CSV file
    df.to_csv("Investing.csv", index=False, encoding="utf-8-sig")
finally:
    # Quit the browser even if building or writing the DataFrame fails,
    # so the Chrome process is not left running
    driver.quit()