In [1]:
!pip3 install -r requirements.txt



In [2]:
# Python 2/3 compatibility shim: evaluating the bare name ``basestring``
# raises NameError on Python 3, in which case the name is bound to ``str``.
try:
    basestring
except NameError:
    basestring = str

from datetime import datetime
from decimal import Decimal
from future.utils import iteritems
import dateutil.parser

class BaseModel(object):

    """ Base class for other models. """

    def __init__(self, **kwargs):
        """Start with an empty parameter mapping; keyword arguments are ignored."""
        self._default_params = {}

    @classmethod
    def _NewFromJsonDict(cls, data, **kwargs):
        """Build an instance of ``cls`` from a decoded JSON dictionary.

        Args:
          data:
            Mapping whose items become keyword arguments of ``cls``.
          **kwargs:
            Extra items; on a key clash they override the value in ``data``.

        Returns:
          A new ``cls`` instance.
        """
        # Merge into a copy so the caller's dictionary is never modified.
        merged = dict(data)
        merged.update(kwargs)
        return cls(**merged)

class Book(BaseModel):
    """A Bitso order book together with its trading limits."""

    def __init__(self, **kwargs):
        """Read the book symbol and its six limits from keyword arguments.

        The ``book`` keyword is stored as ``symbol``; each limit keyword is
        converted with ``Decimal``. Every value is kept in
        ``_default_params`` and also exposed as an instance attribute.
        """
        limit_fields = (
            'minimum_amount',
            'maximum_amount',
            'minimum_price',
            'maximum_price',
            'minimum_value',
            'maximum_value',
        )
        params = {'symbol': kwargs.get('book')}
        for field in limit_fields:
            params[field] = Decimal(kwargs.get(field))
        self._default_params = params

        for name in params:
            setattr(self, name, params[name])

    def __repr__(self):
        """Return a short representation showing only the symbol."""
        template = "Book(symbol={symbol})"
        return template.format(symbol=self.symbol)
    
class AvailableBooks(BaseModel):
    """A class that represents Bitso's orderbooks.

    ``books`` lists the book symbols in payload order; each book is also
    reachable as an attribute named after its symbol, holding a ``Book``.
    """
    def __init__(self, **kwargs):
        """Build the collection from the ``payload`` keyword argument.

        Args:
          payload:
            Iterable of dictionaries, each with at least a ``book`` key.
            A missing or ``None`` payload produces an empty collection.
        """
        self.books = []
        # ``or []`` keeps a response without a payload from raising TypeError.
        for ob in kwargs.get('payload') or []:
            symbol = ob['book']
            self.books.append(symbol)
            setattr(self, symbol, Book._NewFromJsonDict(ob))

    def __repr__(self):
        """Return the class name followed by the comma-joined book symbols."""
        return "AvailableBooks(books={books})".format(books=','.join(self.books))


In [3]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#The MIT License (MIT)
#
#Copyright (c) 2016 Mario Romero 
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above copyright notice and this permission notice shall be included in all
#copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#SOFTWARE.

from __future__ import absolute_import

import hashlib
import hmac
import json
import time
import requests

from future.utils import iteritems

try:
    from urllib.parse import urlparse, urlencode
except ImportError:
    from urlparse import urlparse
    from urllib import urlencode

def current_milli_time():
    """Return the current Unix time as a string of digits, used as a nonce.

    NOTE: despite the name, the timestamp is scaled by 1,000,000, so the
    value is expressed in microseconds rather than milliseconds.
    """
    scaled = time.time() * 10 ** 6
    return str(int(round(scaled)))

class ApiError(Exception):
    """Raised when a parsed API response carries an error."""

class ApiClientError(Exception):
    """Exception type for client-side failures.

    NOTE(review): intended use is inferred from the name only; confirm
    against the code that raises it.
    """

class Api(object):
    """A python interface for the Bitso API

    Example usage:
      To create an instance of the bitso.Api class, without authentication:

        >>> import bitso
        >>> api = bitso.Api()

      To list the order books offered by Bitso:

        >>> available = api.available_books()
        >>> print(available.books)

      To sign requests for the private endpoints, initiate bitso.Api with an
      api_key and api_secret
      (see https://bitso.com/developers?shell#private-endpoints):

        >>> api = bitso.Api(API_KEY, API_SECRET)
    """

    def __init__(self, key=None, secret=None, timeout=0, client_id=None):
        """Instantiate a bitso.Api object.

        Args:
          key:
            Bitso API Key
          secret:
            Bitso API Secret
          timeout:
            Timeout handed to ``requests`` for every call. ``0`` (the
            default) or ``None`` means "no timeout"; the raw ``0`` is never
            forwarded because requests/urllib3 reject timeouts <= 0.
          client_id:
            Client id, needed only by ``_build_auth_payload``.
        """
        self.base_url_v2 = "https://bitso.com/api/v2"
        self.base_url = "https://bitso.com/api/v3"
        self.key = key
        self._secret = secret
        self.timeout = timeout
        self.client_id = client_id

    def available_books(self):
        """
        Returns:
          A bitso.AvailableBooks instance built from the response
        """
        url = '%s/available_books/' % self.base_url
        resp = self._request_url(url, 'GET')
        return AvailableBooks._NewFromJsonDict(resp)

    def _effective_timeout(self):
        """Return the timeout to pass to ``requests`` (``None`` when unset or 0)."""
        return self.timeout if self.timeout else None

    def _build_auth_payload(self):
        """Return the ``key``/``nonce``/``signature`` parameters.

        The signature is the hex HMAC-SHA256, keyed with the API secret, of
        nonce + client_id + key.

        Raises:
          ApiClientError: if key, secret or client_id was not provided.
        """
        if self.key is None or self._secret is None or self.client_id is None:
            raise ApiClientError(
                "key, secret and client_id are required to sign this payload")
        nonce = str(int(time.time()))
        msg_concat = nonce + self.client_id + self.key
        signature = hmac.new(self._secret.encode('utf-8'),
                             msg_concat.encode('utf-8'),
                             hashlib.sha256).hexdigest()
        return {'key': self.key, 'nonce': nonce, 'signature': signature}

    def _build_auth_header(self, http_method, url, json_payload=''):
        """Return the ``Authorization`` header for a signed request.

        The signed message is nonce + upper-cased HTTP method + request path
        (including the query string, if any) + JSON payload.

        Args:
          http_method: HTTP verb of the request.
          url: Full request URL.
          json_payload: Serialized body; ``{}`` and ``'{}'`` count as empty.

        Raises:
          ApiClientError: if key or secret was not provided.
        """
        if self.key is None or self._secret is None:
            raise ApiClientError("key and secret are required for private requests")
        if json_payload == {} or json_payload == '{}':
            json_payload = ''
        url_components = urlparse(url)
        request_path = url_components.path
        if url_components.query != '':
            request_path += '?' + url_components.query
        nonce = current_milli_time()
        msg_concat = nonce + http_method.upper() + request_path + json_payload
        signature = hmac.new(self._secret.encode('utf-8'),
                             msg_concat.encode('utf-8'),
                             hashlib.sha256).hexdigest()
        return {'Authorization': 'Bitso %s:%s:%s' % (self.key, nonce, signature)}

    def _request_url(self, url, verb, params=None, private=False):
        """Perform an HTTP request and return the decoded JSON response.

        Args:
          url: Endpoint URL.
          verb: ``'GET'``, ``'POST'`` or ``'DELETE'``.
          params: Request parameters; sent as the query string for GET and
            as the JSON body for POST.
          private: When true, an ``Authorization`` header is attached.

        Raises:
          ApiClientError: for an unsupported verb or missing credentials.
          ApiError: when the response reports an error.
          requests.RequestException: on transport failures.
        """
        if params is None:
            params = {}
        # Byte values are decoded so they can be URL-encoded / JSON-serialized.
        params = {k: v.decode("utf-8") if isinstance(v, bytes) else v
                  for k, v in params.items()}
        headers = None
        timeout = self._effective_timeout()
        if verb == 'GET':
            url = self._build_url(url, params)
            if private:
                # GET requests are signed over the final URL, with no body.
                headers = self._build_auth_header(verb, url)
            resp = requests.get(url, headers=headers, timeout=timeout)
        elif verb == 'POST':
            if private:
                headers = self._build_auth_header(verb, url, json.dumps(params))
            resp = requests.post(url, json=params, headers=headers, timeout=timeout)
        elif verb == 'DELETE':
            if private:
                headers = self._build_auth_header(verb, url, json.dumps(params))
            resp = requests.delete(url, headers=headers, timeout=timeout)
        else:
            raise ApiClientError("Unsupported HTTP verb: %r" % (verb,))
        content = resp.content
        if isinstance(content, bytes):
            content = content.decode('utf-8')
        return self._parse_json(content)

    def _build_url(self, url, params):
        """Return ``url`` with ``params`` appended as a query string.

        The URL is returned unchanged when there is nothing to append, i.e.
        ``params`` is empty or contains only ``None`` values.
        """
        query = self._encode_parameters(params) if params else ''
        if query:
            url = url + '?' + query
        return url

    def _encode_parameters(self, parameters):
        """URL-encode ``parameters``.

        ``None`` values are skipped and list/tuple values are expanded into
        one ``key=value`` pair per element. Returns ``None`` when
        ``parameters`` itself is ``None``.
        """
        if parameters is None:
            return None
        param_tuples = []
        for k, v in parameters.items():
            if v is None:
                continue
            if isinstance(v, (list, tuple)):
                param_tuples.extend((k, single_v) for single_v in v)
            else:
                param_tuples.append((k, v))
        return urlencode(param_tuples)

    def _parse_json(self, json_data):
        """Decode ``json_data`` and raise ``ApiError`` if it reports an error."""
        data = json.loads(json_data)
        self._check_for_api_error(data)
        return data

    def _check_for_api_error(self, data):
        """Raise ``ApiError`` when the decoded response reports a failure.

        A list/tuple response is an error when its first element contains an
        ``error`` key. A dictionary response is an error when it contains an
        ``error`` key or its ``success`` value is not true.
        """
        # Sequences are handled first: indexing them with 'success' would
        # raise TypeError before their own check could run.
        if isinstance(data, (list, tuple)):
            if len(data) > 0 and isinstance(data[0], dict) and 'error' in data[0]:
                raise ApiError(data[0]['error'])
            return
        if not isinstance(data, dict):
            raise ApiError("Unexpected API response: %r" % (data,))
        if 'error' in data:
            raise ApiError(data['error'])
        if data.get('success') != True:  # noqa: E712 - keep original equality semantics
            raise ApiError("Unsuccessful response without error details")

In [4]:
# Unauthenticated client (no key/secret) with a 5-second request timeout.
api = Api(timeout=5)
# Fetch the available order books; ``avb_books.books`` holds their symbols.
avb_books = api.available_books()
print(f"Total Available Books: {len(avb_books.books)}")
print(f"Available Books: {avb_books.books}")

Total Available Books: 97
Available Books: ['btc_mxn', 'eth_mxn', 'xrp_mxn', 'ltc_mxn', 'bch_mxn', 'tusd_btc', 'tusd_mxn', 'mana_mxn', 'bat_mxn', 'btc_ars', 'btc_dai', 'dai_mxn', 'btc_usd', 'xrp_usd', 'eth_usd', 'dai_ars', 'btc_brl', 'eth_ars', 'eth_brl', 'btc_usdt', 'usd_mxn', 'usd_ars', 'usd_brl', 'mana_usd', 'ltc_usd', 'comp_usd', 'link_usd', 'uni_usd', 'aave_usd', 'chz_usd', 'btc_cop', 'axs_usd', 'dydx_usd', 'yfi_usd', 'sand_usd', 'shib_usd', 'snx_usd', 'matic_usd', 'mkr_usd', 'enj_usd', 'ftm_usd', 'crv_usd', 'gala_usd', 'ada_usd', 'lrc_usd', 'grt_usd', 'ape_usd', 'sushi_usd', 'omg_usd', 'sol_usd', 'dot_usd', 'qnt_usd', 'doge_usd', 'eth_cop', 'xrp_cop', 'usd_cop', 'bal_usd', 'trx_usd', 'algo_usd', 'ldo_usd', 'xlm_usd', 'matic_brl', 'ada_brl', 'sol_brl', 'xrp_brl', 'doge_brl', 'chz_brl', 'usdt_brl', 'paxg_usd', 'shib_brl', 'avax_usd', 'eur_mxn', 'tigres_mxn', 'usdt_cop', 'eur_brl', 'dot_brl', 'ltc_brl', 'link_brl', 'tigres_usd', 'uni_brl', 'mana_brl', 'bch_brl', 'bat_usd', 'bch_usd'

In [5]:
# Keep every book whose symbol does not contain 'mxn'.
# NOTE(review): despite the name, the result is not limited to USD pairs --
# symbols such as 'btc_ars' or 'eth_brl' also pass this filter.
usd_books = [book for book in avb_books.books if 'mxn' not in book]
print(f"Total USD Available Books: {len(usd_books)}")
print(f"USD Available Books: {usd_books}")

Total USD Available Books: 80
USD Available Books: ['tusd_btc', 'btc_ars', 'btc_dai', 'btc_usd', 'xrp_usd', 'eth_usd', 'dai_ars', 'btc_brl', 'eth_ars', 'eth_brl', 'btc_usdt', 'usd_ars', 'usd_brl', 'mana_usd', 'ltc_usd', 'comp_usd', 'link_usd', 'uni_usd', 'aave_usd', 'chz_usd', 'btc_cop', 'axs_usd', 'dydx_usd', 'yfi_usd', 'sand_usd', 'shib_usd', 'snx_usd', 'matic_usd', 'mkr_usd', 'enj_usd', 'ftm_usd', 'crv_usd', 'gala_usd', 'ada_usd', 'lrc_usd', 'grt_usd', 'ape_usd', 'sushi_usd', 'omg_usd', 'sol_usd', 'dot_usd', 'qnt_usd', 'doge_usd', 'eth_cop', 'xrp_cop', 'usd_cop', 'bal_usd', 'trx_usd', 'algo_usd', 'ldo_usd', 'xlm_usd', 'matic_brl', 'ada_brl', 'sol_brl', 'xrp_brl', 'doge_brl', 'chz_brl', 'usdt_brl', 'paxg_usd', 'shib_brl', 'avax_usd', 'usdt_cop', 'eur_brl', 'dot_brl', 'ltc_brl', 'link_brl', 'tigres_usd', 'uni_brl', 'mana_brl', 'bch_brl', 'bat_usd', 'bch_usd', 'eth_btc', 'bar_usd', 'usdt_ars', 'psg_usd', 'atom_usd', 'near_usd', 'usd_usdt', 'pepe_usd']


In [6]:
# Swap underscores for hyphens ('btc_usd' -> 'btc-usd'); the scraping cell
# upper-cases these symbols and interpolates them into Yahoo Finance URLs.
usd_books = [book.replace('_', '-') for book in usd_books]
print(f"USD Available Books: {usd_books}")

USD Available Books: ['tusd-btc', 'btc-ars', 'btc-dai', 'btc-usd', 'xrp-usd', 'eth-usd', 'dai-ars', 'btc-brl', 'eth-ars', 'eth-brl', 'btc-usdt', 'usd-ars', 'usd-brl', 'mana-usd', 'ltc-usd', 'comp-usd', 'link-usd', 'uni-usd', 'aave-usd', 'chz-usd', 'btc-cop', 'axs-usd', 'dydx-usd', 'yfi-usd', 'sand-usd', 'shib-usd', 'snx-usd', 'matic-usd', 'mkr-usd', 'enj-usd', 'ftm-usd', 'crv-usd', 'gala-usd', 'ada-usd', 'lrc-usd', 'grt-usd', 'ape-usd', 'sushi-usd', 'omg-usd', 'sol-usd', 'dot-usd', 'qnt-usd', 'doge-usd', 'eth-cop', 'xrp-cop', 'usd-cop', 'bal-usd', 'trx-usd', 'algo-usd', 'ldo-usd', 'xlm-usd', 'matic-brl', 'ada-brl', 'sol-brl', 'xrp-brl', 'doge-brl', 'chz-brl', 'usdt-brl', 'paxg-usd', 'shib-brl', 'avax-usd', 'usdt-cop', 'eur-brl', 'dot-brl', 'ltc-brl', 'link-brl', 'tigres-usd', 'uni-brl', 'mana-brl', 'bch-brl', 'bat-usd', 'bch-usd', 'eth-btc', 'bar-usd', 'usdt-ars', 'psg-usd', 'atom-usd', 'near-usd', 'usd-usdt', 'pepe-usd']


In [7]:
def from_book(book, books=None):
    """Return the tail of a book list starting at ``book`` (inclusive).

    Args:
        book: Symbol to start from; the first occurrence is used.
        books: Sequence to slice. Defaults to the module-level ``usd_books``.

    Returns:
        A new list with ``book`` and everything after it, or an empty list
        when ``book`` is not present. The result is also printed.
    """
    if books is None:
        books = usd_books
    books = list(books)
    try:
        start = books.index(book)
    except ValueError:
        cum = []
    else:
        cum = books[start:]
    print(f"From chosen USD Available Book: {cum}")
    return cum

In [8]:
import psycopg2


def create_connection():
    """Open a psycopg2 connection to the PostgreSQL database.

    Settings are read from the environment variables ``DB_NAME``,
    ``DB_USER``, ``DB_PASSWORD``, ``DB_HOST`` and ``DB_PORT``. Any variable
    that is unset falls back to the local-development value that was
    previously hard-coded here.

    Returns:
        The open connection, or ``None`` when connecting fails (the error is
        printed rather than raised).
    """
    # Imported locally: at module level ``os`` is only imported by a later cell.
    import os

    # NOTE(review): the password fallback keeps existing local setups working;
    # prefer setting DB_PASSWORD and removing the default before sharing.
    settings = {
        "dbname": os.environ.get("DB_NAME", "cryptostocks"),
        "user": os.environ.get("DB_USER", "postgres"),
        "password": os.environ.get("DB_PASSWORD", "gallo"),
        "host": os.environ.get("DB_HOST", "localhost"),
        "port": os.environ.get("DB_PORT", "5432"),
    }

    try:
        connection = psycopg2.connect(**settings)
        print("Connection to the database successful!")
        return connection
    except Exception as e:
        print(f"Error: Unable to connect to the database. {e}")
        return None

def create_table(connection):
    """Create the ``historical`` table if it does not exist yet.

    Errors are printed rather than raised. When a statement fails the
    transaction is rolled back so the connection stays usable.

    NOTE(review): the table has no primary key or unique constraint, so the
    ``ON CONFLICT DO NOTHING`` clause used when inserting has nothing to
    conflict with -- confirm whether duplicates are acceptable.

    Args:
        connection: DB-API connection providing ``cursor``, ``commit`` and
            ``rollback``.
    """
    create_table_query = """
            CREATE TABLE IF NOT EXISTS historical (
                reference VARCHAR(255),
                book VARCHAR(255),
                date DATE,
                open FLOAT,
                high FLOAT,
                low FLOAT,
                close FLOAT,
                adj_close FLOAT,
                volume BIGINT
            )
        """
    # Stays None if obtaining the cursor itself fails, so cleanup can tell.
    cursor = None
    try:
        cursor = connection.cursor()
        cursor.execute(create_table_query)
        connection.commit()
        print("Table created successfully!")
    except Exception as e:
        print(f"Error: Unable to create the table. {e}")
        if cursor is not None:
            connection.rollback()
    finally:
        if cursor is not None:
            cursor.close()

def delete_table(table_name, conn):
    """Drop ``table_name`` if it exists.

    On failure the error is printed and the transaction is rolled back; the
    cursor is always closed.

    Args:
        table_name: Name of the table to drop. It is interpolated directly
            into the SQL text, so it must come from trusted code and never
            from user input.
        conn: DB-API connection providing ``cursor``, ``commit`` and
            ``rollback``.
    """
    cursor = conn.cursor()
    try:
        cursor.execute(f"DROP TABLE IF EXISTS {table_name}")
        conn.commit()
        print(f"The table '{table_name}' has been deleted successfully.")
    except Exception as e:
        # The original handler named an undefined ``Error``; catching
        # Exception matches the other database helpers in this file.
        print("Error:", e)
        conn.rollback()
    finally:
        cursor.close()

        
def save_to_postgres(row_data, header, conn):
    """Insert one row into the ``historical`` table and commit it.

    Column names come from ``header`` and are paired positionally with
    ``row_data``. A failed insert is printed and rolled back, not raised.

    Args:
        row_data: Values for one row.
        header: Column names, in the same order as ``row_data``.
        conn: Open database connection.
    """
    cur = conn.cursor()
    # Pair each column name with its value for this row.
    record = dict(zip(header, row_data))

    column_list = ", ".join(record)
    placeholder_list = ", ".join(["%s"] * len(record))
    statement = "INSERT INTO historical ({}) VALUES ({}) ON CONFLICT DO NOTHING".format(
        column_list,
        placeholder_list,
    )
    values = list(record.values())

    try:
        cur.execute(statement, values)
        conn.commit()
    except Exception as exc:
        # Report the failing row, then undo the aborted transaction.
        print("error on saving data to pg:", exc, "\n row_dict:", record)
        conn.rollback()
    cur.close()

def init_db():
    """Connect to the database and make sure the table exists.

    Returns:
        The open connection, or ``None`` when the connection attempt failed.
    """
    db = create_connection()
    if db:
        # Only touch the schema once a connection is available.
        create_table(db)
        return db
    return None

def close_connection(conn):
    """Close ``conn`` and print a confirmation message."""
    conn.close()
    message = "Connection closed."
    print(message)

In [9]:
# Shared database connection used by the scraping cell; None if connecting failed.
conn = init_db()

Connection to the database successful!
Table created successfully!


In [10]:
import remoteStorage as rs
import boto3

def store_to_s3(bucket_name, folder_name):
    """Upload the placeholder local file to ``<folder_name>/file.txt`` in S3.

    Args:
        bucket_name: Target bucket; passed to ``rs.create_bucket`` first.
        folder_name: Key prefix under which the file is stored.
    """
    # Source path and destination key (the local path is a fixed placeholder).
    source_path = "path/to/local/file.txt"
    object_key = f"{folder_name}/file.txt"

    rs.create_bucket(bucket_name)

    # An empty object is written at the destination key before the upload;
    # the original comment describes this as creating the folder.
    target_bucket = boto3.resource('s3').Bucket(bucket_name)
    target_bucket.put_object(Key=object_key, Body="")

    rs.upload_file_to_s3(bucket_name, source_path, object_key)

In [11]:
import time
import json
import time
import pandas as pd
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.firefox.service import Service as FirefoxService
from webdriver_manager.firefox import GeckoDriverManager
from selenium.webdriver.common.by import By

In [12]:
# Empty list; nothing in the visible cells appends to it.
contents = []
# Yahoo Finance symbol lookup page.
url = f'https://finance.yahoo.com/lookup'
# Absolute XPath of a link on that page -- presumably the first lookup
# result; verify against the live page layout.
xpath = "/html/body/div[1]/div/div/div[1]/div/div[3]/div[1]/div/div[2]/div/div/div/ul/li[1]/div/div/div[2]/h3/a"


In [13]:

# Absolute XPath of a tab link in the quote navigation; select_historical_li
# clicks the same path.
HISTORIAL_DATA_BTN = "/html/body/div[1]/div/div/div[1]/div/div[2]/div/div/div[7]/div/div/section/div/ul/li[4]/a"

# Absolute XPath of the <tbody> whose rows the scraping cell reads.
TBODY = "/html/body/div[1]/div/div/div[1]/div/div[3]/div[1]/div/div[2]/section/div[2]/table/tbody"


In [14]:
from datetime import datetime

def parse_date(date_str):
    """Parse a date written like ``"Aug 01, 2023"`` into a ``datetime``.

    Commas are dropped and the text must then consist of exactly three
    whitespace-separated parts: English month abbreviation, day and year.

    Raises:
        KeyError: if the month abbreviation is not recognised.
        ValueError: if the text does not split into three parts or does not
            form a valid date.
    """
    month_names = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    # Map each abbreviation to its zero-padded month number ("Jan" -> "01").
    month_numbers = {name: f"{index:02d}"
                     for index, name in enumerate(month_names, start=1)}

    month, day, year = date_str.replace(",", "").split()

    # Rebuild the date as 'year-month-day' and let strptime validate it.
    iso_text = "-".join((year, month_numbers[month], day))
    return datetime.strptime(iso_text, "%Y-%m-%d")

def parse_row_data(row_data):
    """Convert one scraped table row from strings to typed values.

    Commas are stripped from every string item. Item 0 is then parsed with
    ``parse_date``, items 1-5 become ``float`` and item 6 becomes ``int``.
    The input list is not modified.

    Args:
        row_data: Sequence with at least seven items.

    Returns:
        A new list with the converted values, or ``None`` when the row
        cannot be parsed (the error is printed). Callers must check for
        ``None`` before using the result.
    """
    try:
        # Remove thousands separators (and the comma inside the date).
        row_data = [item.replace(",", "") if isinstance(item, str) else item
                    for item in row_data]

        row_data[0] = parse_date(row_data[0])
        for position in range(1, 6):
            row_data[position] = float(row_data[position])
        row_data[6] = int(row_data[6])
        return row_data
    except Exception as e:
        print("error during parsing data:", e, "row_data: ", row_data)
        return None

In [15]:
import csv
import os

def save_unavailable_book(book_name):
    """Append ``book_name`` to ``unavailable_books.csv`` in the working directory.

    A ``book`` header row is written first when the file does not exist yet.
    Failures are printed rather than raised.
    """
    try:
        csv_path = os.path.join(os.getcwd(), "unavailable_books.csv")

        # Checked before opening: append mode would create the file.
        needs_header = not os.path.isfile(csv_path)
        rows = [[book_name]]
        if needs_header:
            rows.insert(0, ["book"])

        with open(csv_path, "a", newline="") as handle:
            csv.writer(handle).writerows(rows)
        print(f"Book '{book_name}' added to unavailable_books.csv")
    except Exception as e:
        print(f"Error while saving book '{book_name}' to CSV: {e}")

In [16]:
def get_dynamic_url(ticker, period1=1410825600, period2=1690675200, interval="1d",adjclose="true"):
    """Build the Yahoo Finance history URL for ``ticker``.

    The ticker is upper-cased; ``interval`` is used for both the ``interval``
    and ``frequency`` query parameters.
    """
    query = "&".join((
        f"period1={period1}",
        f"period2={period2}",
        f"interval={interval}",
        "filter=history",
        f"frequency={interval}",
        f"includeAdjustedClose={adjclose}",
    ))
    return f"https://finance.yahoo.com/quote/{ticker.upper()}/history?{query}"

def scroll_to_bottom(driver):
    """Scroll the window to the current bottom of the document body."""
    script = "window.scrollTo(0, document.body.scrollHeight)"
    driver.execute_script(script)
    
def is_at_bottom(driver):
    """Keep scrolling down until the document height stops growing.

    Each pass scrolls to the bottom, waits two seconds and compares the
    document height with the value from the previous pass. Despite the name
    nothing is returned; the function only finishes once the height is stable.
    """
    read_height = "return document.documentElement.scrollHeight"
    jump_to_end = "var scrollingElement = (document.scrollingElement || document.body);scrollingElement.scrollTop = scrollingElement.scrollHeight;"

    previous = driver.execute_script(read_height)
    while True:
        driver.execute_script(jump_to_end)
        current = driver.execute_script(read_height)
        driver.execute_script(f"window.scrollTo(0, {current});")
        time.sleep(2)
        if previous == current:
            break
        previous = current
    print("scrolling down task finished")

def check_tab_header(driver):
    """Return True when the element with id ``quote-nav`` is on the page.

    Any exception raised while looking it up is printed and reported as False.
    """
    try:
        driver.find_element(By.XPATH, '//*[@id="quote-nav"]')
    except Exception as e:
        print(f"financial header or historical data tab does not exist: {e}")
        return False
    return True
    
def nomatchresult(driver):
    """Return True when the page shows a "No results for" message.

    Waits up to three seconds for the message element. If it does not appear,
    or any other error occurs, the book is treated as found and False is
    returned.
    """
    wait = WebDriverWait(driver, 3.0)
    banner_xpath = '/html/body/div[1]/div/div/div[1]/div/div[3]/div[1]/div/div[1]/div/div/section/section/div/div/span/span'
    try:
        wait.until(EC.presence_of_element_located((By.XPATH, banner_xpath)))
        banner = driver.find_element(By.XPATH, banner_xpath)
        print(f"output: {banner.text.lower()}")
        needle = "No results for".lower()
        return needle in banner.text.lower()
    except Exception:
        print("queried book was found!")
        return False
            
def lookup_ticker(driver, ticker):
    """Click the form input named ``RejectAll`` in the original, then search for ``ticker``.

    The ticker is upper-cased, typed into the search box with id
    ``yfin-usr-qry`` and submitted with ENTER.
    """
    reject_button = driver.find_element(
        By.XPATH,
        '/html/body/div[1]/div/div/div[1]/div/div[3]/div[2]/div/div/div/div/div/div[1]/div/div/div/form/input',
    )
    ActionChains(driver).click(on_element=reject_button).perform()
    # Fixed pause after the click before touching the search box.
    time.sleep(5)

    search_box = driver.find_element(By.ID, "yfin-usr-qry")
    for keys in (ticker.upper(), Keys.ENTER):
        search_box.send_keys(keys)

def select_historical_li(driver):
    """Click the fourth tab link of the quote navigation and wait 3 seconds."""
    tab_link = driver.find_element(
        By.XPATH,
        '/html/body/div[1]/div/div/div[1]/div/div[2]/div/div/div[7]/div/div/section/div/ul/li[4]/a',
    )
    ActionChains(driver).click(on_element=tab_link).perform()
    time.sleep(3)

def disable_ad(driver):
    """Hide the ad container with id ``Col1-0-Ad-Proxy`` if it is present.

    Waits up to three seconds for the element. This is best effort: a missing
    element or a failure while hiding it is printed, never raised.
    """
    wait = WebDriverWait(driver, 3.0)
    ad_xpath = '//*[@id="Col1-0-Ad-Proxy"]'
    try:
        # ``until`` returns the located element; the script needs that
        # element as arguments[0], not the XPath string.
        ad_element = wait.until(EC.presence_of_element_located((By.XPATH, ad_xpath)))
        driver.execute_script("arguments[0].style.display = 'none';", ad_element)
    except TimeoutException:
        print("ad element was not found")
    except Exception as e:
        print(f"ad element could not be hidden: {e}")

def search_selector(driver):
    """Return the first candidate section XPath that is present on the page.

    Two absolute XPaths are tried in order, each with a three-second wait.

    Returns:
        The matching XPath string, or ``None`` when neither is found.
    """
    wait = WebDriverWait(driver, 3.0)
    candidates = (
        ("/html/body/div[1]/div/div/div[1]/div/div[3]/div[1]/div/div[2]/section",
         "selector1 for time period not found trying the second"),
        ("/html/body/div[1]/div/div/div[1]/div/div[3]/div[1]/div/div[2]/div/div/section",
         "selector2 for time period not found"),
    )
    for candidate, miss_message in candidates:
        try:
            wait.until(EC.presence_of_element_located((By.XPATH, candidate)))
        except Exception:
            print(miss_message)
        else:
            return candidate
    return None
            
def select_historical(driver, freq):
    """Pick a time period on the history page and apply it.

    Hides the ad, locates the controls section, opens the period dropdown,
    clicks one option and then clicks the apply button.

    Args:
        driver: Selenium WebDriver showing a quote history page.
        freq: ``'daily'`` selects the first option of the first list; any
            other value selects the fourth option of the second list.
            NOTE(review): which periods these options are is not visible
            here -- confirm against the live page.

    Raises:
        NoSuchElementException: if the controls section or one of the
            buttons cannot be located.
    """
    print("historical data selection task started")
    disable_ad(driver)
    se = search_selector(driver)
    if se is None:
        # Fail with a clear message instead of searching for "None/div[1]/...".
        raise NoSuchElementException("time period section was not found on the page")

    controls = f"{se}/div[1]/div[1]"
    driver.find_element(By.XPATH, f"{controls}/div[1]/div/div/div[1]").click()
    time.sleep(3)  # give the dropdown time to open

    option = "ul[1]/li[1]" if freq == 'daily' else "ul[2]/li[4]"
    driver.find_element(By.XPATH, f"{controls}/div[1]/div/div/div[2]/div/{option}/button").click()
    time.sleep(3)

    driver.find_element(By.XPATH, f"{controls}/button").click()
    print("historical data selection task finished")

In [17]:
# Headless Chrome session used by every scraping helper.
# NOTE: DRIVER_PATH is not referenced by any visible code; the driver binary
# is installed by ChromeDriverManager below.
DRIVER_PATH= "/chromedriver/chromedriver"
options = webdriver.ChromeOptions()
# The user agent advertises the same Chrome version that is pinned below.
options.add_argument("--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.5735.90 Safari/537.36")
options.add_argument("--window-size=1920,1080")
options.add_argument("--disable-extensions")
options.add_argument("--proxy-server='direct://'")
options.add_argument("--proxy-bypass-list=*")
options.add_argument("--start-maximized")
options.add_argument('--headless')
options.add_argument('--disable-gpu')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--no-sandbox')
options.add_argument('--ignore-certificate-errors')
# Install chromedriver 114.0.5735.90 and start the browser with it.
service = Service(ChromeDriverManager(version='114.0.5735.90').install())
driver =  webdriver.Chrome(service=service, options=options)

[WDM] - Downloading: 100%|██████████| 8.29M/8.29M [00:04<00:00, 2.08MB/s]


In [None]:
# Scrape the Yahoo Finance price history of every book in ``usd_books`` and
# store each table row in the ``historical`` PostgreSQL table.
REFERENCE = 'https://finance.yahoo.com'  # value of the "reference" column
Header = ["reference", "book", "date", "open", "high", "low", "close", "adj_close", "volume"]
n = len(Header)
Debug = False  # when True, the "historical" table is dropped after the run
freq = 'daily'  # forwarded to select_historical()

try:
    WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
    try:
        for book in usd_books:
            print(f'Book: {book}')
            # TO-CHECK pepe-usd, paxg-usd
            target_url = f"https://finance.yahoo.com/quote/{book.upper()}/history?p={book.upper()}"
            driver.get(target_url)

            lookup_url = f"https://finance.yahoo.com/lookup?s={book.upper()}"
            # Reload until the browser sits on the history page, or on the
            # lookup page (presumably where unknown symbols are redirected).
            # NOTE(review): there is no retry limit, so any other persistent
            # redirect keeps this loop running forever.
            while driver.current_url != target_url:
                driver.get(target_url)
                time.sleep(3)
                if lookup_url == driver.current_url:
                    break

            if nomatchresult(driver) or not check_tab_header(driver):
                print("skipping to next ticket")
                # save_unavailable_book(book)
                print("====================================================================")
                continue
            select_historical(driver, freq)
            time.sleep(1)

            # Scroll until the page stops growing, then read the table rows.
            is_at_bottom(driver)
            table = driver.find_element(By.XPATH, TBODY)
            rows = table.find_elements(By.TAG_NAME, "tr")

            print("scraping data task started")
            for row in rows:
                columns = row.find_elements(By.TAG_NAME, "td")
                row_data = [column.text for column in columns if column.text != '-']
                # Only rows with exactly seven remaining cells are stored.
                if len(row_data) != 7:
                    print("skipping to next row")
                    continue
                row_data = parse_row_data(row_data)
                if row_data is None:
                    # parse_row_data already printed the error; one bad row
                    # must not abort the whole run.
                    continue
                save_to_postgres([REFERENCE, book, *row_data], Header, conn)
            print("scraping data task finished")
            print("====================================================================")
        print("All book were analyzed")
    except NoSuchElementException as nse:
        print(nse)
        print("-----")
        print(str(nse))
        print("-----")
        print(nse.args)
        print("=====")
except TimeoutException as toe:
    print(toe)
    print("-----")
    print(str(toe))
    print("-----")
    print(toe.args)
finally:
    try:
        if Debug:
            delete_table("historical", conn)
    finally:
        # Close the browser window even when the run ends with an error.
        driver.close()

Book: tusd-btc
queried book was found!
historical data selection task started
ad element was not found
selector1 for time period not found trying the second
historical data selection task finished
scrolling down task finished
scraping data task started
scraping data task finished
Book: btc-ars
output: no results for 'btc-ars'
skipping to next ticket
Book 'btc-ars' added to unavailable_books.csv
Book: btc-dai
output: no results for 'btc-dai'
skipping to next ticket
Book 'btc-dai' added to unavailable_books.csv
Book: btc-usd
queried book was found!
historical data selection task started
ad element was not found
selector1 for time period not found trying the second
historical data selection task finished
scrolling down task finished
scraping data task started
scraping data task finished
Book: xrp-usd
queried book was found!
historical data selection task started
ad element was not found
selector1 for time period not found trying the second
historical data selection task finished
scrolling

selector1 for time period not found trying the second
historical data selection task finished
scrolling down task finished
scraping data task started
scraping data task finished
Book: yfi-usd
queried book was found!
historical data selection task started
ad element was not found
selector1 for time period not found trying the second
historical data selection task finished
scrolling down task finished
scraping data task started
scraping data task finished
Book: sand-usd
queried book was found!
historical data selection task started
ad element was not found
selector1 for time period not found trying the second
historical data selection task finished
scrolling down task finished
scraping data task started
scraping data task finished
Book: shib-usd
queried book was found!
historical data selection task started
ad element was not found
selector1 for time period not found trying the second
historical data selection task finished
scrolling down task finished
scraping data task started
scraping