# Import

In [None]:
# adding common module to path, to be visible
import sys
sys.path.append("../../common")

# data analysis
import pandas as pd
import numpy as np
np.set_printoptions(precision=4)
np.set_printoptions(suppress=True)
pd.options.display.float_format = '{:.4f}'.format
from datetime import datetime

# api related
import requests
import kagglehub
import yfinance as yf
import zipfile

# database related
import psycopg
import os
import json
from tabulate import tabulate
from utility import dry_insert_into_db, insert_into_db, describe_table, SECRETS, CONNECTION_STRING

# web scraping
from bs4 import BeautifulSoup
import bs4
import requests
import json

from selenium import webdriver

from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By

from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from selenium.common.exceptions import (
    NoSuchElementException,
    ElementClickInterceptedException,
)

import time
from time import sleep
import random
from io import StringIO

# logging related
import logging
from pathlib import Path

# misc
import pickle
from typing import Any
import re
import shutil

In [None]:
# Part of the current working directory that follows "bitcoin_analysis"; it is
# embedded in both formatters so each record shows which location logged it.
# NOTE: raises IndexError when the cwd does not contain "bitcoin_analysis".
pwf = str(Path.cwd()).split("bitcoin_analysis")[1]
logger = logging.getLogger("default_logger")

# `logging.getLogger` returns the same logger object for the same name, so
# re-running this cell would attach one more file/console handler pair each
# time and every record would be emitted repeatedly. Detach and close whatever
# is already attached before adding the fresh handlers below.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# File output: location, level, message and timestamp on separate lines.
file_handler = logging.FileHandler("/Users/Misha/Documents/python_projects/data_analysis/bitcoin_analysis/logging/logger.txt")
file_formater = logging.Formatter(
    f"{pwf}\n" +
    ">>>\n" +
    "%(levelname)s: %(message)s.\n" +
    "<<< %(asctime)s\n"
)

# Console output: only a short notice that something was logged; the message
# itself goes to the file.
console_handler = logging.StreamHandler()
console_formater = logging.Formatter(f"Logged %(levelname)s in {pwf}")

file_handler.setFormatter(file_formater)
file_handler.setLevel(logging.INFO)

console_handler.setLevel(logging.INFO)
console_handler.setFormatter(console_formater)

logger.setLevel(logging.INFO)
logger.addHandler(file_handler)
logger.addHandler(console_handler)

In [None]:
def dump_pickle(obj:Any, name:str, *, force:bool=False) -> None:
    """
    Dump serialized object at the designated location. Wrapper around `pickle.dumps`
    Assumes correct hard coded path
    
    :Params:
        - obj: any python object to be selialized
        - name: filename to attach (no `.pkl` extension)
        - force: whether to overwrite file if it exists
    :Raises:
        - ValueError: `name` contains file extension `.pkl`
        - FileExistsError: if a file with `name` exists
        - RuntimeError: Wrapper around whatever pickle.dumps raises
    """
    from pathlib import Path
    if ".pkl" in name:
        raise ValueError(f"`.pkl` is in file name: {name}")
        
    path = f"/Users/Misha/Documents/python_projects/data_analysis/bitcoin_analysis/pickles/{name}.pkl"
    
    if Path(path).exists() and not force:
        raise FileExistsError(f"The file '{name}' exists. Aborting write")
    
    with open(path, mode="wb") as f:
        try:
            bytes_ = pickle.dumps(obj)
        except Exception as err:
            raise RuntimeError(f"An internal error occurred while trying to pickle data ({type(err).__name__}): {err}") from None        
        
        f.write(bytes_)

In [None]:
def load_pickle(name:str) -> Any:
    """
    Load serialized object at the designated location. Wrapper around `pickle.loads`
    Assumes correct hard coded path
    
    :Params:
        - name: filename to attach (no `.pkl` extension)
    :Raises:
        - ValueError: `name` contains file extension `.pkl`
        - FileExistsError: if a file with `name` exists
        - RuntimeError: Wrapper around whatever pickle.dumps raises
    """
    from pathlib import Path
    if ".pkl" in name:
        raise ValueError(f"`.pkl` is in file name: {name}")
        
    path = f"/Users/Misha/Documents/python_projects/data_analysis/bitcoin_analysis/pickles/{name}.pkl"
    
    if not Path(path).exists():
        raise FileNotFoundError(f"The file '{name}[.pkl]' is not found in pickles directory.")
    
    with open(path, mode="rb") as f:
        bytes_ = f.read()
        
        try:
            obj = pickle.loads(bytes_)
        except Exception as err:
            raise RuntimeError(f"An internal error occurred while trying to pickle data ({type(err).__name__}): {err}") from None        
        
        return obj

In [None]:
def extract_asset_volume(
    path: str | Path,
    asset: str
) -> float:
    """
    Extract the specified asset's volume(1) from a COMEX volume report (originaly Web_Volume_Report_CMEG_20####.pdf).
    Assume: each asset's name is unique.
    
    :Params:
        - path: the path (with the filename) to the report
        - asset: the full name of the asset to extract (case sensitive)
    :Raises:
        - FileNotFoundError: if path does not exists or the path is not a `.pdf` file
        - ValueError: if the required asset does not exist or if number format is invalid (assume xxx,xxx.xx)
        - RuntimeError: Wrapper around whatever PdfReader raises

    (1) Volume := the trading volume from the month of the report. The leftmost column: "VOLUME ### 20##"
    """
    from pathlib import Path
    import pypdf

    path = Path(path)

    if not path.exists() or not path.is_file():
        raise FileNotFoundError(f"Invalid PDF path: {path}")
    elif path.suffix.lower() != ".pdf":
        raise FileNotFoundError(f"File {path.name} has an invalid file extension: '{path.suffix.lower()}'")

    try:
        reader = pypdf.PdfReader(path)
    except Exception as err:
        raise RuntimeError(
            f"An internal error occurred while trying to read the PDF "
            f"({type(err).__name__}): {err}"
        ) from None
    
    for page in reader.pages:
        try:
            text = page.extract_text()
        except Exception as err:
            raise RuntimeError(
                f"An internal error occurred while trying to extract text ({type(err).__name__}): {err}"
            ) from None
        
        # adding space to reduce number of errors coming from float converting text
        # since incomplete names are also positively located in text
        # which leads to script trying to covert the rest of the name to value
        if asset + " " not in text:
            # hits also if no text, since asset => text
            continue

        for line in text.splitlines():
            if not line.startswith(asset + " "):
                continue
            
            remainder = line[len(asset):].strip().split()
            if not remainder:
                break

            value = remainder[0].replace(",", "")
            # on error raises
            try:
                value = float(value)
            except ValueError as err:
                raise ValueError(f"{err}. Passed asset name ({asset}) may cause this issue. You must pass the whole name (case sensitive)") from None

            return value

    raise ValueError(f"Asset '{asset}' not found.")