In [1]:
import sys
import os
# Get the absolute path to the project directory
project_dir = os.path.abspath("..")

# Append the project directory to sys.path
if project_dir not in sys.path:
    sys.path.append(project_dir)

import pandas as pd
import yaml
from datetime import datetime as dt, date
import requests
import logging
from datetime import datetime, timedelta
from src.common.AssetData import AssetData
from src.common.AssetDataService import AssetDataService
from src.common.AssetFileInOut import AssetFileInOut
from src.databaseService.Merger import Merger
from src.databaseService.Parser import Parser_AV
from src.databaseService.CleanData import CleanData
from src.common.AssetDataService import AssetDataService

from alpha_vantage.timeseries import TimeSeries
from alpha_vantage.fundamentaldata import FundamentalData

logger = logging.getLogger()
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter(fmt="%(message)s")
handler.setFormatter(formatter)
if not logger.hasHandlers():
    logger.addHandler(handler)
else:
    logger.handlers[:] = [handler]
# Usage
logger.info("This will print to the notebook’s output cell")

This will print to the notebook’s output cell


In [2]:
# Define paths
current_dir = os.getcwd()
desired_folder = "secrets"
absolute_path_to_folder = os.path.join(os.path.abspath(os.path.join(current_dir, "..")), "secrets")

# Path to the YAML file
yaml_file_path = os.path.join("../secrets", "alphaVantage.yaml")

# Read and load the YAML file
try:
    with open(yaml_file_path, 'r') as file:  # Open the YAML file for reading
        config = yaml.safe_load(file)  # Load the YAML content
        apiKey = config['alphaVantage_premium']['apiKey']  # Access the required key
except PermissionError:
    print("Permission denied. Please check file permissions.")
except FileNotFoundError:
    print("File not found. Please verify the path.")
except KeyError:
    print("KeyError: Check the structure of the YAML file.")
except yaml.YAMLError as e:
    print("YAML Error:", e)

In [3]:
ticker = "AAPL"
ts = TimeSeries(key=apiKey, output_format='pandas')
fd = FundamentalData(key=apiKey, output_format='pandas')

datashareprice, _ = ts.get_daily_adjusted(symbol=ticker, outputsize='full')

url = 'https://www.alphavantage.co/query?function=INCOME_STATEMENT&symbol='+ticker+'&apikey='+apiKey
incStatementData = requests.get(url).json()
url = 'https://www.alphavantage.co/query?function=CASH_FLOW&symbol='+ticker+'&apikey='+apiKey
cashFlowData = requests.get(url).json()
url = 'https://www.alphavantage.co/query?function=BALANCE_SHEET&symbol='+ticker+'&apikey='+apiKey
balanceSheetData = requests.get(url).json()
url = 'https://www.alphavantage.co/query?function=EARNINGS&symbol='+ticker+'&apikey='+apiKey
earningsData = requests.get(url).json()

if incStatementData=={} or cashFlowData == {} or balanceSheetData == {} or earningsData == {}:
    raise ImportError(f"Empty Financial Data")

tickershareprice = Parser_AV(sharepriceData=datashareprice).parse_shareprice()
tickershareprice_dated = tickershareprice.copy()
tickershareprice_dated["Date"] = tickershareprice_dated["Date"].apply(lambda x: dt.strptime(x, '%Y-%m-%d').date())
mask = (tickershareprice_dated["Date"] >= date(2024, 1, 1)) & \
       (tickershareprice_dated["Date"] <= date(2024, 12, 31))
tickershareprice_dated = tickershareprice_dated.loc[mask]
tickershareprice_dated["Date"] = tickershareprice_dated["Date"].apply(lambda x: str(x))
tickerfinquar, tickerfinann = Parser_AV(
    incStatementData = incStatementData, 
    cashFlowData = cashFlowData, 
    balanceSheetData = balanceSheetData, 
    earningsData = earningsData
).parse_financials()

## Mock instances

In [4]:
price_cols = ['Date','Open','High','Low','Close','AdjClose','Volume','Dividends','Splits']
finq_cols = ['fiscalDateEnding','reportedDate','reportedEPS','estimatedEPS',
             'surprise','surprisePercentage','reportTime','grossProfit','totalRevenue',
             'ebit','ebitda','totalAssets','totalCurrentLiabilities',
             'totalShareholderEquity','commonStockSharesOutstanding','operatingCashflow']
fina_cols = ['fiscalDateEnding','reportedEPS','grossProfit','totalRevenue',
             'ebit','ebitda','totalAssets','totalCurrentLiabilities',
             'totalShareholderEquity','operatingCashflow']

# 1. Empty instance
empty_price = pd.DataFrame(columns=price_cols)
empty_finq = pd.DataFrame(columns=finq_cols)
empty_fina = pd.DataFrame(columns=fina_cols)
asset_empty = AssetData(ticker="EMPTY", shareprice=empty_price,
                        financials_quarterly=empty_finq,
                        financials_annually=empty_fina)

# 2. Single-row instance
single_price = pd.DataFrame([{
    'Date': '2024-03-28','Open': 100.0,'High': 105.0,'Low': 99.0,'Close': 104.0,'AdjClose': 104.0,'Volume': 1_000_000,'Dividends': 0.5,'Splits': 0.0
}])
single_finq = pd.DataFrame([{
    'fiscalDateEnding': '2024-03-31','reportedDate': '2024-04-15','reportedEPS': 1.2,'estimatedEPS': 1.1,'surprise': 0.1,'surprisePercentage': 9.1,'reportTime': 'post-market','grossProfit': 500_000,'totalRevenue': 1_000_000,'ebit': 200_000,'ebitda': 250_000,'totalAssets': 5_000_000,'totalCurrentLiabilities': 1_000_000,'totalShareholderEquity': 4_000_000,'commonStockSharesOutstanding': 10_000_000,'operatingCashflow': 150_000
}])
single_fina = pd.DataFrame([{
    'fiscalDateEnding': '2024-12-31','reportedEPS': 4.8,'grossProfit': 2_000_000,'totalRevenue': 4_000_000,'ebit': 800_000,'ebitda': 1_000_000,'totalAssets': 20_000_000,'totalCurrentLiabilities': 5_000_000,'totalShareholderEquity': 15_000_000,'operatingCashflow': 600_000
}])
asset_single = AssetData(ticker="ONE", shareprice=single_price,
                         financials_quarterly=single_finq,
                         financials_annually=single_fina)

# 3. Three-row instance
three_price = pd.DataFrame([
    {'Date':'2024-01-10','Open': 50,'High': 55,'Low':49,'Close':54,'AdjClose':54,'Volume':500_000,'Dividends':0,'Splits':0},
    {'Date':'2024-02-10','Open': 52,'High': 57,'Low':51,'Close':56,'AdjClose':56,'Volume':600_000,'Dividends':0,'Splits':0},
    {'Date':'2024-03-10','Open': 54,'High': 59,'Low':53,'Close':58,'AdjClose':58,'Volume':700_000,'Dividends':0,'Splits':0}
])
three_finq = pd.DataFrame([
    {'fiscalDateEnding':'2024-03-31','reportedDate':'2024-04-15','reportedEPS':0.8,'estimatedEPS':0.7,'surprise':0.1,'surprisePercentage':14.3,'reportTime':'pre-market','grossProfit':300_000,'totalRevenue':600_000,'ebit':100_000,'ebitda':120_000,'totalAssets':3_000_000,'totalCurrentLiabilities':800_000,'totalShareholderEquity':2_200_000,'commonStockSharesOutstanding':8_000_000,'operatingCashflow':90_000},
    {'fiscalDateEnding':'2024-06-30','reportedDate':'2024-07-15','reportedEPS':0.9,'estimatedEPS':0.85,'surprise':0.05,'surprisePercentage':5.9,'reportTime':'post-market','grossProfit':320_000,'totalRevenue':640_000,'ebit':110_000,'ebitda':130_000,'totalAssets':3_200_000,'totalCurrentLiabilities':820_000,'totalShareholderEquity':2_380_000,'commonStockSharesOutstanding':8_000_000,'operatingCashflow':95_000},
    {'fiscalDateEnding':'2024-09-30','reportedDate':'2024-10-15','reportedEPS':1.0,'estimatedEPS':0.95,'surprise':0.05,'surprisePercentage':5.3,'reportTime':'post-market','grossProfit':340_000,'totalRevenue':680_000,'ebit':120_000,'ebitda':140_000,'totalAssets':3_400_000,'totalCurrentLiabilities':840_000,'totalShareholderEquity':2_560_000,'commonStockSharesOutstanding':8_000_000,'operatingCashflow':100_000}
])
three_fina = pd.DataFrame([
    {'fiscalDateEnding':'2024-04-30','reportedEPS':3.5,'grossProfit':1_200_000,'totalRevenue':2_400_000,'ebit':500_000,'ebitda':600_000,'totalAssets':12_000_000,'totalCurrentLiabilities':3_000_000,'totalShareholderEquity':9_000_000,'operatingCashflow':400_000},
    {'fiscalDateEnding':'2024-08-31','reportedEPS':3.7,'grossProfit':1_300_000,'totalRevenue':2_600_000,'ebit':520_000,'ebitda':620_000,'totalAssets':12_500_000,'totalCurrentLiabilities':3_100_000,'totalShareholderEquity':9_400_000,'operatingCashflow':420_000},
    {'fiscalDateEnding':'2024-12-31','reportedEPS':3.9,'grossProfit':1_400_000,'totalRevenue':2_800_000,'ebit':540_000,'ebitda':640_000,'totalAssets':13_000_000,'totalCurrentLiabilities':3_200_000,'totalShareholderEquity':9_800_000,'operatingCashflow':440_000}
])
asset_three = AssetData(ticker="THREE", shareprice=three_price,
                        financials_quarterly=three_finq,
                        financials_annually=three_fina)


## Testing merge_shareprice

## Shareprice

In [5]:
# Test 1: init
asset_empty_new = AssetDataService.copy(asset_empty)
merger = Merger(asset_empty_new)
merger.merge_shareprice(tickershareprice_dated)
assert asset_empty_new.shareprice.equals(tickershareprice_dated), f"Expected no changes, but asset state mutated: {asset_empty_new.shareprice}" 
print('✅ Test 1 passed: New rows appended correctly.')

# Test 2: single overtake
asset_single_new = AssetDataService.copy(asset_single)
merger = Merger(asset_single_new)
merger.merge_shareprice(tickershareprice_dated)
assert asset_single_new.shareprice.equals(tickershareprice_dated), f"Expected no changes, but asset state mutated: {asset_single_new.shareprice}" 
print('✅ Test 2 passed: Changes single completely.')

  No existing shareprice data for ticker EMPTY.
✅ Test 1 passed: New rows appended correctly.
  Open_ratio: 1 values outside +-1%
  High_ratio: 1 values outside +-1%
  Low_ratio: 1 values outside +-1%
  Close_ratio: 1 values outside +-1%
  Splits_ratio: 1 values outside +-1%
  Volume_diff: 1 values outside +-1%
  Dividends_diff: 1 values outside +-1%
  Added 251 new rows to shareprice data of ticker ONE.


AssertionError: Expected no changes, but asset state mutated:            Date     Open      High      Low   Close    AdjClose      Volume  \
0    2024-01-02  187.150  188.4400  183.885  185.64  184.532826  82488674.0   
1    2024-01-03  184.220  185.8800  183.430  184.25  183.151116  58414460.0   
2    2024-01-04  182.150  183.0872  180.880  181.91  180.825072  71983570.0   
3    2024-01-05  181.990  182.7600  180.170  181.18  180.099426  62196924.0   
4    2024-01-08  182.085  185.6000  181.500  185.56  184.453303  59144470.0   
..          ...      ...       ...      ...     ...         ...         ...   
247  2024-12-24  255.490  258.2100  255.290  258.20  257.916762  23234705.0   
248  2024-12-26  258.190  260.1000  257.630  259.02  258.735862  27262983.0   
249  2024-12-27  257.830  258.7000  253.060  255.59  255.309625  42355321.0   
250  2024-12-30  252.230  253.5000  250.750  252.20  251.923344  35557542.0   
251  2024-12-31  252.440  253.2800  249.430  250.42  250.145296  39480718.0   

     Dividends  Splits  
0          0.0     1.0  
1          0.0     1.0  
2          0.0     1.0  
3          0.0     1.0  
4          0.0     1.0  
..         ...     ...  
247        0.0     1.0  
248        0.0     1.0  
249        0.0     1.0  
250        0.0     1.0  
251        0.0     1.0  

[252 rows x 9 columns]

In [None]:

merger.merge_shareprice(fullSharePrice)
assert asset.shareprice.equals(result), f"Expected no changes, but asset state mutated: {asset.shareprice}" 
print('✅ Test 0.2 passed: Asset unchanged when no new rows.')

asset_empty = AssetDataService.defaultInstance(ticker = "TEST")
merger_empty = Merger_AV(asset_empty)
merger_empty.merge_shareprice(fullSharePrice)
fullSharePrice.index.name = "date"
fullSharePrice = fullSharePrice.iloc[::-1]
fullSharePrice.reset_index(inplace=True)
fullSharePrice.rename(columns={
            'date': 'Date',
            '1. open': 'Open',
            '2. high': 'High',
            '3. low': 'Low',
            '4. close': 'Close',
            '5. adjusted close': 'AdjClose',
            '6. volume': 'Volume',
            '7. dividend amount': 'Dividends',
            '8. split coefficient': 'Splits'
        }, inplace=True)
fullSharePrice['Date'] = fullSharePrice['Date'].apply(lambda ts: str(ts.date()))
tmp = asset_empty.shareprice
assert asset_empty.shareprice.equals(fullSharePrice), f"Expected takeover of fullShareprice, but it changed: {asset_empty.shareprice}"
print('✅ Test 0.3 passed: Asset unchanged when no new rows.')

# Test 1: New rows
asset = AssetData(ticker='TEST', shareprice=_df_init.copy())
asset_copy = AssetData(ticker='TEST', shareprice=_df_init.copy())
merger = Merger_AV(asset)
full = make_full_shareprice([date(2021, 1, 2), date(2021, 1, 3)])
merger.merge_shareprice(full)
# asset.shareprice should be unchanged, so asset equals asset_copy
assert not asset.shareprice.equals(asset_copy.shareprice), f"Expected changes, but asset state did not mutate: {asset.shareprice}" 
print('✅ Test 1 passed: Asset changed when no new rows.')

# Test 2: Out-of-order input
asset = AssetData(ticker='TEST', shareprice=_df_init.copy())
merger = Merger_AV(asset)
dates = [date(2021, 1, 5), date(2021, 1, 4)]
full = make_full_shareprice(dates)
merger.merge_shareprice(full)
res_dates = [dt.strptime(d, '%Y-%m-%d').date() for d in asset.shareprice['Date']]
assert all(d in res_dates for d in dates), "Not all new dates present after merge."
assert res_dates == sorted(res_dates), "Dates are not sorted after merge."
print('✅ Test 2 passed: Out-of-order input merged and sorted correctly.')

# Test 3: Splits and dividends changes
asset = AssetData(ticker='TEST', shareprice=_df_init.copy())
asset_copy = AssetData(ticker='TEST', shareprice=_df_init.copy())
merger = Merger_AV(asset)
chg_date = date(2021, 1, 2)
overrides = {
    '7. dividend amount': [0.5],
    '8. split coefficient': [2.0]
}
full = make_full_shareprice([chg_date], overrides)
merger.merge_shareprice(full)
assert not asset.shareprice.equals(asset_copy.shareprice), f"Expected changes, but asset state did not mutate: {asset.shareprice}" 
print('✅ Test 3 passed: Splits and dividends changes logged without new rows.')

# Test 4: String-vs-date resilience
# Introduce malformed date in existing data
_bad = _df_init.copy()
_bad.loc[0, 'Date'] = '2021/01/02'
asset = AssetData(ticker='TEST', shareprice=_bad)
merger = Merger_AV(asset)
try:
    full = make_full_shareprice([date(2021, 1, 2)])
    merger.merge_shareprice(full)
    raise AssertionError('Expected ValueError for malformed date, but none was raised.')
except ValueError:
    print('✅ Test 4 passed: Malformed date string raises ValueError.')

## Financials

In [None]:
cols_ann = [
    'fiscalDateEnding','reportedEPS','grossProfit','totalRevenue','ebit','ebitda',
    'totalAssets','totalCurrentLiabilities','totalShareholderEquity',
    'operatingCashflow'
]
cols_quar = [
    'fiscalDateEnding','reportedDate','reportedEPS','estimatedEPS','surprise',
    'surprisePercentage','reportTime','grossProfit','totalRevenue','ebit',
    'ebitda','totalAssets','totalCurrentLiabilities','totalShareholderEquity',
    'commonStockSharesOutstanding','operatingCashflow'
]

#Test 0: init merge financials
asset = AssetDataService.defaultInstance()
Merger_AV(asset).merge_financials(financials_annually, financials_quarterly)
finann = financials_annually[cols_ann].copy()
finann['fiscalDateEnding'] = finann['fiscalDateEnding'].apply(lambda ts: str(ts.date()))
finquar = financials_quarterly[cols_quar].copy()
finquar['fiscalDateEnding'] = finquar['fiscalDateEnding'].apply(lambda ts: str(ts.date()))
finquar['reportedDate'] = finquar['reportedDate'].apply(
            lambda x: x.date().__str__()
        )
resann = asset.financials_annually
resquar = asset.financials_quarterly
assert asset.financials_annually.equals(finann), f"Expected take over, but not the same annual financials."
assert asset.financials_quarterly.equals(finquar), f"Expected take over, but not the same quarterly financials."
print('✅ Test 0.1 passed: New rows appended correctly.')

asset.financials_annually = asset.financials_annually.drop(index=len(asset.financials_annually)-3)
asset.financials_quarterly = asset.financials_quarterly.drop(index=len(asset.financials_quarterly)-3)
resann = asset.financials_annually
resquar = asset.financials_quarterly
assert not asset.financials_annually.equals(finann), f"Expected difference, but the same annual financials."
assert not asset.financials_quarterly.equals(finquar), f"Expected difference, but the same quarterly financials."
Merger_AV(asset).merge_financials(financials_annually, financials_quarterly)
resann = asset.financials_annually
resquar = asset.financials_quarterly
assert asset.financials_annually.equals(finann), f"Expected take over, but not the same annual financials."
assert asset.financials_quarterly.equals(finquar), f"Expected take over, but not the same quarterly financials."
print('✅ Test 0.2 passed: Disturbance corrected correctly.')

# Test 1: Check annual results
Merger_AV(test_Asset).merge_financials(df_ann, df_quar)
ann = test_Asset.financials_annually
assert "2023-12-31" in ann['fiscalDateEnding'].values, "New annual row not appended"
# original row had None reportedEPS, should remain None (we don't overwrite non‐NaN)
# but totalRevenue was None and new for 2022 was skipped (same year), so stays None
assert ann.shape[0] == 2, f"Expected 2 annual rows, got {ann.shape[0]}"
print('✅ Test 1 passed: Check annual results.')

# Test 2: Check quarterly results
q = test_Asset.financials_quarterly
dates = set(q['fiscalDateEnding'].values)
# original 2023-03-31 should still exist
assert "2023-03-31" in dates
# recent_q and old_q (formatted) should be present
for d in [recent_q, old_q]:
    assert str(d) in dates, f"Quarter {d} missing"
# the filled fields
row = q[q['fiscalDateEnding']=="2023-03-31"].iloc[0]
assert float(row['reportedEPS']) == 0.5, "reportedEPS not filled correctly"
assert float(row['grossProfit']) == 105, "grossProfit not filled correctly"
print('✅ Test 2 passed: Check quarterly results.')

✅ Test 0.1 passed: New rows appended correctly.
✅ Test 0.2 passed: Disturbance corrected correctly.
✅ Test 1 passed: Check annual results.
✅ Test 2 passed: Check quarterly results.


  existing_ann = pd.concat((existing_ann, pd.DataFrame([new], columns=existing_ann.columns)), ignore_index=True)
