In [1]:
import os
import sys
import pandas as pd

In [2]:
# Add project root to sys.path (adjust as needed)
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))  # go up one directory
sys.path.append(project_root)

In [3]:
from io import StringIO
from dotenv import load_dotenv
from azure.storage.blob import BlobServiceClient

from src.utils.helper import format_to_decimal, check_or_create_logfile, append_to_log

In [4]:
#service_client = BlobServiceClient.from_connection_string(os.getenv("AZURE_CONNECTION_STRING"))
#blob_container = service_client.get_container_client(os.getenv("CONTAINER_NAME"))

In [103]:
def clean_data(df):
    df["Volume"] = df["Volume"].str.replace(',','').replace("'","").replace(".","").astype(int)
    df["Date"] = pd.to_datetime(df["Date"], format='%m/%d/%Y')
    
    df["Open"] = 99 #df["Open"].apply(format_to_decimal).astype(float)
    df["High"] = df["High"].apply(format_to_decimal).astype(float)
    df["Close"] = df["Close"].apply(format_to_decimal).astype(float)
    df["Low"] = df["Low"].apply(format_to_decimal).astype(float)
    return df

In [104]:
def add_features(df: pd.DataFrame, source: str):

    df.insert(0, "Source", source)    

    # Dates
    df["Month"] = df["Date"].dt.month
    df["Day"] = df["Date"].dt.day
    df["Year"] = df["Date"].dt.year
    df["DayOfWeek"] = df["Date"].dt.dayofweek

    df["MA_5"] = df["Close"].rolling(5).mean().apply(format_to_decimal)
    df["MA_20"] = df["Close"].rolling(20).mean().apply(format_to_decimal)
    df["EMA_5"] = df["Close"].ewm(span=5, adjust=False).mean().apply(format_to_decimal)
    df["EMA_20"] = df["Close"].ewm(span=20, adjust=False).mean().apply(format_to_decimal)
    df["STD_5"] = df["Close"].rolling(5).mean().apply(format_to_decimal)
    df["STD_20"] = df["Close"].rolling(20).mean().apply(format_to_decimal)

    df["DailyReturn"] = df["Close"].pct_change().apply(format_to_decimal)
    df["Volatility"] = df["DailyReturn"].rolling(20).std().apply(format_to_decimal)
    df["PriceChange"] = (df["Close"] - df["Open"]).apply(format_to_decimal)
 
    return df

In [105]:
def upload_silver_to_blob(df, serviceClient, containerName, blobName):
#    if 'Date' in df.columns:
 #       df['Date'] = pd.to_datetime(df['Date'], errors='coerce')  # Safety
  #      df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')  # Format to YYYY-MM-DD


    csv_buffer = StringIO()
    df.to_csv(csv_buffer, index=False, na_rep='', encoding='utf-8')
    csv_buffer.seek(0)

    blob_client = serviceClient.get_blob_client(container=containerName, blob=blobName)
    blob_client.upload_blob(csv_buffer.getvalue(), overwrite=True)

In [106]:
def append_latest_to_silver(df, serviceClient, containerName, blobName):
    try:
        blob_client = serviceClient.get_blob_client(container=containerName, blob=blobName)
        existing_data = blob_client.download_blob().readall().decode('utf-8')
        existing_df = pd.read_csv(StringIO(existing_data))

        df = pd.concat([existing_df, df], ignore_index=True)
    except Exception as e:
        print(f"Error reading existing data: {e}")

    upload_silver_to_blob(df, serviceClient, containerName, blobName)

In [113]:
def main():
    
    load_dotenv()

    combinedData = pd.DataFrame() # STAGING DATAFAME TO HANDLE FILES
    serviceClient = BlobServiceClient.from_connection_string(os.getenv("AZURE_CONNECTION_STRING"))
    blobContainer = serviceClient.get_container_client(os.getenv("CONTAINER_NAME"))

    bronzeLocation = os.getenv("BRONZE_LOCATION")
    silverFile = os.getenv("SILVER_LOCATION")
   
    for blob in blobContainer.list_blobs(name_starts_with=f"{bronzeLocation}"):
        
        blob_client = blobContainer.get_blob_client(blob.name)
        stockPrices = blob_client.download_blob().readall().decode('utf-8')

        data = pd.read_csv(StringIO(stockPrices), keep_default_na=False, na_values=[''])
        
        data = data.head(1)
        data = clean_data(data)
        data = add_features(data, os.path.basename(blob.name).replace('.csv',''))

        

        combinedData = pd.concat([combinedData, data], ignore_index=True)
    
    upload_silver_to_blob(combinedData, serviceClient, os.getenv("CONTAINER_NAME"), f"{silverFile}silver_output.csv")
    #append_latest_to_silver(combined_df, serviceClient, os.getenv("CONTAINER_NAME"), f"{silverFileOutput}silver_output.csv")
    return combinedData

In [114]:
s = main()

In [115]:
s

Unnamed: 0,Source,Date,Open,High,Low,Close,Volume,Month,Day,Year,DayOfWeek,MA_5,MA_20,EMA_5,EMA_20,STD_5,STD_20,DailyReturn,Volatility,PriceChange
0,bdo,2025-07-29,99,149.9,148.5,149.0,2563290,7,29,2025,1,,,149.0,149.0,,,,,50.0
1,creit,2025-07-30,99,3.69,3.66,3.67,1370000,7,30,2025,2,,,3.67,3.67,,,,,-95.33
2,globe,2025-07-30,99,1671.0,1660.0,1669.0,25220,7,30,2025,2,,,1669.0,1669.0,,,,,1570.0
3,mreit,2025-07-30,99,14.38,14.24,14.2,588600,7,30,2025,2,,,14.2,14.2,,,,,-84.8
4,rcr,2025-07-30,99,7.8,7.72,7.8,1208600,7,30,2025,2,,,7.8,7.8,,,,,-91.2


In [101]:
serviceClient = BlobServiceClient.from_connection_string(os.getenv("AZURE_CONNECTION_STRING"))
blobContainer = serviceClient.get_container_client(os.getenv("CONTAINER_NAME"))
blob_name = f"{os.getenv('SILVER_OUTPUT')}silver_output.csv"
blob_client = blobContainer.get_blob_client(blob_name)
content = blob_client.download_blob().readall().decode('utf-8')
existing_data = blob_client.download_blob().readall().decode('utf-8')
existing_df = pd.read_csv(StringIO(existing_data))


In [102]:
existing_df

Unnamed: 0,Source,Date,Open,High,Low,Close,Volume,Month,Day,Year,DayOfWeek,MA_5,MA_20,EMA_5,EMA_20,STD_5,STD_20,DailyReturn,Volatility,PriceChange
0,bdo,2025-07-29,0,149.9,148.5,149.0,2563290,7,29,2025,1,,,149.0,149.0,,,,,149.0
1,creit,2025-07-30,0,3.69,3.66,3.67,1370000,7,30,2025,2,,,3.67,3.67,,,,,3.67
2,globe,2025-07-30,0,1671.0,1660.0,1669.0,25220,7,30,2025,2,,,1669.0,1669.0,,,,,1669.0
3,mreit,2025-07-30,0,14.38,14.24,14.2,588600,7,30,2025,2,,,14.2,14.2,,,,,14.2
4,rcr,2025-07-30,0,7.8,7.72,7.8,1208600,7,30,2025,2,,,7.8,7.8,,,,,7.8
5,bdo,2025-07-29 00:00:00,0,149.9,148.5,149.0,2563290,7,29,2025,1,,,149.0,149.0,,,,,149.0
6,creit,2025-07-30 00:00:00,0,3.69,3.66,3.67,1370000,7,30,2025,2,,,3.67,3.67,,,,,3.67
7,globe,2025-07-30 00:00:00,0,1671.0,1660.0,1669.0,25220,7,30,2025,2,,,1669.0,1669.0,,,,,1669.0
8,mreit,2025-07-30 00:00:00,0,14.38,14.24,14.2,588600,7,30,2025,2,,,14.2,14.2,,,,,14.2
9,rcr,2025-07-30 00:00:00,0,7.8,7.72,7.8,1208600,7,30,2025,2,,,7.8,7.8,,,,,7.8
