In [1]:
# import required libraries
import os
import numpy as np
import pandas as pd
#import warnings.filterwarnings('ignore')
import dotenv
import asyncpg
import asyncio
import pandas as pd 

# make the connection settings available through the process environment (python-dotenv)
dotenv.load_dotenv()

# assemble the PostgreSQL DSN from the `user`, `pass`, `host`, `port` and `db` variables
# NOTE(review): the values are inserted verbatim -- an unset variable becomes the text
# "None", and reserved URL characters in the credentials are not percent-encoded.
# Confirm that the configured values are URL-safe.
DATABASE_URL = "postgresql://{user}:{pass}@{host}:{port}/{db}".format(
    **{key: os.getenv(key) for key in ("user", "pass", "host", "port", "db")}
)

# create an async database connection pool
async def connect_db():
    """Open the shared asyncpg pool and publish it as the module-level ``db_pool``.

    The pool is created against ``DATABASE_URL`` and keeps between 1 and 10
    connections. A confirmation message is printed once the pool is ready.
    """
    global db_pool
    pool_limits = {"min_size": 1, "max_size": 10}
    db_pool = await asyncpg.create_pool(DATABASE_URL, **pool_limits)
    print('Database connected successfully!')

# close the connection pool
async def disconnect_db():
    """Close the shared database connection pool, if one is open.

    Safe to call on a fresh kernel or more than once: ``db_pool`` only exists
    after ``connect_db()`` has run, so it is looked up defensively instead of
    being read directly (which would raise ``NameError``). After closing, the
    global is reset to ``None`` so a closed pool is never left behind for later
    callers. The confirmation message is printed in every case.
    """
    global db_pool
    # a plain `if db_pool:` fails with NameError when connect_db() never ran
    pool = globals().get("db_pool")
    if pool is not None:
        await pool.close()
    # drop the reference so nothing can try to reuse a closed pool
    db_pool = None
    print('Database connection closed!')


async def table2df(schema_name: str, table_name: str, timeframe: str):
    """
        Load a whole table from the database into a cleaned, time-indexed Pandas DataFrame.

        parameters
        ----------
        schema_name (str): The schema that contains the table.
        table_name (str): The name of the table in the database.
        timeframe (str): Suffix of the time-bucket column; the column named
            ``bucket_<timeframe>`` (matched in lowercase) becomes the index.

        returns
        -------
        df (pd.DataFrame): The table data with lowercase column names, a datetime
            index sorted in ascending order and every column cast to float. Rows
            holding a missing value or 0.0 in any column are dropped. An empty
            DataFrame is returned when the table has no rows or when any error
            occurs (the error is printed, not raised).
    """

    try:
        # NOTE(review): schema_name / table_name are interpolated into the SQL text
        # as-is (identifiers cannot be bound as query parameters), so they must only
        # ever come from trusted code, never from user input.
        query = f"SELECT * FROM {schema_name}.{table_name}"

        # hold the pooled connection only for the fetch itself
        async with db_pool.acquire() as conn:
            rows = await conn.fetch(query)

        # convert asyncpg.Record objects to a list of dictionaries & then to pandas
        df = pd.DataFrame([dict(row) for row in rows])

        if df.empty:
            return df  # return an empty DataFrame if no data

        # standardize all column names to lowercase
        df.columns = df.columns.str.lower()

        # columns were just lower-cased, so the bucket column name must be too
        df = df.set_index(f"bucket_{timeframe}".lower())

        # ensure the index is in datetime format
        df.index = pd.to_datetime(df.index)

        # treat 0.0 as missing, drop incomplete rows, then force float dtype
        df = df.replace(0.0, pd.NA).dropna(how="any").astype("float")

        # the query has no ORDER BY, so row order is unspecified; rolling/EMA
        # indicators computed on this frame need chronological order
        return df.sort_index()

    except Exception as e:
        # best-effort loader: report the problem and hand back an empty frame
        print("An error occurred:", e)
        return pd.DataFrame()  # return an empty DataFrame in case of failure


In [2]:
# open the shared connection pool (sets the global db_pool used by table2df)
await connect_db()

Database connected successfully!


In [3]:
# retrieve 6-hour gold OHLC data, indexed by the bucket_6h column
gold_6h = await table2df("gold", "ohlc_data_6hr_bid_xau_usd", "6h")
# retrieve daily gold OHLC data, indexed by the bucket_daily column
gold_daily = await table2df("gold", "ohlc_data_daily_bid_xau_usd", "daily")

In [4]:
# preview the first and last three 6-hour bars
print(gold_6h.head(3))
print(gold_6h.tail(3))

                              open     high      low    close    volume
bucket_6h                                                              
2025-03-27 00:00:00+00:00  3021.79  3038.57  3021.62  3029.65   75729.0
2025-03-27 06:00:00+00:00  3029.65  3055.79  3026.59  3054.58   92939.0
2025-03-27 12:00:00+00:00  3054.59  3059.62  3033.44  3052.91  120149.0
                              open     high      low    close    volume
bucket_6h                                                              
2025-08-08 06:00:00+00:00  3392.24  3404.22  3380.33  3386.18  172042.0
2025-08-08 12:00:00+00:00  3386.18  3401.67  3381.10  3382.40  172696.0
2025-08-08 18:00:00+00:00  3381.62  3404.35  3374.69  3398.73   77489.0


In [5]:
# preview the first and last three daily bars
print(gold_daily.head(3))
print(gold_daily.tail(3))

                              open     high      low    close    volume
bucket_daily                                                           
2025-06-05 00:00:00+00:00  3377.40  3403.42  3339.34  3361.95  407944.0
2025-06-06 00:00:00+00:00  3361.79  3375.53  3307.06  3309.85  370173.0
2025-06-08 00:00:00+00:00  3312.09  3321.08  3308.98  3318.02   23043.0
                              open     high      low    close    volume
bucket_daily                                                           
2025-08-06 00:00:00+00:00  3383.00  3385.36  3358.10  3370.17  433692.0
2025-08-07 00:00:00+00:00  3369.66  3408.61  3365.17  3400.18  525919.0
2025-08-08 00:00:00+00:00  3401.07  3404.35  3374.69  3398.73  581323.0


In [None]:
def calculate_slowD(df: pd.DataFrame, k_period: int = 9, d_period: int = 3) -> pd.DataFrame:
    """A stochastic function that calculates the Fast %K & Slow %D using EMA.

    The input frame is modified in place: the columns ``fastk``, ``fastd`` and
    ``slowd`` are added (or overwritten) and the same object is returned. No
    other column is touched.

    Parameters
    ----------
    df: pd.DataFrame (Input dataframe containing 'high', 'low' and 'close' columns.)
    k_period: int, optional (Rolling window used for the Fast %K <default is 9>.)
    d_period: int, optional (EMA span used for the Slow %D <default is 3>.)

    Returns
    -------
    pd.DataFrame (The same dataframe, now containing Fast %K, Fast %D, and Slow %D (EMA).
                  The first k_period - 1 rows are NaN because the window is incomplete.)
    """

    # rolling extremes are kept in local Series instead of scratch columns, so a
    # caller's own 'highest_high' / 'lowest_low' columns are never overwritten or dropped
    highest_high = df['high'].rolling(window=k_period).max()
    lowest_low = df['low'].rolling(window=k_period).min()

    # a zero high-low range would divide by zero; mask it so the result is NaN
    # rather than +/-inf, which would otherwise propagate through the EMA below
    price_range = highest_high - lowest_low
    price_range = price_range.where(price_range != 0)

    # calculate Fast %K: position of the close inside the window's range, in percent
    df['fastk'] = ((df['close'] - lowest_low) / price_range) * 100

    # calculate Fast %D (EMA of Fast %K with period 1, which is just Fast %K itself)
    df['fastd'] = df['fastk']

    # calculate Slow %D (EMA of Fast %D with span d_period)
    df['slowd'] = df['fastd'].ewm(span=d_period, adjust=False).mean()

    # return the dataframe with stochastic values
    return df


In [7]:
def calculate_cci(df: pd.DataFrame, period: int) -> pd.DataFrame:
    """ Compute the Commodity Channel Index (CCI) over a rolling lookback window.

        The input frame is modified in place: a 'typical_price' column and a
        'CCI<period>' column are added, and the same object is returned.

        Parameters
        ----------
        df: pd.DataFrame (Input dataframe containing 'high', 'low' and 'close' columns.)
        period: int (lookback period, in rows)

        Returns
        -------
        pd.DataFrame (The same dataframe with 'typical_price' and 'CCI<period>' added.)
    """

    def _mean_abs_deviation(window):
        # average absolute distance of the window's values from the window mean
        return np.abs(window - window.mean()).mean()

    # typical price = average of high, low and close
    df['typical_price'] = df['high'].add(df['low']).add(df['close']).div(3)

    # one rolling view serves both the moving average and the mean deviation
    windowed = df['typical_price'].rolling(window=period)
    moving_average = windowed.mean()
    mean_deviation = windowed.apply(_mean_abs_deviation, raw=True)

    # CCI = (typical price - SMA) / (0.015 * mean deviation)
    scaled_deviation = 0.015 * mean_deviation
    df[f'CCI{period}'] = (df['typical_price'] - moving_average) / scaled_deviation

    return df

In [8]:
# add fastk / fastd / slowd to the 6-hour bars (9-bar %K window, EMA span 3 for slow %D)
gold_6h = calculate_slowD(gold_6h, k_period=9, d_period=3)
print(gold_6h.tail(3))

                              open     high      low    close    volume  \
bucket_6h                                                                 
2025-08-08 06:00:00+00:00  3392.24  3404.22  3380.33  3386.18  172042.0   
2025-08-08 12:00:00+00:00  3386.18  3401.67  3381.10  3382.40  172696.0   
2025-08-08 18:00:00+00:00  3381.62  3404.35  3374.69  3398.73   77489.0   

                               fastk      fastd      slowd  
bucket_6h                                                   
2025-08-08 06:00:00+00:00  55.592952  55.592952  64.735341  
2025-08-08 12:00:00+00:00  48.109285  48.109285  56.422313  
2025-08-08 18:00:00+00:00  79.569892  79.569892  67.996103  


In [9]:
# add typical_price and CCI3 (3-bar lookback) to the 6-hour bars
# NOTE(review): with a 3-bar window the formula gives exactly +/-100 whenever the newest
# typical price is the only value on its side of the window mean (see the last two rows
# of the output below) -- confirm that such a short period is intended.
gold_6h = calculate_cci(gold_6h, period=3)
gold_6h.tail(3)

Unnamed: 0_level_0,open,high,low,close,volume,fastk,fastd,slowd,typical_price,CCI3
bucket_6h,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2025-08-08 06:00:00+00:00,3392.24,3404.22,3380.33,3386.18,172042.0,55.592952,55.592952,64.735341,3390.243333,-66.251638
2025-08-08 12:00:00+00:00,3386.18,3401.67,3381.1,3382.4,172696.0,48.109285,48.109285,56.422313,3388.39,-100.0
2025-08-08 18:00:00+00:00,3381.62,3404.35,3374.69,3398.73,77489.0,79.569892,79.569892,67.996103,3392.59,100.0
