# Testing Jupyter 

In [11]:
import pandas as pd
from datetime import datetime

df = pd.read_excel("daily_bitcoin_ohlc.xlsx")

# print(df.head())

def calculate_daily_returns(df) -> pd.Series:
    """
        Calculate the daily returns from a DataFrame with a 'Close' column.

        Parameters:
            df (pd.DataFrame): DataFrame containing historical price data with a 'Close' column.

        Returns:
            pd.Series: A Series of daily return values (as decimal percentages).
    """

    daily_returns = df['close'].pct_change()
    # .pct_change() is a built in pandas method 
    # ( calculates the percentage change between the current and the prev element)
    return daily_returns

df['timestamp'] = pd.to_datetime(df['timestamp'])

df['Daily Return'] = calculate_daily_returns(df)

In [10]:
def extract_date_components(timestamp_input) -> dict:
    """
    Transform a timestamp (string in “YYYY-MM-DD HH:MM:SS” format *or* a
    datetime.datetime object) into a dictionary of calendar-based features.

    Parameters
    ----------
    timestamp_input : str | datetime.datetime
        Either the timestamp string (e.g. "2023-10-26 14:30:00")
        or an already-parsed datetime object.

    Returns
    -------
    dict
        {
            "year":               2023,
            "quarter":            4,
            "month_number":       10,
            "month_name":         "October",
            "week_of_year":       43,      # ISO-8601 week number
            "day_of_week_number": 4,       # Monday=1 … Sunday=7  (ISO)
            "day_of_week_name":   "Thursday"
        }
    """

# Checks if timestamp input is of instance/type datetime
    if isinstance(timestamp_input, datetime):
        dt = timestamp_input
    elif isinstance(timestamp_input, str):
        try:   
            dt = datetime.strptime(timestamp_input, "%Y-%m-%d %H:%M:%S")
        except ValueError as exc:          
            raise ValueError(
                "Timestamp string must match 'YYYY-MM-DD HH:MM:SS'"
            ) from exc
    else:
        raise TypeError(
            "timestamp_input must be a datetime object or a timestamp string"
        )


    features = {
         "year":               dt.year,
        "quarter":            (dt.month - 1) // 3 + 1,          # 1–4
        "month_number":       dt.month,                         # 1–12
        "month_name":         dt.strftime("%B"),                # "January"
        "week_number":         dt.isocalendar().week,         # "January"
        "day_of_week_number": dt.weekday() + 1,                    # 1=Mon … 7=Sun
        "day_of_week_name":   dt.strftime("%A"),                # "Monday"…
    }

    return features

In [14]:
test_output = extract_date_components("2023-10-26 14:30:00")
print(test_output)


{'year': 2023, 'quarter': 4, 'month_number': 10, 'month_name': 'October', 'week_number': 43, 'day_of_week_number': 4, 'day_of_week_name': 'Thursday'}


In [4]:
from datetime import datetime

test_dt = datetime(2025, 5, 26, 20, 15, 0)
print(extract_date_components(test_dt))


{'year': 2025, 'quarter': 2, 'month_number': 5, 'month_name': 'May', 'week_number': 22, 'day_of_week_number': 0, 'day_of_week_name': 'Monday'}


In [6]:
# Expecting a ValueError due to bad format
try:
    extract_date_components("2023/10/26")
except ValueError as e:
    print("Caught expected ValueError:", e)

# Expecting a TypeError due to wrong input type
try:
    extract_date_components(12345)
except TypeError as e:
    print("Caught expected TypeError:", e)


Caught expected ValueError: Timestamp string must match 'YYYY-MM-DD HH:MM:SS'
Caught expected TypeError: timestamp_input must be a datetime object or a timestamp string


In [16]:
# Apply to a DataFrame and explode the dict into columns
df_features = df['timestamp'].apply(extract_date_components)

# Merge with the original DataFrame
df = pd.concat([df, df_features], axis=1)

# Preview the results
df.head()


Unnamed: 0,timestamp,open,high,low,close,Daily Return,year,quarter,month_number,month_name,...,day_of_week_number,day_of_week_name,year.1,quarter.1,month_number.1,month_name.1,week_number,day_of_week_number.1,day_of_week_name.1,timestamp.1
0,2018-02-09 00:00:00+00:00,7611.61,7611.61,7611.61,7611.61,,2018,1,2,February,...,4,Friday,2018,1,2,February,6,5,Friday,"{'year': 2018, 'quarter': 1, 'month_number': 2..."
1,2018-02-10 00:00:00+00:00,8208.57,8674.76,7847.14,8672.57,0.139387,2018,1,2,February,...,5,Saturday,2018,1,2,February,6,6,Saturday,"{'year': 2018, 'quarter': 1, 'month_number': 2..."
2,2018-02-11 00:00:00+00:00,8659.92,9088.97,8283.43,8590.21,-0.009497,2018,1,2,February,...,6,Sunday,2018,1,2,February,6,7,Sunday,"{'year': 2018, 'quarter': 1, 'month_number': 2..."
3,2018-02-12 00:00:00+00:00,8583.38,8583.38,7890.82,8064.69,-0.061177,2018,1,2,February,...,0,Monday,2018,1,2,February,7,1,Monday,"{'year': 2018, 'quarter': 1, 'month_number': 2..."
4,2018-02-13 00:00:00+00:00,8105.98,8920.31,8105.98,8845.22,0.096784,2018,1,2,February,...,1,Tuesday,2018,1,2,February,7,2,Tuesday,"{'year': 2018, 'quarter': 1, 'month_number': 2..."


In [15]:
def calculate_daily_returns(df) -> pd.Series:
    """
        Calculate the daily returns from a DataFrame with a 'Close' column.

        Parameters:
            df (pd.DataFrame): DataFrame containing historical price data with a 'Close' column.

        Returns:
            pd.Series: A Series of daily return values (as decimal percentages).
    """

    daily_returns = df['close'].pct_change()
    # .pct_change() is a built in pandas method 
    # ( calculates the percentage change between the current and the prev element)
    return daily_returns

df['timestamp'] = pd.to_datetime(df['timestamp'])

df['Daily Return'] = calculate_daily_returns(df)

In [16]:
def get_return_std_dev(return_series):
    """
    Calculates the standard deviation of returns from a Series of daily returns.    

    Parameters:
        return_series (pd.Series): Series containing daily returns.

    Returns:
        float: The standard deviation of returns, rounded to 4 decimal places.

    Example usage:
        std_dev = get_return_std_dev(daily_returns)
    """
    return round(return_series.std(), 4)

In [20]:
def get_rolling_volatility(return_series, window):
    """
    Calculates the rolling standard deviation of returns over a specified window.

    Parameters:
        return_series (pd.Series): Series containing daily returns.
        window (int): The number of days to calculate the rolling standard deviation over.
    
    Returns:
        pd.Series: A Series containing the rolling standard deviation of returns.

    Example usage:
        rolling_volatility = get_rolling_volatility(daily_returns, 20)
    """
    return return_series.rolling(window).std()  


In [17]:
return_series = calculate_daily_returns(df)

In [22]:
print("STD: ", get_return_std_dev(return_series))
print("rolling volatility: ", get_rolling_volatility(return_series, window = 10))

STD:  0.0339
