This ipynb file is to serve as production/test for the different sections of the algorithm/function module for the project in a step by step process before pushing to the final version

In [1]:
# To check the current python path
import sys
print(sys.executable)

/bin/python3


In [2]:
# Yahoo Finance API to pull the data
import yfinance as yf
# Pandas and Numpy for data maniupulation 
import pandas as pd
import numpy as np
# Matflotlib and Ploty for visualizations (Testing purposes)
import matplotlib.pyplot as plt
import plotly.express as px

In [17]:
def fetch_stock_data(tickers, start_date='2023-01-01', end_date='2025-01-01'):
    df = yf.download(tickers, start=start_date, end=end_date, group_by='ticker', progress=False)
    
    # Flatten multi-level columns
    flat_data = []
    for ticker in tickers:
        temp = df[ticker].copy()          # get data for each ticker
        temp = temp.reset_index()         # bring Date back as column
        temp['Ticker'] = ticker           # add ticker column
        flat_data.append(temp)
    
    combined_df = pd.concat(flat_data, ignore_index=True)
    return combined_df

# Example tickers
tickers = ['AAPL','JPM','PFE','PG','XOM']

# Fetch and flatten
df = fetch_stock_data(tickers)
df.head()


  df = yf.download(tickers, start=start_date, end=end_date, group_by='ticker', progress=False)


Price,Date,Open,High,Low,Close,Volume,Ticker
0,2023-01-03,128.46817,129.079543,122.443142,123.330627,112117500,AAPL
1,2023-01-04,125.125335,126.870724,123.340509,124.602707,89113600,AAPL
2,2023-01-05,125.361991,125.993089,123.024955,123.281334,80962700,AAPL
3,2023-01-06,124.257594,128.478063,123.153167,127.817383,87754700,AAPL
4,2023-01-09,128.655553,131.554669,128.083618,128.339996,70790800,AAPL


In [18]:
print(df.columns)

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Ticker'], dtype='object', name='Price')


In [19]:
def calculate_sma(df, window=5):
    df_list = []
    for ticker, group in df.groupby('Ticker'):
        group = group.sort_values('Date')
        group['SMA_' + str(window)] = group['Close'].rolling(window).mean()
        df_list.append(group)
    return pd.concat(df_list, ignore_index=True)

df = calculate_sma(df, window=5)
df.head()

Price,Date,Open,High,Low,Close,Volume,Ticker,SMA_5
0,2023-01-03,128.46817,129.079543,122.443142,123.330627,112117500,AAPL,
1,2023-01-04,125.125335,126.870724,123.340509,124.602707,89113600,AAPL,
2,2023-01-05,125.361991,125.993089,123.024955,123.281334,80962700,AAPL,
3,2023-01-06,124.257594,128.478063,123.153167,127.817383,87754700,AAPL,
4,2023-01-09,128.655553,131.554669,128.083618,128.339996,70790800,AAPL,125.474409


In [20]:
df = calculate_sma(df, window=20)
df.head()

Price,Date,Open,High,Low,Close,Volume,Ticker,SMA_5,SMA_20
0,2023-01-03,128.46817,129.079543,122.443142,123.330627,112117500,AAPL,,
1,2023-01-04,125.125335,126.870724,123.340509,124.602707,89113600,AAPL,,
2,2023-01-05,125.361991,125.993089,123.024955,123.281334,80962700,AAPL,,
3,2023-01-06,124.257594,128.478063,123.153167,127.817383,87754700,AAPL,,
4,2023-01-09,128.655553,131.554669,128.083618,128.339996,70790800,AAPL,125.474409,


In [21]:
def calculate_daily_returns(df):
    df_list = []
    for ticker, group in df.groupby('Ticker'):
        group = group.sort_values('Date')
        group['Daily_Return'] = group['Close'].pct_change()
        df_list.append(group)
    return pd.concat(df_list, ignore_index=True)

df = calculate_daily_returns(df)
df.head()

Price,Date,Open,High,Low,Close,Volume,Ticker,SMA_5,SMA_20,Daily_Return
0,2023-01-03,128.46817,129.079543,122.443142,123.330627,112117500,AAPL,,,
1,2023-01-04,125.125335,126.870724,123.340509,124.602707,89113600,AAPL,,,0.010314
2,2023-01-05,125.361991,125.993089,123.024955,123.281334,80962700,AAPL,,,-0.010605
3,2023-01-06,124.257594,128.478063,123.153167,127.817383,87754700,AAPL,,,0.036794
4,2023-01-09,128.655553,131.554669,128.083618,128.339996,70790800,AAPL,125.474409,,0.004089


In [22]:
def detect_trends(df):
    df_list = []
    for ticker, group in df.groupby('Ticker'):
        group = group.sort_values('Date')
        run = [0]*len(group)
        close = group['Close'].values
        for i in range(1, len(close)):
            if close[i] > close[i-1]:
                run[i] = run[i-1]+1 if run[i-1]>=0 else 1
            elif close[i] < close[i-1]:
                run[i] = run[i-1]-1 if run[i-1]<=0 else -1
            else:
                run[i] = 0
        group['Trend_Run'] = run
        df_list.append(group)
    return pd.concat(df_list, ignore_index=True)

df = detect_trends(df)
df.head()

Price,Date,Open,High,Low,Close,Volume,Ticker,SMA_5,SMA_20,Daily_Return,Trend_Run
0,2023-01-03,128.46817,129.079543,122.443142,123.330627,112117500,AAPL,,,,0
1,2023-01-04,125.125335,126.870724,123.340509,124.602707,89113600,AAPL,,,0.010314,1
2,2023-01-05,125.361991,125.993089,123.024955,123.281334,80962700,AAPL,,,-0.010605,-1
3,2023-01-06,124.257594,128.478063,123.153167,127.817383,87754700,AAPL,,,0.036794,1
4,2023-01-09,128.655553,131.554669,128.083618,128.339996,70790800,AAPL,125.474409,,0.004089,2
