In [None]:
import streamlit as st
import pandas as pd
import requests
from bs4 import BeautifulSoup
import yfinance as yf
import datetime
import os

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
import numpy as np
from sklearn.metrics import roc_curve, auc
import plotly.graph_objects as go
import pickle
import pickle
from datetime import datetime, timedelta

In [None]:
# Test for check_if_today_starts_with_vertical_green_overlay.

# Download AMD data through yfinance and turn the index into a regular column.
# NOTE(review): `df` is not referenced by any other code visible in this cell —
# confirm it is still needed before keeping this network call.
df = yf.download("AMD").reset_index()

def check_if_today_starts_with_vertical_green_overlay(df_test):
    """Tell whether the last row opens a new run of positive predictions.

    Consecutive rows whose ``pred`` is True form a run (the "green fill").
    The last row counts as the start of a new run when its ``pred`` is True
    and its ``Date`` does not fall inside any run that spans more than one
    distinct date.

    Args:
        df_test: DataFrame with a ``Date`` column and a boolean ``pred``
            column. The last row is treated as "today".

    Returns:
        bool: True if the last row starts a new run, otherwise False.
        An empty frame yields False instead of raising ``IndexError``.
    """
    # Nothing to inspect: without this guard ``iloc[-1]`` would raise.
    if df_test.empty:
        return False

    latest = df_test.iloc[-1]
    # A negative prediction on the last row can never start a run, so the
    # grouping below is only needed when the last prediction is positive.
    if not latest['pred']:
        return False

    is_positive = df_test['pred']
    # Every False row bumps the counter, so each stretch of consecutive True
    # rows shares one group id; keep the first and last date of each stretch.
    runs = (
        df_test[is_positive]
        .groupby((~is_positive).cumsum())['Date']
        .agg(['first', 'last'])
    )

    # Single-date runs are ignored: a run that consists only of the last row
    # is exactly the "just started" case this function is looking for.
    multi_day_runs = runs[runs['first'] != runs['last']]
    inside_existing_run = (
        (multi_day_runs['first'] <= latest['Date'])
        & (latest['Date'] <= multi_day_runs['last'])
    ).any()

    return not inside_existing_run

def create_feature_cols(df):
    """Append the price-derived feature and target columns to ``df``.

    The frame is modified in place and also returned. It must provide
    ``Close``, ``High`` and ``Low`` columns. Rows that do not yet have a
    full rolling window (or a value to shift from) receive NaN.

    Columns added, in this order:
        * ``feat_dist_from_ma_{n}``: Close relative to its n-row mean, minus 1.
        * ``feat_dist_from_max_{n}`` / ``feat_dist_from_min_{n}``: Close
          relative to the n-row maximum of High / minimum of Low, minus 1.
        * ``feat_price_dist_{n}``: Close relative to Close n rows earlier,
          minus 1.
        * ``target_ma``: 20-row rolling mean of Close.
        * ``price_above_ma``: whether Close exceeds ``target_ma`` on the
          same row (no forward shift is applied here).
    """
    close = df['Close']

    # Distance from the n-row moving average
    for window in (10, 20, 30, 50, 100):
        moving_average = close.rolling(window).mean()
        df[f'feat_dist_from_ma_{window}'] = close / moving_average - 1

    # Distance from the n-row high and low
    for window in (3, 5, 10, 15, 20, 30, 50, 100):
        highest_high = df['High'].rolling(window).max()
        lowest_low = df['Low'].rolling(window).min()
        df[f'feat_dist_from_max_{window}'] = close / highest_high - 1
        df[f'feat_dist_from_min_{window}'] = close / lowest_low - 1

    # Change in Close over the last n rows
    for lag in (1, 2, 3, 4, 5, 10, 15, 20, 30, 50, 100):
        df[f'feat_price_dist_{lag}'] = close / close.shift(lag) - 1

    # Target inputs: the 20-row moving average and whether Close sits above it
    df['target_ma'] = close.rolling(20).mean()
    df['price_above_ma'] = close > df['target_ma']
    return df

def test_model(selected_option, last_n_days):
    """Score the most recent rows of a ticker with its saved classifier.

    Loads ``../models/{selected_option}_model.pkl``, downloads the ticker
    through yfinance, builds the feature columns with
    ``create_feature_cols`` and keeps only rows dated after
    ``now - 2 days - last_n_days``.

    Args:
        selected_option: Ticker symbol; also used to locate the model file.
        last_n_days: Size of the trailing window in days (passed to ``int``).

    Returns:
        DataFrame restricted to the trailing window, with a fresh index and
        two extra columns: ``pred_prob`` (second column of
        ``predict_proba``) and ``pred`` (``pred_prob > 0.5``).
    """
    # NOTE(review): unpickling runs arbitrary code — only load model files
    # that come from a trusted source.
    model_path = f"../models/{selected_option}_model.pkl"
    with open(model_path, "rb") as model_file:
        clf = pickle.load(model_file)

    # The window is anchored two days before now and reaches back
    # last_n_days from there; the cutoff is compared as a date string.
    anchor = datetime.now() - timedelta(days=2)
    test_until = (anchor - timedelta(days=int(last_n_days))).strftime('%Y-%m-%d')

    # Same names and order as the columns produced by create_feature_cols.
    # NOTE(review): presumably this must match the column order used when
    # the model was fitted — confirm against the training code.
    feature_columns = (
        [f'feat_dist_from_ma_{window}' for window in (10, 20, 30, 50, 100)]
        + [
            f'feat_dist_from_{extreme}_{window}'
            for window in (3, 5, 10, 15, 20, 30, 50, 100)
            for extreme in ('max', 'min')
        ]
        + [
            f'feat_price_dist_{lag}'
            for lag in (1, 2, 3, 4, 5, 10, 15, 20, 30, 50, 100)
        ]
    )

    history = create_feature_cols(yf.download(selected_option).reset_index())

    # Keep only the trailing window and score it
    recent = history[history['Date'] > test_until].reset_index(drop=True)
    class_probabilities = clf.predict_proba(recent[feature_columns])
    recent['pred_prob'] = class_probabilities[:, 1]
    recent['pred'] = recent['pred_prob'] > 0.5

    return recent

# During testing, the bar on end_date was the first bar of a green overlay.
# The window is pinned to these dates so the check can be re-run at any time
# in the future to confirm the function still behaves correctly.
start_date = '2023-12-11'
end_date = '2024-01-08'

# test_model only returns a trailing window counted back from today, so the
# window has to be sized to reach start_date. A fixed 30-day window leaves the
# filtered frame empty once the pinned dates are more than about a month old.
days_back = (datetime.now() - datetime.strptime(start_date, '%Y-%m-%d')).days + 1
df_test = test_model("AMD", days_back)

# Filter the DataFrame
df_test = df_test[(df_test['Date'] >= start_date) & (df_test['Date'] <= end_date)]
check_if_today_starts_with_vertical_green_overlay(df_test)