In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from scipy.stats import norm

In [2]:
start_date="2011-10-01"
end_date="2024-09-30"
window_size = 252

In [3]:
tickers = ["ARL","BH","CIX","FDBC","GEF","NATH","NKSH","NWFL","PLBC","PNRG","QRTEB","RGCO","STRS","SWKH","TCI","VABK","VALU"]

In [4]:
column_names = ['date', 'tic', 'stat_Anomaly_Probability']
result_df = pd.DataFrame(columns=column_names)

In [5]:
for target_ticker in tickers:
    treasury_data =yf.download('^IRX', start=start_date, end=end_date)
    treasury_data['daily_risk_free_rate'] = (treasury_data['Adj Close'] / 100) / 252
    treasury_data=treasury_data[['daily_risk_free_rate']]
    mkt_data =yf.download('^RUT', start=start_date, end=end_date)
    mkt_data['daily_market_return'] = mkt_data['Adj Close'].pct_change()
    mkt_data=mkt_data[['daily_market_return']]
    stock_data =yf.download(target_ticker, start=start_date, end=end_date)
    stock_data['daily_return'] = stock_data['Adj Close'].pct_change()
    stock_data=stock_data[['daily_return']]
    combined_df = pd.concat([treasury_data, mkt_data, stock_data], axis=1).dropna()
    combined_df["tic"]=target_ticker
    combined_df["r_minus_rf"]=combined_df["daily_return"]-combined_df["daily_risk_free_rate"]
    combined_df["rm_minus_rf"]=combined_df["daily_market_return"]-combined_df["daily_risk_free_rate"]
    rolling_covariance = combined_df['rm_minus_rf'].rolling(window=window_size).cov(combined_df['r_minus_rf'])
    rolling_variance = combined_df['rm_minus_rf'].rolling(window=window_size).var()
    rolling_beta = rolling_covariance / rolling_variance
    combined_df['Rolling_Beta'] = rolling_beta
    combined_df['Excess_Return']=combined_df['r_minus_rf']-rolling_beta.shift(1)*combined_df['rm_minus_rf']
    combined_df=combined_df.dropna()
    combined_df['rolling_mean']= combined_df['Excess_Return'].rolling(window=window_size).mean()
    combined_df['rolling_std_dev']= combined_df['Excess_Return'].rolling(window=window_size).std()
    combined_df['ratio_to_std']= combined_df['Excess_Return']/combined_df['rolling_std_dev']
    combined_df['stat_Anomaly_Probability']= 2*np.abs(norm.cdf(combined_df['ratio_to_std'], 0, 1)-0.5)
    combined_df = combined_df.dropna()
    combined_df_reset = combined_df.reset_index()
    combined_df_reset.rename(columns={'Date':'date'}, inplace=True)
    res_df=combined_df_reset[['date','tic','stat_Anomaly_Probability']]
    result_df = pd.concat([result_df, res_df], ignore_index=True)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
  result_df = pd.concat([result_df, res_df], ignore_index=True)
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%************

In [6]:
data2 = pd.read_csv('df_final_prob.csv', low_memory=False)
data2 = data2[['date','tic','close','volume','DBSCAN_Anomaly_Probability','IsolationForest_Anomaly_Probability','OCSVM_Anomaly_Probability','LSTM_Anomaly_Probability']]
data2['date'] = pd.to_datetime(data2['date'], format='%Y/%m/%d')

In [7]:
df_merged = pd.merge(data2, result_df, on=['date', 'tic'],how='left').fillna(0)

In [8]:
df_merged.to_csv('df_final_prob_renewed.csv', index=False)