In [1]:
import pandas as pd
import datetime, os, shutil
import stock_data as stock
import numpy as np
from tqdm import tqdm

import matplotlib.pyplot as plt
import seaborn as sns
from scipy.signal import find_peaks
from scipy.integrate import simpson

In [2]:
# Fraction of the total KDE area that an upper-tail region must exceed in the
# screening loop (applied to both the area right of the second peak and the
# area right of the current price).
density_threshold = 0.25

### Get Data

In [3]:
# Make sure a "data" directory exists under the current working directory;
# announce it only when it had to be created.
folder_path = os.path.join(os.getcwd(), "data")

folder_missing = not os.path.exists(folder_path)
if folder_missing:
    os.makedirs(folder_path)
    print(f"Create folder：{folder_path}")

In [4]:
# `stock` is the stock_data module imported in the notebook's first cell;
# it is not re-imported here.
scrapy = stock.Scrapy()

# End of the download range. Hard-coded date; swap in datetime.date.today()
# to download up to the current day instead.
today = datetime.date(2024, 6, 12)
# Start of the download range: same month/day, four calendar years earlier.
start = today.replace(year=today.year - 4)
print(f"{start} ~ {today}")

2020-06-12 ~ 2024-06-12


# Fetch prices through the project scraper with mode="listed" for the
# start ~ today range, then persist them so the load cell can read them back.
price_csv_path = f"data/price_{start.year}_{today.year}.csv"
price = scrapy.get_price(start=str(start), end=str(today), mode="listed")
price.to_csv(price_csv_path, index=False)
print(f"length = {len(price)}")
price.head()

# Fetch the "^TWII" series over the same date range (mode="other" with an
# explicit query) and persist it next to the price file.
twii_csv_path = f"data/twii_{start.year}_{today.year}.csv"
twii_request = {
    "start": str(start),
    "end": str(today),
    "mode": "other",
    "query": "^TWII",
}
twiis = scrapy.get_price(**twii_request)
twiis.to_csv(twii_csv_path, index=False)

### Load Data

In [5]:
# Read the cached CSVs back in; the "Date" column is parsed into datetimes.
price_raw = pd.read_csv(
    f"data/price_{start.year}_{today.year}.csv",
    parse_dates=["Date"],
)
twii_raw = pd.read_csv(
    f"data/twii_{start.year}_{today.year}.csv",
    parse_dates=["Date"],
)

In [6]:
# Order rows by symbol, newest date first, and store "Date" as plain
# datetime.date objects so it can be compared with `target_date`.
#
# pd.to_datetime makes this cell safe to re-run: after the first run "Date"
# holds datetime.date objects (object dtype), on which `.dt` would raise.
# Coercing back to datetime64 first is a no-op on the first run.
price_raw = (
    price_raw
    .assign(Date=pd.to_datetime(price_raw["Date"]))
    .sort_values(by=["Symbol", "Date"], ascending=False)
    .reset_index(drop=True)
)
price_raw["Date"] = price_raw["Date"].dt.date
price_raw

Unnamed: 0,Symbol,Date,Open,High,Low,Close,Adj Close,Volume
0,9958.TW,2024-06-12,326.00,330.00,317.00,329.50,329.50,7102754.0
1,9958.TW,2024-06-11,332.00,336.00,322.00,324.50,324.50,15002777.0
2,9958.TW,2024-06-07,295.50,324.50,295.00,324.50,324.50,20816257.0
3,9958.TW,2024-06-06,286.50,296.00,283.00,295.00,295.00,5302951.0
4,9958.TW,2024-06-05,288.00,290.50,284.00,284.00,284.00,3921070.0
...,...,...,...,...,...,...,...,...
943811,1101.TW,2020-06-19,37.06,37.40,36.84,36.84,31.18,29018566.0
943812,1101.TW,2020-06-18,37.10,37.14,36.88,36.97,31.29,12029646.0
943813,1101.TW,2020-06-17,37.06,37.10,36.84,37.10,31.40,16973786.0
943814,1101.TW,2020-06-16,37.01,37.36,36.97,37.10,31.40,13740695.0


In [7]:
# Date treated as "now" by the screening loop: each symbol's window starts at
# the row carrying this date.
target_date = datetime.date(2023, 6, 12)

In [8]:
# Start every run with an empty "image" folder: drop whatever a previous run
# left behind, then recreate the directory.
image_dir_exists = os.path.exists("image")
if image_dir_exists:
    shutil.rmtree("image")

os.makedirs("image", exist_ok=True)

In [9]:
# Screen every symbol for a bimodal closing-price distribution in which the
# price on `target_date` sits in the upper mode, and save a chart for each hit.

# Number of rows in the look-back window (250 * 2, i.e. two blocks of 250 rows).
window_size = 250 * 2

groups = price_raw.groupby("Symbol")
for name, df_group in tqdm(groups):
    df_group = df_group.reset_index(drop=True)

    # Locate the row for the target date; skip symbols that have none.
    target_positions = np.flatnonzero((df_group["Date"] == target_date).to_numpy())
    if len(target_positions) == 0:
        continue
    start_idx = target_positions[0]
    end_idx = start_idx + window_size
    if end_idx > df_group.shape[0]:
        continue  # fewer than `window_size` rows available from the target date

    # Positional, half-open slice: always exactly `window_size` rows. A
    # label-based .loc slice includes end_idx and would return one row more
    # (or the same count only when end_idx falls just past the last row).
    window_df = df_group.iloc[start_idx:end_idx]
    # First row of the window is the target-date row.
    current_price = window_df["Close"].values[0]

    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        sns.kdeplot(window_df["Close"], ax=ax)

        kde_lines = ax.get_lines()
        if not kde_lines:
            continue  # no KDE curve was drawn, nothing to analyse

        x, y = kde_lines[0].get_data()  # points of the KDE curve
        peaks, _ = find_peaks(y)  # local maxima of the density
        if len(peaks) != 2:
            continue  # only distributions with exactly two peaks are kept

        # Lowest density point between the two peaks.
        valley = peaks[0] + y[peaks[0]:peaks[1]].argmin()

        # Written as a negated ">" so a NaN price is skipped as well.
        if not (current_price > x[valley]):
            continue

        total_area = simpson(y=y, x=x)  # area under the whole curve
        # Area to the right of the second peak.
        second_peak_area = simpson(y=y[peaks[1]:], x=x[peaks[1]:])

        # First grid point strictly above the current price
        # (assumes the KDE grid `x` is ascending).
        idx = np.searchsorted(x, current_price, side="right")
        if idx == len(x):
            continue  # no density above the current price
        # Area to the right of the current price.
        current_area = simpson(y=y[idx:], x=x[idx:])

        # Both upper-tail areas must exceed the configured share of the total.
        min_area = total_area * density_threshold
        if second_peak_area > min_area and current_area > min_area:
            ax.plot(x[peaks], y[peaks], "o")
            ax.plot(x[valley], y[valley], "o")

            ax.axvline(x=current_price, color='r', linestyle='--')
            ax.set_title(name)
            fig.savefig(f'image/{name.replace(".TW", "")}.png')
    finally:
        # Always release the figure, including on skipped symbols and errors.
        plt.close(fig)

100%|██████████| 997/997 [00:22<00:00, 45.22it/s]
