In [1]:
import os

# Capture the process's current working directory and print it.
# NOTE(review): presumably a sanity check for the absolute paths used in later cells -- confirm.
current_working_directory = os.getcwd()

print("Current working directory:", current_working_directory)

Current working directory: /home/kenchen1216/StockTools/US_Stock


In [None]:
使用Auto-Sklearn訓練模型
波段交易的目標是捕捉股票價格的中短期波動
數據只有信號

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from joblib import dump, load
import os
from autosklearn.experimental.askl2 import AutoSklearn2Classifier

# Load every CSV file in a directory and stack them into a single DataFrame
def load_and_combine_data(directory_path):
    """Read all ``*.csv`` files found directly in ``directory_path`` and stack them.

    Files are read in sorted filename order so that the row order of the
    result is reproducible (``os.listdir`` returns entries in arbitrary
    order). All frames are concatenated in a single ``pd.concat`` call
    instead of growing a DataFrame inside the loop, which re-copies every
    previously loaded row on each iteration.

    Parameters
    ----------
    directory_path : str
        Directory to scan. Only entries whose name ends with ``.csv`` are read;
        sub-directories are not searched.

    Returns
    -------
    pandas.DataFrame
        The rows of every CSV file, with a fresh ``0..n-1`` index. An empty
        DataFrame is returned when the directory contains no CSV files.
    """
    frames = [
        pd.read_csv(os.path.join(directory_path, filename), index_col=None, header=0)
        for filename in sorted(os.listdir(directory_path))
        if filename.endswith('.csv')
    ]
    if not frames:
        # pd.concat raises on an empty list; keep returning an empty frame instead.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

# Load the data
directory_path = '/home/kenchen1216/StockTools/US_Stock/ProcessData/ProcessData 2'  # change to the actual data folder
all_stocks_data = load_and_combine_data(directory_path)

# Preprocessing: every column except the label 'Future_Signal' is used as a feature
X = all_stocks_data.drop(labels=['Future_Signal'], axis=1).values
# Binarise the label: 1 (buy) when Future_Signal > 0, otherwise 0 (sell).
# Vectorised comparison instead of a per-row Python lambda; NaN compares False and so maps to 0, as before.
y = (all_stocks_data['Future_Signal'] > 0).astype('int64').values

# Train/test split
# NOTE(review): this is a shuffled random split; if the rows are time-ordered price data,
# neighbouring days can land on both sides of the split -- confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
print('train shape:', X_train.shape)
print('test shape:', X_test.shape)

# Where the fitted model is stored. The directory is created BEFORE training so that a
# missing folder cannot make dump() fail only after the multi-hour fit has finished.
model_path = '/home/kenchen1216/StockTools/US_Stock/models/2d_lagging2.joblib'  # change to the actual save path
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Train the model with auto-sklearn 2.0
automl = AutoSklearn2Classifier(
    time_left_for_this_task=18000, per_run_time_limit=3600, ensemble_size=150, ensemble_nbest=50, n_jobs=-1, memory_limit=40960)     # memory limit of 40 GB (value is in MB)
automl.fit(X_train, y_train)

# Save the model
dump(automl, model_path)


# Predict on the held-out split and evaluate
y_pred = automl.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Model accuracy: ", accuracy)


# Reload the saved model (as would be done when it is needed later) and check that it
# reproduces the in-memory accuracy
loaded_model = load(model_path)
y_pred_loaded = loaded_model.predict(X_test)
accuracy_loaded = accuracy_score(y_test, y_pred_loaded)
print("Loaded model accuracy: ", accuracy_loaded)


train shape: (3554521, 18)
test shape: (1523367, 18)
Using Backup selector
Model accuracy:  0.6485909173560934
Loaded model accuracy:  0.6485909173560934


In [None]:
台股驗證集數據處理
使用模型預測交易信號，並輸出一個目錄(含預測數據)

In [1]:
import pandas as pd
import os
from joblib import load
import shutil

# Preprocessing helper: strips the label column 'Future_Signal' when it is present
def preprocess_data(df):
    """Return a new DataFrame equal to ``df`` without the 'Future_Signal' column.

    The input frame is not modified. When ``df`` has no 'Future_Signal'
    column the drop is a no-op and a frame with the same columns is returned.
    """
    return df.drop(columns=['Future_Signal'], errors='ignore')

def clear_directory(directory_path):
    """Remove everything inside ``directory_path`` while keeping the directory itself.

    Regular files and symbolic links are unlinked; sub-directories are removed
    recursively. When ``directory_path`` does not exist a message is printed
    and nothing else happens. A failure on one entry is printed and the
    remaining entries are still processed.
    """
    # Nothing to clean when the target is missing
    if not os.path.exists(directory_path):
        print(f"Directory {directory_path} does not exist.")
        return

    for entry_name in os.listdir(directory_path):
        file_path = os.path.join(directory_path, entry_name)
        try:
            # Links are unlinked rather than followed, so a link to a directory
            # never causes the link target to be deleted.
            if os.path.islink(file_path) or os.path.isfile(file_path):
                os.unlink(file_path)
                continue
            if os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print(f"Failed to delete {file_path}. Reason: {e}")

def process_files_and_predict(directory_path, model, output_directory):
    """Predict a signal for every CSV in ``directory_path`` and write the results.

    For each ``*.csv`` file the 'Date' and 'Pattern' columns (when present)
    are excluded and the remaining columns are passed to ``model.predict``.
    Predictions equal to 0 are written as -100 and everything else as 100 in a
    new 'Predicted_Signal' column; the full frame is saved under the same file
    name in ``output_directory``.

    ``output_directory`` is created if needed and EMPTIED before any file is
    processed.

    Parameters
    ----------
    directory_path : str
        Directory containing the input CSV files.
    model : object
        Fitted estimator exposing ``predict``.
    output_directory : str
        Directory that receives one output CSV per successfully predicted input.

    Raises
    ------
    ValueError
        If ``directory_path`` is ``output_directory`` or lies inside it. The
        output directory is wiped first, so such a call would delete the input
        files before they are read.
    """
    # Refuse to wipe the inputs: compare fully resolved paths so aliases are caught too.
    source_root = os.path.realpath(directory_path)
    output_root = os.path.realpath(output_directory)
    if source_root == output_root or source_root.startswith(os.path.join(output_root, '')):
        raise ValueError(
            f"Input directory {directory_path} is the same as or inside output directory "
            f"{output_directory}; clearing the output would delete the input files."
        )

    os.makedirs(output_directory, exist_ok=True)

    clear_directory(output_directory)

    file_paths = [os.path.join(directory_path, f) for f in os.listdir(directory_path) if f.endswith('.csv')]

    for file_path in file_paths:
        df = pd.read_csv(file_path)
        # Optional step kept from the original notebook: run `df = preprocess_data(df)` here
        # when predicting with models_signal1.joblib; other models can skip it.
        features = df.drop(['Date', 'Pattern'], axis=1, errors='ignore')

        # Skip files that leave nothing to predict on (no rows or no feature columns)
        if features.empty:
            print(f"Warning: No features to predict for file {file_path}. Skipping.")
            continue

        # Run the model
        try:
            prediction = model.predict(features)
            # Map class 0 -> -100 and any other class -> 100, then attach to the frame
            df['Predicted_Signal'] = [-100 if x == 0 else 100 for x in prediction]
            output_file_path = os.path.join(output_directory, os.path.basename(file_path))
            df.to_csv(output_file_path, index=False)
        except ValueError as e:
            print(f"Error predicting for file {file_path}: {e}")
            continue


# Driver: load a saved model, predict on the validation CSVs and write the results.
if __name__ == "__main__":
    # NOTE(review): this loads '2d_lagging1.joblib', while the training cell in this
    # notebook saves '2d_lagging2.joblib' -- confirm which model is intended.
    model_path = '/home/kenchen1216/StockTools/US_Stock/models/2d_lagging1.joblib'  # path of the saved model

    model = load(model_path)

    directory_path = '/home/kenchen1216/StockTools/US_Stock/TWVaildData'  # validation dataset (input CSV files)

    # This directory is emptied by process_files_and_predict before new files are written.
    output_directory = '/home/kenchen1216/StockTools/US_Stock/TWPredictedData'  # output directory for the predictions

    process_files_and_predict(directory_path, model, output_directory)

    print("Processing complete. Predictions written to new CSV files in", output_directory)


Processing complete. Predictions written to new CSV files in /home/kenchen1216/StockTools/US_Stock/TWPredictedData


In [None]:
台股驗證集數據處理
將Signal欄位加回驗證集

In [2]:
import pandas as pd
import os

def merge_date_signal(merged_csv_path, predicted_csv_path, output_dir):
    """
    Add the 'Date' and 'Signal' columns of each CSV in ``merged_csv_path`` to
    the CSV of the same name in ``predicted_csv_path``.

    'Signal' is stored as 'Original_Signal'. Rows are matched by position
    (row i of one file with row i of the other), not by date, and only
    positions present in both files are kept.

    The function is safe to re-run, including in place (``output_dir`` equal
    to ``predicted_csv_path``): 'Date' / 'Original_Signal' columns already
    present in a predicted file are replaced instead of being duplicated as
    ``*_x`` / ``*_y`` columns by the merge.

    Parameters:
    - merged_csv_path: directory containing the original CSV files.
    - predicted_csv_path: directory containing the CSV files to extend.
    - output_dir: directory the updated CSV files are written to (created if missing).
    """
    os.makedirs(output_dir, exist_ok=True)

    merged_csv_files = [f for f in os.listdir(merged_csv_path) if f.endswith('.csv')]

    for file_name in merged_csv_files:
        merged_file_path = os.path.join(merged_csv_path, file_name)
        predicted_file_path = os.path.join(predicted_csv_path, file_name)

        if not os.path.exists(predicted_file_path):
            print(f"No corresponding file found for {file_name} in predicted data directory.")
            continue

        merged_df = pd.read_csv(merged_file_path)
        # Keep only Date and Signal; rename Signal so it cannot clash with other signal columns
        relevant_data = merged_df[['Date', 'Signal']].rename(columns={'Signal': 'Original_Signal'})

        predicted_df = pd.read_csv(predicted_file_path)

        # Drop copies left by a previous run; otherwise the merge below would emit
        # suffixed duplicates (Date_x, Date_y, ...) for the overlapping names.
        stale_columns = [c for c in relevant_data.columns if c in predicted_df.columns]
        if stale_columns:
            predicted_df = predicted_df.drop(columns=stale_columns)

        # The join is positional, so differing lengths silently lose rows: make that visible.
        if len(predicted_df) != len(relevant_data):
            print(
                f"Warning: {file_name} has {len(predicted_df)} predicted rows but "
                f"{len(relevant_data)} original rows; only matching positions are kept."
            )

        # Attach Date and Original_Signal to the predictions by row position
        updated_df = pd.merge(predicted_df, relevant_data, left_index=True, right_index=True, how='inner')

        # Write the updated frame to the output directory
        output_file_path = os.path.join(output_dir, file_name)
        updated_df.to_csv(output_file_path, index=False)
        print(f"Updated {file_name} and saved to {output_file_path}")


# Directory constants. Only merged_data_dir and output_directory are used by the call below;
# the other four are not referenced in this cell.
stock_data_dir = '/home/kenchen1216/StockTools/US_Stock/TWStockData'
sort_result_dir = '/home/kenchen1216/StockTools/US_Stock/TW_sort_result'
merged_data_dir = '/home/kenchen1216/StockTools/US_Stock/TW_MergedData/'
train_data_dir = '/home/kenchen1216/StockTools/US_Stock/ValidData'
features_data_dir = '/home/kenchen1216/StockTools/US_Stock/TWVaildData'
output_directory = '/home/kenchen1216/StockTools/US_Stock/TWPredictedData'  # directory holding the prediction CSV files
# Update the prediction files in place: the same directory is passed as both the
# predicted-data input and the output.
merge_date_signal(merged_data_dir, output_directory, output_directory)


Updated 2024.TW.csv and saved to /home/kenchen1216/StockTools/US_Stock/TWPredictedData/2024.TW.csv
Updated 2211.TW.csv and saved to /home/kenchen1216/StockTools/US_Stock/TWPredictedData/2211.TW.csv
Updated 2323.TW.csv and saved to /home/kenchen1216/StockTools/US_Stock/TWPredictedData/2323.TW.csv
Updated 2486.TW.csv and saved to /home/kenchen1216/StockTools/US_Stock/TWPredictedData/2486.TW.csv
Updated 3051.TW.csv and saved to /home/kenchen1216/StockTools/US_Stock/TWPredictedData/3051.TW.csv
Updated 6776.TW.csv and saved to /home/kenchen1216/StockTools/US_Stock/TWPredictedData/6776.TW.csv
Updated 2897.TW.csv and saved to /home/kenchen1216/StockTools/US_Stock/TWPredictedData/2897.TW.csv
Updated 6933.TW.csv and saved to /home/kenchen1216/StockTools/US_Stock/TWPredictedData/6933.TW.csv
Updated 9914.TW.csv and saved to /home/kenchen1216/StockTools/US_Stock/TWPredictedData/9914.TW.csv
Updated 2031.TW.csv and saved to /home/kenchen1216/StockTools/US_Stock/TWPredictedData/2031.TW.csv
Updated 21

In [None]:
台股驗證集數據處理
使用模型預測交易信號，判斷預測準確度

In [None]:
台股驗證集數據處理
將原本數據中的'Date', 'Adj Close', 'Signal', 'Pattern'列，放在預測交易信號輸出的對應文檔中

In [4]:
import pandas as pd
import os
import glob

def merge_data_and_save(stock_dir, predicted_dir):
    """Append 'Date', 'Adj Close', 'Signal' and 'Pattern' to each predicted CSV.

    For every ``*.csv`` in ``stock_dir`` that has a file of the same name in
    ``predicted_dir``, the four columns are read from the stock file and placed
    to the right of the predicted file's columns, matched by row position. The
    predicted file is overwritten in place. Stock files without a counterpart
    are skipped silently.

    Because the file is overwritten in place, columns with these names that
    already exist in the predicted file (for example from a previous run) are
    replaced rather than appended again, so the output never contains
    duplicate column names.

    Parameters
    ----------
    stock_dir : str
        Directory with the source CSV files; each must contain the four columns.
    predicted_dir : str
        Directory with the prediction CSV files to update.
    """
    extra_columns = ['Date', 'Adj Close', 'Signal', 'Pattern']

    # Every CSV file in the stock directory
    for stock_file in glob.glob(os.path.join(stock_dir, "*.csv")):
        # The predicted file shares the stock file's base name
        stock_code = os.path.basename(stock_file)
        predicted_file = os.path.join(predicted_dir, stock_code)

        if not os.path.exists(predicted_file):
            continue

        # Read only the needed columns from the stock file
        stock_df = pd.read_csv(stock_file, usecols=extra_columns)

        predicted_df = pd.read_csv(predicted_file)

        # Remove stale copies first; concat(axis=1) would otherwise produce duplicate column names.
        stale_columns = [c for c in extra_columns if c in predicted_df.columns]
        if stale_columns:
            predicted_df = predicted_df.drop(columns=stale_columns)

        # usecols keeps the file's own column order, so re-select to fix the output order
        merged_df = pd.concat([predicted_df, stock_df[extra_columns]], axis=1)

        # Overwrite the predicted file with the extended frame
        merged_df.to_csv(predicted_file, index=False)

# Replace 'path_to_stock_data' and 'path_to_predicted_data' with the actual paths
# NOTE(review): earlier cells in this notebook use '.../TW_MergedData/' and '.../TWPredictedData';
# the directory names below differ -- confirm they point at the intended folders.
path_to_stock_data = '/home/kenchen1216/StockTools/US_Stock/TWMergedData'
path_to_predicted_data = '/home/kenchen1216/StockTools/US_Stock/PredictedTWData'

# Updates the CSV files in path_to_predicted_data in place.
merge_data_and_save(path_to_stock_data, path_to_predicted_data)
