In [165]:
import torch
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

In [180]:
class ANN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(ANN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        feature = F.relu(x)
        output = self.fc3(feature)
        return output, feature

def create_lagged_features(df, days=30):
    features = []
    targets = []

    for i in tqdm(range(days-1, len(df))):
        feature_row = []
        target = df.iloc[i]['1_trend']

        # 過去days天的數據
        for j in range(days-1, -1, -1):
            feature_row.extend([
                df.iloc[i - j]['close'],
                df.iloc[i - j]['open'],
                df.iloc[i - j]['high'],
                df.iloc[i - j]['low'],
                df.iloc[i - j]['volume']
            ])
        features.append(feature_row)
        targets.append(target)

    return np.array(features), np.array(targets)

def create_test_features(df, days=30):
    
    features = []
    for i in tqdm(range(days-1, len(df)+1, days)):
        feature_row = []

        for j in range(days-1, -1, -1):
            feature_row.extend([
                df.iloc[i - j]['close'],
                df.iloc[i - j]['open'],
                df.iloc[i - j]['high'],
                df.iloc[i - j]['low'],
                df.iloc[i - j]['volume']
            ])

        features.append(feature_row)
    
    return np.array(features)

def train_ann(features_train, targets_train, features_val, targets_val):
    input_size = 150
    hidden_size = 64
    output_size = 3
    model = ANN(input_size, hidden_size, output_size)
    class_weights = torch.tensor([1.14, 0.79, 1.15], dtype=torch.float32)
    criterion = nn.CrossEntropyLoss(class_weights)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    epochs = 2000
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        output, _ = model(features_train)
        loss = criterion(output, targets_train)
        loss.backward()
        optimizer.step()

        if (epoch + 1) % 100 == 0:
            print(f'Epoch {epoch+1} / {epochs}, loss: {loss.item()}')

        model.eval()
        with torch.no_grad():
            output, _ = model(features_val)
            val_loss = criterion(output, targets_val)
            if (epoch + 1) % 100 == 0:
                print(f'Epoch {epoch+1} / {epochs}, val_loss: {val_loss.item()}')

    return model
def train_rf(features, targets):
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(features, targets)
    return model

In [167]:
train_df = pd.read_csv('./data/train.csv')
train_df['1_trend'] = train_df['1_trend'].map({-1: 0, 0: 1, 1: 2})
train_df['5_trend'] = train_df['5_trend'].map({-1: 0, 0: 1, 1: 2})
train_df['10_trend'] = train_df['10_trend'].map({-1: 0, 0: 1, 1: 2})
features, targets = create_lagged_features(train_df)

100%|██████████| 1833/1833 [00:34<00:00, 53.08it/s]


In [168]:
features_train, features_val, targets_train, targets_val = train_test_split(features, targets, test_size=0.3, random_state=42)

scaler = StandardScaler()
features_train = scaler.fit_transform(features_train)
features_val = scaler.transform(features_val)


features_train = torch.tensor(features_train, dtype=torch.float32)
features_val = torch.tensor(features_val, dtype=torch.float32)

targets_train = torch.tensor(targets_train, dtype=torch.long)
targets_val = torch.tensor(targets_val, dtype=torch.long)

In [181]:
model = train_ann(features_train, targets_train, features_val, targets_val)

Epoch 100 / 2000, loss: 0.8207222819328308
Epoch 100 / 2000, val_loss: 1.2934614419937134
Epoch 200 / 2000, loss: 0.4925943613052368
Epoch 200 / 2000, val_loss: 1.7620182037353516
Epoch 300 / 2000, loss: 0.30068495869636536
Epoch 300 / 2000, val_loss: 2.3843138217926025
Epoch 400 / 2000, loss: 0.19178594648838043
Epoch 400 / 2000, val_loss: 2.957812786102295
Epoch 500 / 2000, loss: 0.11643590033054352
Epoch 500 / 2000, val_loss: 3.570354461669922
Epoch 600 / 2000, loss: 0.07375786453485489
Epoch 600 / 2000, val_loss: 4.170519828796387
Epoch 700 / 2000, loss: 0.04777519032359123
Epoch 700 / 2000, val_loss: 4.735763072967529
Epoch 800 / 2000, loss: 0.031823720782995224
Epoch 800 / 2000, val_loss: 5.265988826751709
Epoch 900 / 2000, loss: 0.02211838774383068
Epoch 900 / 2000, val_loss: 5.713930130004883
Epoch 1000 / 2000, loss: 0.01597757637500763
Epoch 1000 / 2000, val_loss: 6.112009048461914
Epoch 1100 / 2000, loss: 0.01197869423776865
Epoch 1100 / 2000, val_loss: 6.470445156097412
Epoc

In [177]:
model.eval()
with torch.no_grad():
    output, feature_extract = model(features_val)
    _, predicted = torch.max(output, 1)
    print('ANN accuracy:', accuracy_score(targets_val, predicted))
    print(classification_report(targets_val, predicted))

ANN accuracy: 0.3188010899182561
              precision    recall  f1-score   support

           0       0.27      0.29      0.28       107
           1       0.42      0.39      0.40       154
           2       0.24      0.25      0.24       106

    accuracy                           0.32       367
   macro avg       0.31      0.31      0.31       367
weighted avg       0.32      0.32      0.32       367



In [171]:
test_df = pd.read_csv('./data/test.csv')
test_features = create_test_features(test_df)

test_features = torch.tensor(test_features, dtype=torch.float32)

test_features = scaler.transform(test_features)

test_features = torch.tensor(test_features, dtype=torch.float32)

100%|██████████| 427/427 [00:07<00:00, 57.18it/s]


In [172]:
model.eval()
with torch.no_grad():
    output, feature_extract = model(test_features)
    predicted = torch.argmax(output, 1).numpy()

class_map = {0: -1, 1: 0, 2: 1}
mapped_predictions = np.array([class_map[p] for p in predicted])

output_df = pd.DataFrame({
    'id': [i for i in range(len(mapped_predictions))],
    'trend': mapped_predictions
})

output_df.to_csv('./data/ann_predictions1.csv', index=False)


In [173]:
df1 = pd.read_csv('./data/ann_predictions1.csv')
df2 = pd.read_csv('./data/ann_predictions2.csv')
df3 = pd.read_csv('./data/ann_predictions3.csv')


combined_df = pd.concat([df1, df2, df3], ignore_index=True)

combined_df.to_csv('output.csv', index=False)
