# **#前処理**

In [24]:
import pandas as pd

train_data = pd.read_csv('/home-asustor/teramoto/SIGNATE/EX/StockPrice_prediction/data/train.csv')
test_data = pd.read_csv('/home-asustor/teramoto/SIGNATE/EX/StockPrice_prediction/data/test.csv')

In [25]:
# 日付の間隔を確認
train_data['Date'] = pd.to_datetime(train_data['Date'])
train_data = train_data.sort_values('Date')
train_data['Date_diff'] = train_data['Date'].diff().dt.days

# 日付の間隔がどのようになっているか確認
print(train_data['Date_diff'].value_counts())


Date_diff
1.0    1591
3.0     362
4.0      57
2.0      14
5.0       1
Name: count, dtype: int64


# **#モデルの構築**

In [26]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# ボリンジャーバンドの計算
train_data['SMA20'] = train_data['Close'].rolling(window=20).mean()
train_data['STD20'] = train_data['Close'].rolling(window=20).std()
train_data['Upper Band'] = train_data['SMA20'] + (train_data['STD20'] * 2)
train_data['Lower Band'] = train_data['SMA20'] - (train_data['STD20'] * 2)

# ボリンジャーバンドを利用した予測ラベル
train_data['Signal'] = 0  # デフォルトは0（シグナルなし）

# 上部バンドを超えた場合に売りシグナルをセット
train_data.loc[train_data['Close'] > train_data['Upper Band'], 'Signal'] = -1

# 下部バンドを下回った場合に買いシグナルをセット
train_data.loc[train_data['Close'] < train_data['Lower Band'], 'Signal'] = 1

# 欠損値を補完または削除
train_data.dropna(inplace=True)  # 欠損値を削除

# 特徴量と目的変数の定義
X = train_data[['SMA20', 'Upper Band', 'Lower Band']]
y = train_data['Signal']

# 学習データと検証データに分割
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# モデルの構築と学習
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 検証データでの予測と評価
y_pred = model.predict(X_val)
print("Accuracy:", accuracy_score(y_val, y_pred))


Accuracy: 0.9104477611940298


In [27]:
# テストデータにもボリンジャーバンドを適用
test_data['SMA20'] = test_data['Close'].rolling(window=20).mean()
test_data['STD20'] = test_data['Close'].rolling(window=20).std()
test_data['Upper Band'] = test_data['SMA20'] + (test_data['STD20'] * 2)
test_data['Lower Band'] = test_data['SMA20'] - (test_data['STD20'] * 2)

# 欠損値を削除または補完
test_data.dropna(inplace=True)  # 欠損値を削除する場合

# 特徴量の選択
X_test = test_data[['SMA20', 'Upper Band', 'Lower Band']]

# テストデータでの予測
test_pred = model.predict(X_test)

# 結果を保存する
result = pd.DataFrame({'Date': test_data['Date'], 'Predicted_Signal': test_pred})
result.to_csv('/home-asustor/teramoto/SIGNATE/EX/StockPrice_prediction/data/submission_bollinger.csv', index=False)
