# **#前処理**

In [42]:
import pandas as pd

train_data = pd.read_csv('/home-asustor/teramoto/SIGNATE/EX/StockPrice_prediction/data/train.csv')
test_data = pd.read_csv('/home-asustor/teramoto/SIGNATE/EX/StockPrice_prediction/data/test.csv')

In [43]:
# 日付の間隔を確認
train_data['Date'] = pd.to_datetime(train_data['Date'])
train_data = train_data.sort_values('Date')
train_data['Date_diff'] = train_data['Date'].diff().dt.days

# 日付の間隔がどのようになっているか確認
print(train_data['Date_diff'].value_counts())


Date_diff
1.0    1591
3.0     362
4.0      57
2.0      14
5.0       1
Name: count, dtype: int64


# **#モデルの構築**

In [22]:
import pandas as pd

# データの読み込み
train_data = pd.read_csv('/home-asustor/teramoto/SIGNATE/EX/StockPrice_prediction/data/train.csv')
test_data = pd.read_csv('/home-asustor/teramoto/SIGNATE/EX/StockPrice_prediction/data/test.csv')

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# ボリンジャーバンドの計算
train_data['SMA20'] = train_data['Close'].rolling(window=20).mean()
train_data['STD20'] = train_data['Close'].rolling(window=20).std()
train_data['Upper Band'] = train_data['SMA20'] + (train_data['STD20'] * 2)
train_data['Lower Band'] = train_data['SMA20'] - (train_data['STD20'] * 2)

# 次の日の終値と比較してSignalを設定
train_data['Next_Close'] = train_data['Close'].shift(-1)

# Signalの初期化（次の日の終値と比較）
train_data['Signal'] = 0

# 次の日の終値を下回った場合に1をセット
train_data.loc[train_data['Close'] > train_data['Next_Close'], 'Signal'] = 0
train_data.loc[train_data['Close'] < train_data['Next_Close'], 'Signal'] = 1

# 欠損値を削除（最終行のNext_CloseはNaNになるため）
train_data.dropna(inplace=True)

# 特徴量と目的変数の定義
X = train_data[['SMA20', 'Upper Band', 'Lower Band']]
y = train_data['Signal']

# 学習データと検証データに分割
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# モデルの構築と学習
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 検証データでの予測と評価
y_pred = model.predict(X_val)
print("Accuracy:", accuracy_score(y_val, y_pred))

# テストデータにもボリンジャーバンドを適用
test_data['SMA20'] = test_data['Close'].rolling(window=20).mean()
test_data['STD20'] = test_data['Close'].rolling(window=20).std()
test_data['Upper Band'] = test_data['SMA20'] + (test_data['STD20'] * 2)
test_data['Lower Band'] = test_data['SMA20'] - (test_data['STD20'] * 2)

# 欠損値を削除
test_data.dropna(inplace=True)

# 特徴量の選択
X_test = test_data[['SMA20', 'Upper Band', 'Lower Band']]

# テストデータでの予測
test_pred = model.predict(X_test)

# 結果を保存する
result = pd.DataFrame({'Date': test_data['Date'], 'Predicted_Signal': test_pred})
result.to_csv('/home-asustor/teramoto/SIGNATE/EX/StockPrice_prediction/data/submission_bollinger.csv', index=False, header=False)


Accuracy: 0.47512437810945274


In [15]:
import pandas as pd

# データの読み込み
train_data = pd.read_csv('/home-asustor/teramoto/SIGNATE/EX/StockPrice_prediction/data/train.csv')
test_data = pd.read_csv('/home-asustor/teramoto/SIGNATE/EX/StockPrice_prediction/data/test.csv')

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# ボリンジャーバンドの計算
train_data['SMA20'] = train_data['Close'].rolling(window=20).mean()
train_data['STD20'] = train_data['Close'].rolling(window=20).std()
train_data['Upper Band'] = train_data['SMA20'] + (train_data['STD20'] * 2)
train_data['Lower Band'] = train_data['SMA20'] - (train_data['STD20'] * 2)

# 次の日の終値と比較してSignalを設定
train_data['Next_Close'] = train_data['Close'].shift(-1)

# Signalの初期化（次の日の終値と比較）
train_data['Signal'] = 0

# 次の日の終値を下回った場合に1をセット
train_data.loc[train_data['Close'] > train_data['Next_Close'], 'Signal'] = 0
train_data.loc[train_data['Close'] < train_data['Next_Close'], 'Signal'] = 1

# 欠損値を削除（最終行のNext_CloseはNaNになるため）
train_data.dropna(inplace=True)

# 特徴量と目的変数の定義
X = train_data[['SMA20', 'Upper Band', 'Lower Band']]
y = train_data['Signal']

# 学習データと検証データに分割
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# モデルの構築と学習
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 検証データでの予測と評価
y_pred = model.predict(X_val)
print("Accuracy:", accuracy_score(y_val, y_pred))

# テストデータにもボリンジャーバンドを適用
test_data['SMA20'] = test_data['Close'].rolling(window=20).mean()
test_data['STD20'] = test_data['Close'].rolling(window=20).std()
test_data['Upper Band'] = test_data['SMA20'] + (test_data['STD20'] * 2)
test_data['Lower Band'] = test_data['SMA20'] - (test_data['STD20'] * 2)

# 欠損値が発生している2~20行目のデータに対する処理
missing_rows = test_data.iloc[1:20].copy()  # 2行目から20行目をコピー

# 2~20行目にボリンジャーバンドの計算を適用
missing_rows['SMA20'] = missing_rows['Close'].rolling(window=20, min_periods=1).mean()
missing_rows['STD20'] = missing_rows['Close'].rolling(window=20, min_periods=1).std()
missing_rows['Upper Band'] = missing_rows['SMA20'] + (missing_rows['STD20'] * 2)
missing_rows['Lower Band'] = missing_rows['SMA20'] - (missing_rows['STD20'] * 2)

# 欠損値が残っている場合、その行を削除
missing_rows = missing_rows.dropna()

# 特徴量の選択
X_missing = missing_rows[['SMA20', 'Upper Band', 'Lower Band']]

# 欠損値の予測
if not X_missing.empty:
    missing_pred = model.predict(X_missing)
    # 欠損した行の予測結果を表示
    missing_results = pd.DataFrame({'Date': missing_rows['Date'], 'Predicted_Signal': missing_pred})
    print(missing_results)
else:
    print("対象の行がないため、予測をスキップします。")


Accuracy: 0.47512437810945274
          Date  Predicted_Signal
2   2012-01-23                 1
3   2012-01-24                 1
4   2012-01-25                 0
5   2012-01-26                 0
6   2012-02-02                 0
7   2012-02-03                 1
8   2012-02-06                 1
9   2012-02-07                 1
10  2012-02-08                 1
11  2012-02-09                 1
12  2012-02-16                 1
13  2012-02-17                 1
14  2012-02-21                 1
15  2012-02-22                 1
16  2012-02-23                 1
17  2012-02-24                 1
18  2012-03-02                 1
19  2012-03-05                 1


In [25]:
import pandas as pd

# submission_bollinger.csvの読み込み
submission_path = '/home-asustor/teramoto/SIGNATE/EX/StockPrice_prediction/data/submission_bollinger.csv'
submission = pd.read_csv(submission_path)

# 5, 11, 17...行目を抽出
filtered_submission = submission.iloc[3::6, :].reset_index(drop=True)

# 新しいファイルに保存
new_submission_path = '/home-asustor/teramoto/SIGNATE/EX/StockPrice_prediction/data/submission_bollinger_filtered.csv'
filtered_submission.to_csv(new_submission_path, index=False)

# 結果を表示
print(filtered_submission)



     2012-03-05  1
0    2012-03-09  1
1    2012-03-23  0
2    2012-04-09  0
3    2012-04-23  0
4    2012-05-07  0
..          ... ..
192  2019-10-25  1
193  2019-11-08  1
194  2019-11-22  1
195  2019-12-09  1
196  2019-12-23  1

[197 rows x 2 columns]
