# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from google.colab import drive
drive.mount('/content/drive')

# Path of the AMAT price CSV on the mounted Google Drive.
# (For a local run, point file_amat at './amat.us.csv' instead.)
file_amat = '/content/drive/My Drive/path_to_your_file/amat.us.csv'

# Load the CSV and convert the 'Date' column to datetime.
data_a = pd.read_csv(file_amat)
data_a['Date'] = pd.to_datetime(data_a['Date'])

# Date windows for the training and test sets; both ends are inclusive.
start_date, end_date = '2000-01-01', '2015-12-31'
test_start_date, test_end_date = '2016-01-01', '2017-01-01'

# Keep the rows inside each window, then drop rows containing missing values.
in_training_window = data_a['Date'].between(start_date, end_date)
training_data_a = data_a[in_training_window].dropna()

in_test_window = data_a['Date'].between(test_start_date, test_end_date)
test_data_a = data_a[in_test_window].dropna()

# Features are Open/High/Low/Volume, the target is Close; all are
# converted to NumPy arrays.
feature_columns = ['Open', 'High', 'Low', 'Volume']
target_column = 'Close'

X_train = training_data_a[feature_columns].values
y_train = training_data_a[target_column].values

X_test = test_data_a[feature_columns].values
y_test = test_data_a[target_column].values

# Print the contents of each array, preceded by its label.
for label, array in (
    ("X_train:", X_train),
    ("y_train:", y_train),
    ("X_test:", X_test),
    ("y_test:", y_test),
):
    print(label)
    print(array)

# Train the random forest model (fit() returns the fitted estimator).
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict on the test features.
y_pred = model.predict(X_test)

# Measure the model's accuracy as mean squared error on the test set.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse}')

# Plot actual against predicted closing prices over the test dates.
test_dates = test_data_a['Date']
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(test_dates, y_test, label='Actual AMAT Prices', color='b')
ax.plot(test_dates, y_pred, label='Predicted AMAT Prices', color='r')
ax.set_xlabel('Date')
ax.set_ylabel('Stock Price')
ax.set_title('Actual vs Predicted AMAT Stock Prices (2016)')
ax.legend()
plt.show()
