# ロジスティック回帰を用いた株価分析のデモ

In [1]:
# ライブラリのインポート
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pandas_ta as ta
from datetime import date, datetime, timedelta
import talib
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split  
from sklearn.linear_model import LogisticRegression  
from sklearn.metrics import accuracy_score, confusion_matrix
import warnings
#warnings.simplefilter(action="ignore", category=FutureWarning)

In [2]:
# Download data
ticker = '^N225'
end_date = datetime.today()
start_date = end_date - timedelta(days=730)
data = yf.download(ticker, start_date, end_date)
data.columns = [col[0] if isinstance(col, tuple) else col for col in data.columns]
data.index = pd.to_datetime(data.index)

[*********************100%***********************]  1 of 1 completed


In [3]:
data.tail()

Unnamed: 0_level_0,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-02-03,38520.089844,38948.609375,38401.820312,38932.660156,169200000
2025-02-04,38798.371094,39192.511719,38590.960938,39078.660156,144300000
2025-02-05,38831.480469,39118.949219,38682.179688,38979.210938,151200000
2025-02-06,39066.53125,39190.128906,38851.511719,38977.578125,154700000
2025-02-07,38787.019531,39007.941406,38753.199219,38907.761719,0


In [4]:
# Create features based on prices 
data['SMA50'] = data['Close'].rolling(window=50).mean() 
data['SMA200'] = data['Close'].rolling(window=200).mean() 
data['Volatility'] = data['Close'].rolling(window=20).std() 
data['Daily Return'] = data['Close'].pct_change() 
# calculate MACD
data["macd"], data["macdsignal"], data["macdhist"] = talib.MACD(data["Close"],\
        fastperiod=12, slowperiod=26, signalperiod=9)
# calculate RSI
data["RSI"] = talib.RSI(data["Close"], timeperiod=14)

In [5]:
# Remove NaN rows 
data = data.dropna().reset_index()

In [6]:
# Create binary target label for prediction
data['Target'] = np.where(data['Close'].shift(-1) > data['Close'], 1, 0)

In [7]:
# Select features and target
features = ['macd','macdsignal','RSI']
X = data[features]
y = data['Target']
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Create and train the model
model = LogisticRegression()  
model.fit(X_train, y_train)

In [9]:
# Evaluate the model
y_pred = model.predict(X_test)  
print("Accuracy:", accuracy_score(y_test, y_pred))  
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy: 0.4745762711864407
Confusion Matrix:
 [[ 8 23]
 [ 8 20]]


In [10]:
print(X.tail(1).to_numpy())

[[-74.85732437  -9.31918894  45.60427682]]


In [11]:
new_data = X.tail(1)
prediction = model.predict(new_data)  
print("Prediction:", prediction)

Prediction: [1]


In [12]:
# Get probability predictions
probabilities = model.predict_proba(X_test)
# Print the first few probability predictions
print(probabilities[-1])

[0.46934124 0.53065876]
