In [1]:
import yfinance as yf
from sklearn import svm
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

# Function to create sliding window of data
def create_sliding_window(data, window_size):
    X = []
    for i in range(len(data) - window_size - 1):
        X.append(data[i: (i + window_size)])
    return np.array(X)

# Download historical data
ticker = 'AAPL'
data = yf.download(ticker, start='2020-01-01', end='2023-05-30')

# Calculate the daily returns
data['returns'] = data['Close'].pct_change()

# Drop missing values
data = data.dropna()

# Add a column 'direction' which is our target variable. 1 for positive returns, 0 for negative
data.loc[:, 'direction'] = np.where(data['returns'] > 0, 1, 0)
# Prepare data for SVM
X = data['returns'].values  # features
y = data['direction'].values  # target

# Create sliding window of past 5 days
window_size = 5
X = create_sliding_window(X, window_size)

# The target variable needs to be shifted accordingly
y = y[window_size:]

# Scale the features
scaler = MinMaxScaler(feature_range=(-1, 1))
X = scaler.fit_transform(X)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Train the SVM classifier
clf = svm.SVC()
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Print a classification report
print(classification_report(y_test, y_pred))


[*********************100%***********************]  1 of 1 completed


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['direction'] = np.where(data['returns'] > 0, 1, 0)


ValueError: Found input variables with inconsistent numbers of samples: [850, 851]