In [2]:
import yfinance as yf
import datetime

# Define start and end dates
start = datetime.datetime(2019, 1, 2)
end = datetime.datetime(2020, 12, 31)

# Define list of tickers
tickers = ['AAPL', 'HD', 'JNJ', 'JPM', 'MSFT', 'UNH', 'V', 'XOM']

# Use yfinance to download adjusted close prices for the tickers
prices = yf.download(tickers, start=start, end=end)['Adj Close']
dji = yf.download("^DJI", start = start, end = end)["Adj Close"]

[*********************100%***********************]  8 of 8 completed
[*********************100%***********************]  1 of 1 completed


In [3]:
prices

Unnamed: 0_level_0,AAPL,HD,JNJ,JPM,MSFT,UNH,V,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-01-02,38.047043,155.546677,114.144203,86.554688,96.632683,228.546432,129.230560,55.378765
2019-01-03,34.257278,152.118347,112.330406,85.324600,93.077744,222.313873,124.573532,54.528503
2019-01-04,35.719704,156.638351,114.215683,88.470116,97.406723,224.913910,129.940338,56.538937
2019-01-07,35.640194,159.723846,113.483025,88.531647,97.530952,225.345673,132.283386,56.832954
2019-01-08,36.319607,160.490707,116.118828,88.364700,98.238098,228.358658,133.002853,57.246178
...,...,...,...,...,...,...,...,...
2020-12-23,129.209290,255.949387,143.379517,116.491623,216.605499,327.533020,202.118805,37.559818
2020-12-24,130.205765,257.002380,143.879669,115.979332,218.300949,330.637695,205.466095,37.406956
2020-12-28,134.862701,255.418167,144.559128,116.743095,220.466782,335.644012,209.335251,37.532848
2020-12-29,133.067032,252.515320,145.455582,116.435730,219.672989,337.002319,211.048264,37.110218


In [4]:
dji

Date
2019-01-02    23346.240234
2019-01-03    22686.220703
2019-01-04    23433.160156
2019-01-07    23531.349609
2019-01-08    23787.449219
                  ...     
2020-12-23    30129.830078
2020-12-24    30199.869141
2020-12-28    30403.970703
2020-12-29    30335.669922
2020-12-30    30409.560547
Name: Adj Close, Length: 504, dtype: float64

In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Fill in missing values using previous day's value
prices = prices.fillna(method='ffill')

# Calculate the daily log returns
returns = pd.DataFrame()
for ticker in tickers:
    returns[ticker] = prices[ticker].apply(lambda x: np.log(x)).diff()

returns = returns.dropna()
# Calculate the next day's DJI return and define labels as sign of the return
dji_returns = dji.apply(lambda x: np.log(x)).diff()
dji_returns = dji_returns.dropna()

labels = dji_returns.apply(lambda x: 1 if x > 0 else -1)

# Split data into training and test datasets
split_date = datetime.datetime(2019, 1, 4) + pd.DateOffset(days=int(prices.shape[0]*0.8))
X_train = returns.loc[:split_date]
X_test = returns.loc[split_date:]
y_train = labels.loc[:split_date]
y_test = labels.loc[split_date:]

# Standardize the feature values
scal = StandardScaler()
X_train = scal.fit_transform(X_train)
X_test = scal.transform(X_test)


In [6]:
returns

Unnamed: 0_level_0,AAPL,HD,JNJ,JPM,MSFT,UNH,V,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-01-03,-0.104924,-0.022287,-0.016018,-0.014314,-0.037482,-0.027649,-0.036702,-0.015473
2019-01-04,0.041803,0.029281,0.016644,0.036202,0.045460,0.011627,0.042179,0.036206
2019-01-07,-0.002228,0.019507,-0.006435,0.000695,0.001275,0.001918,0.017871,0.005187
2019-01-08,0.018884,0.004790,0.022961,-0.001888,0.007224,0.013282,0.005424,0.007245
2019-01-09,0.016839,0.010290,-0.007957,-0.001692,0.014198,0.001438,0.011700,0.005261
...,...,...,...,...,...,...,...,...
2020-12-23,-0.007001,-0.000852,-0.005120,0.027561,-0.013125,0.007672,-0.002627,0.012770
2020-12-24,0.007683,0.004106,0.003482,-0.004407,0.007797,0.009434,0.016425,-0.004078
2020-12-28,0.035141,-0.006183,0.004711,0.006564,0.009872,0.015028,0.018656,0.003360
2020-12-29,-0.013404,-0.011430,0.006182,-0.002636,-0.003607,0.004039,0.008150,-0.011324


In [7]:
X_train

array([[-6.48479105, -2.0909718 , -1.66293758, ..., -1.76516304,
        -3.30683416, -1.26776836],
       [ 2.36527302,  2.47926577,  1.58161085, ...,  0.68071096,
         3.5110563 ,  3.04640574],
       [-0.29056098,  1.61302727, -0.71102589, ...,  0.07606273,
         1.4100429 ,  0.45689927],
       ...,
       [-0.98164485, -0.68904941, -1.13857554, ..., -0.73710569,
        -0.2623878 , -0.53105905],
       [ 0.12965522,  1.11187856, -0.0913771 , ..., -0.51669904,
         1.23995366, -0.86244431],
       [-0.52116593,  0.26281328,  0.01325603, ...,  0.76709315,
        -0.9990838 ,  0.81374604]])

In [8]:
X_test

array([[-0.52116593,  0.26281328,  0.01325603, ...,  0.76709315,
        -0.9990838 ,  0.81374604],
       [ 1.25947903, -0.1341214 , -0.66172196, ...,  2.61657496,
         1.33617446,  1.0382988 ],
       [-0.58720699,  0.16627638, -0.73142406, ..., -0.3065758 ,
        -0.02620141, -0.44061943],
       ...,
       [ 1.96343542, -0.66376474,  0.39624844, ...,  0.89246602,
         1.47788598,  0.30438323],
       [-0.96464429, -1.12877297,  0.54235857, ...,  0.2081351 ,
         0.56980994, -0.9214482 ],
       [-0.67267148, -0.42593248,  1.15159304, ..., -0.46791933,
         1.45936038,  0.68876903]])

In [9]:
y_train.value_counts()

 1    161
-1    118
Name: Adj Close, dtype: int64

In [10]:
y_train

Date
2019-01-03   -1
2019-01-04    1
2019-01-07    1
2019-01-08    1
2019-01-09    1
             ..
2020-02-05    1
2020-02-06    1
2020-02-07   -1
2020-02-10    1
2020-02-11   -1
Name: Adj Close, Length: 279, dtype: int64

In [11]:
y_test

Date
2020-02-11   -1
2020-02-12    1
2020-02-13   -1
2020-02-14   -1
2020-02-18   -1
             ..
2020-12-23    1
2020-12-24    1
2020-12-28    1
2020-12-29   -1
2020-12-30    1
Name: Adj Close, Length: 225, dtype: int64

In [12]:
from sklearn.svm import SVC

# Fit SVM model with RBF kernel and C=1
svm_mod = SVC(kernel='rbf', gamma=1, C=1)
svm_mod.fit(X_train, y_train)

SVC(C=1, gamma=1)

In [13]:
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.svm import SVC

# Set up the SVM model with RBF kernel, gamma=1, and C=1
svm_model = SVC(kernel='rbf', gamma=1, C=1)

# Perform time series cross-validation with 5 splits
tscv = TimeSeriesSplit(n_splits=5)

# Compute the cross-validated scores for the trained model
cv_scores = cross_val_score(svm_model, X_train, y_train, cv=tscv)

# Compute the mean of the cross-validated scores
mean_cv_score = cv_scores.mean()

print("Mean cross-validated score: {:.4f}".format(mean_cv_score))


Mean cross-validated score: 0.7043


In [14]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

param_grid = {'C': [0.1, 1, 10, 100], 'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 1, 5, 10]}
svm = SVC(kernel='rbf')
grid_search = GridSearchCV(svm, param_grid, cv=TimeSeriesSplit(n_splits=5))
grid_search.fit(X_train, y_train)

print("Best parameters: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)


Best parameters:  {'C': 1, 'gamma': 0.1}
Best cross-validation score:  0.8478260869565217


In [15]:
split_date

Timestamp('2020-02-11 00:00:00')

In [16]:
prices.iloc[279]

AAPL     78.344833
HD      223.943039
JNJ     139.630753
JPM     124.826881
MSFT    178.861359
UNH     277.559357
V       199.551575
XOM      51.127159
Name: 2020-02-11 00:00:00, dtype: float64

In [17]:
dji.iloc[437] # 2020-09-30

27173.9609375

In [18]:
len(prices["AAPL"]) == len(dji)

True

In [19]:
prices.loc[datetime.datetime(2020, 9, 30)]

AAPL    114.065018
HD      262.014862
JNJ     139.522430
JPM      88.842567
MSFT    205.590759
UNH     301.397797
V       196.575226
XOM      30.142498
Name: 2020-09-30 00:00:00, dtype: float64

In [20]:
datetime.datetime(2020, 9, 30)-pd.DateOffset(days=637)

Timestamp('2019-01-02 00:00:00')

In [21]:
import numpy as np
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import yfinance as yf
import datetime

# Define start and end dates
start = datetime.datetime(2019, 1, 2)
end = datetime.datetime(2021, 1, 31)

# Define list of tickers
tickers = ['AAPL', 'HD', 'JNJ', 'JPM', 'MSFT', 'UNH', 'V', 'XOM']

# Use yfinance to download adjusted close prices for the tickers
prices = yf.download(tickers, start=start, end=end)['Adj Close']
dji = yf.download("^DJI", start = start, end = end)["Adj Close"]


# Fill in missing values using previous day's value
prices = prices.fillna(method='ffill')

# Calculate the daily log returns
returns = pd.DataFrame()
for ticker in tickers:
    returns[ticker] = prices[ticker].apply(lambda x: np.log(x)).diff()

returns = returns.dropna()
# Calculate the next day's DJI return and define labels as sign of the return
dji_returns = dji.apply(lambda x: np.log(x)).diff()
dji_returns = dji_returns.dropna()

labels = dji_returns.apply(lambda x: 1 if x > 0 else -1)

# Split data into training and test datasets
split_date = datetime.datetime(2020, 10, 1)
X_train = returns.loc[:split_date]
X_test = returns.loc[split_date:]
y_train = labels.loc[:split_date]
y_test = labels.loc[split_date:]

# Standardize the feature values
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

[*********************100%***********************]  8 of 8 completed
[*********************100%***********************]  1 of 1 completed


In [22]:
# Train an SVM model with RBF kernel and optimized hyperparameters and implement the trading strategy on that
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

param_grid = {'C': [0.1, 1, 10, 100], 'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 1, 5, 10]}
svm = SVC(kernel='rbf')
grid = GridSearchCV(svm, param_grid=param_grid, cv=TimeSeriesSplit(n_splits=5), scoring='accuracy')
pipe = Pipeline([('scaler', StandardScaler()), ('svm', grid)])
pipe.fit(X_train, y_train)
print(f'Best hyperparameters: {pipe.named_steps["svm"].best_params_}')

# Simulate a trading strategy using the trained SVM model
capital = 10000  # initial capital amount
shares = 0  # number of shares held
return_by_strat = []  # daily returns
for i in range(len(X_test)):
    signal = pipe.predict(X_test[i].reshape(1, -1))[0]
    j = 437 + i + 1 # starting 1st October
    if signal == 1 and shares == 0:
        shares = capital / dji[j]
        capital = 0
    elif signal == -1 and shares > 0:
        capital = shares * dji[j]
        shares = 0
    return_by_strat.append((capital + shares * dji[j]) / 10000 - 1)
    
# Compute the final amount at market close of Dec 31st 2020
final_amount = (capital + shares * dji[-1]) / 10000 * 10000
print(f'Final amount at market close of Dec 31st 2020: ${final_amount:.2f}')


Best hyperparameters: {'C': 1, 'gamma': 0.1}
Final amount at market close of Dec 31st 2020: $11215.18


In [23]:
# Simulate a trading strategy using the previously trained SVM model with Gamma = C = 1 as instructed by TA
capital = 10000  # initial capital amount
shares = 0  # number of shares held
return_by_strat = []  # daily returns
X_during_backtest_period = returns.loc[datetime.datetime(2020, 10, 1)+pd.DateOffset(days=1):]
X_during_backtest_period = scal.transform(X_during_backtest_period)

for i in range(len(X_during_backtest_period)):
    signal = svm_mod.predict(X_during_backtest_period[i].reshape(1, -1))[0]
    j = 441 + i # starting 1st October
    if signal == 1 and shares == 0:
        shares = capital / dji[j]
        capital = 0
    elif signal == -1 and shares > 0:
        capital = shares * dji[j]
        shares = 0
    return_by_strat.append((capital + shares * dji[j]) / 10000 - 1)
    
# Compute the final amount at market close of Dec 31st 2020
final_amount = (capital + shares * dji[-1])
print(f'Final amount at market close of Dec 31st 2020: ${final_amount:.2f}')


Final amount at market close of Dec 31st 2020: $11392.75


In [24]:
# Simple Buy and Hold Strategy Implementation
import numpy as np

# Number of shares bought on Oct 1st 2020 with initial capital
num_shares = 10000 / dji[438]

# Final amount at market close of Dec 31st 2020
final_amount = num_shares * dji[-1]

print(f"Final amount for buy-and-hold strategy: ${np.round(final_amount, 2)}")


Final amount for buy-and-hold strategy: $10869.55


I think we will make more money by the SVM strategy as per our backtesting above in the given periods of time if we optimally train it. It is around 400 more dollars in Profit. But, if we train our SVm model with the given Gamma = C = 1 parameters, then our trading strategy would make around 200 dollars less profit compared to the buy-and-hold strategy.