In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from scipy.optimize import minimize
from pandas_datareader import DataReader
import seaborn as sns; sns.set(color_codes=True)
import yfinance as yf
from ta.momentum import RSIIndicator
from ta.trend import MACD
import getFamaFrenchFactors as gff
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [3]:
start = '2021-4-16'
end = '2023-4-16'
tesla = yf.download('TSLA', start, end)

tesla.index = pd.to_datetime(tesla.index)
tesla['Date'] = tesla.index.date
tesla.set_index('Date', inplace=True)

tesla=tesla.reset_index()


[*********************100%***********************]  1 of 1 completed


In [4]:
# Calculate OBV
tsla = tesla.copy()
tsla['daily_return'] = tsla['Adj Close'].pct_change()
tsla['direction'] = np.where(tsla['daily_return'] >= 0, 1, -1)
tsla['direction'][0] = 0
tsla['vol_adjusted'] = tsla['Volume'] * tsla['direction']
tsla['OBV'] = tsla['vol_adjusted'].cumsum()

# Calculate additional features
tsla['mom_5_20'] = (tsla['Close'] / tsla['Close'].shift(5)) - 1
tsla['mom_20_100'] = (tsla['Close'] / tsla['Close'].shift(20)) - 1
tsla['mom_60_200'] = (tsla['Close'] / tsla['Close'].shift(60)) - 1

rsi_indicator = RSIIndicator(close=tsla['Adj Close'], window=14)
macd_indicator = MACD(close=tsla['Adj Close'])

tsla['rsi'] = rsi_indicator.rsi()
tsla['macd'] = macd_indicator.macd()

# Calculate Fama French 3 factors
ff_data = gff.famaFrench3Factor(frequency='m') 
ff_data.rename(columns={"date_ff_factors": 'Date'}, inplace=True)
ff_data.set_index('Date',inplace=True)
ff_data = ff_data.resample('D').interpolate()

# Reset index and convert all dates to same timezones, so they become mergable
ff_data.reset_index(inplace=True)
ff_data['Date'] = pd.to_datetime(ff_data['Date']).dt.date

tsla = ff_data.merge(tsla,on='Date')
tesla_returns = pd.DataFrame({
    'avg_close_20_days_': tsla['Adj Close'].rolling(window=20).mean().shift(1),
    'Fama_French_Mkt_RF':tsla['Mkt-RF'].shift(1),
    'mom_60_200':  tsla['mom_60_200'].shift(1),
    'TSLA_MACD':tsla['macd'].shift(1),
    'Fama_French_HML' : tsla['HML'].shift(1),
    'TSLA_CLOSE': tsla['Adj Close']
})

tesla_returns.dropna(inplace=True)

scaler = StandardScaler()
X = scaler.fit_transform(tesla_returns)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tsla['direction'][0] = 0


In [5]:
# Split the data into features (X) and target (y)
y = tesla_returns['TSLA_CLOSE']
X = tesla_returns.drop(columns=['TSLA_CLOSE'],axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# Define the model and fit the data
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf_model.predict(X_test)

# Evaluate the model performance using mean squared error
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print('RMSE value is:', rmse)                                                                      

RMSE value is: 12.64888297213868
