In [1]:
# This program uses machine learning to predict the price of a stock V2 using a decision tree
# import the libraries
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import numpy as np 
import pandas as pd 

In [6]:
# load the data and store it to a var
df = pd.read_csv('Data/TSLA.csv')
# set the date as the index
df = df.set_index(pd.DatetimeIndex(df['Date'].values))
# give the index a name
df.index.name = 'Date'

In [7]:
df.head(6)

Unnamed: 0_level_0,Date,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-09-16,2019-09-16,49.200001,49.486,48.234001,48.562,48.562,23640500
2019-09-17,2019-09-17,48.493999,49.119999,48.074001,48.958,48.958,19327000
2019-09-18,2019-09-18,49.0,49.633999,48.473999,48.698002,48.698002,20851000
2019-09-19,2019-09-19,49.200001,49.588001,48.967999,49.32,49.32,23979000
2019-09-20,2019-09-20,49.298,49.389999,47.632,48.124001,48.124001,31765000
2019-09-23,2019-09-23,48.0,49.035999,47.844002,48.245998,48.245998,21701000


In [8]:
# manipulate the data
# create a new column to store if close price changed either up (1) or down (0)
df['Price_up'] = np.where(df['Close'].shift(-1) > df['Close'], 1, 0)
# remove date column becuase it is already in index
df = df.drop(columns=['Date'])
# show the data
df.head(6)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Price_up
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-09-16,49.200001,49.486,48.234001,48.562,48.562,23640500,1
2019-09-17,48.493999,49.119999,48.074001,48.958,48.958,19327000,0
2019-09-18,49.0,49.633999,48.473999,48.698002,48.698002,20851000,1
2019-09-19,49.200001,49.588001,48.967999,49.32,49.32,23979000,0
2019-09-20,49.298,49.389999,47.632,48.124001,48.124001,31765000,1
2019-09-23,48.0,49.035999,47.844002,48.245998,48.245998,21701000,0


In [9]:
# split the data set into a feature set and a target set

# X is set to all rows (:), and all columns except the target column in the end (df.shape[1] is the columns, and -1 is the last column in columns)as an array (.values)
X = df.iloc[:, 0:df.shape[1]-1].values
# Y is all the rows (:), and only the target column as an array
Y = df.iloc[:, df.shape[1]-1].values

In [10]:
# split data into training (80%) and testing (20%) sets
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2)

In [11]:
# Create and train the model (DecisionTreeClassifier)
tree = DecisionTreeClassifier().fit(x_train, y_train)

# show how well the model did on the test data set
print(tree.score(x_test, y_test))

0.45098039215686275


In [12]:
# show the models predictions
tree_predictions = tree.predict(x_test)
print(tree_predictions)

[1 0 1 1 1 1 0 1 1 0 0 1 0 1 0 1 0 1 1 1 1 1 1 1 0 1 1 0 1 1 0 0 1 1 1 0 1
 0 1 1 1 1 0 0 1 1 1 1 0 0 0]


In [13]:
# show the actual values
y_test

array([1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0,
       1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 1, 0])

In [None]:
# this model doesn't take in the right parameters, just using close price is not even enough to start with, a proper stock predictor needs to machine learn with every indicator and news source possible, a very complex algorithm is needed