In [1]:
!pip install yfinance

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting yfinance
  Downloading yfinance-0.1.86-py2.py3-none-any.whl (29 kB)
Collecting requests>=2.26
  Downloading requests-2.28.1-py3-none-any.whl (62 kB)
[K     |████████████████████████████████| 62 kB 1.1 MB/s 
Installing collected packages: requests, yfinance
  Attempting uninstall: requests
    Found existing installation: requests 2.23.0
    Uninstalling requests-2.23.0:
      Successfully uninstalled requests-2.23.0
Successfully installed requests-2.28.1 yfinance-0.1.86


In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, fbeta_score
from sklearn.metrics import classification_report, confusion_matrix, make_scorer
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.tree import DecisionTreeClassifier

In [3]:
# Download daily NIO bars for the chosen window, discard the Volume column,
# and label each day 1 when it closed above the previous day's close.
start_date = '2019-11-01'
end_date = '2022-10-31'

df = yf.download(
    'NIO',
    start=start_date,
    end=end_date,
    interval='1d',
    progress=False,
    auto_adjust=True,
).drop(columns=['Volume'])

# The first row has no previous close (NaN), so the comparison is False there
# and that row is labelled 0.
prev_close = df['Close'].shift(1)
df['Target'] = np.where(df['Close'] > prev_close, 1, 0)
df

Unnamed: 0_level_0,Open,High,Low,Close,Target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-11-01,1.49,1.55,1.49,1.52,0
2019-11-04,1.65,1.90,1.58,1.71,1
2019-11-05,1.88,2.38,1.80,2.34,1
2019-11-06,2.46,2.46,1.96,2.03,0
2019-11-07,2.11,2.20,2.05,2.07,1
...,...,...,...,...,...
2022-10-24,10.01,10.03,8.38,9.45,0
2022-10-25,9.81,10.63,9.53,10.63,1
2022-10-26,10.44,11.09,10.23,10.81,1
2022-10-27,10.53,10.73,9.97,9.98,0


In [4]:
# Build the model inputs: each feature is the previous row's value of one
# price column, so a row's features never contain that row's own prices.
lag_sources = {'Open_1': 'Open', 'High_1': 'High', 'Low_1': 'Low', 'Close_1': 'Close'}
for lag_col, src_col in lag_sources.items():
    df[lag_col] = df[src_col].shift(1)
features = list(lag_sources)

# The shift leaves NaNs in the first row; drop it in place.
# NOTE: re-running this cell recomputes the shift on the already-trimmed
# frame and therefore drops one more leading row each time.
df.dropna(inplace=True)
df[features]

Unnamed: 0_level_0,Open_1,High_1,Low_1,Close_1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-11-04,1.49,1.55,1.49,1.52
2019-11-05,1.65,1.90,1.58,1.71
2019-11-06,1.88,2.38,1.80,2.34
2019-11-07,2.46,2.46,1.96,2.03
2019-11-08,2.11,2.20,2.05,2.07
...,...,...,...,...
2022-10-24,10.67,11.21,10.41,11.21
2022-10-25,10.01,10.03,8.38,9.45
2022-10-26,9.81,10.63,9.53,10.63
2022-10-27,10.44,11.09,10.23,10.81


In [5]:
# Labels stay a pandas Series (keeps the date index); the lagged features are
# pulled out as a plain NumPy array for the estimators.
y = df['Target']
X = df[features].to_numpy()
X

array([[ 1.49000001,  1.54999995,  1.49000001,  1.51999998],
       [ 1.64999998,  1.89999998,  1.58000004,  1.71000004],
       [ 1.88      ,  2.38000011,  1.79999995,  2.33999991],
       ...,
       [ 9.81000042, 10.63000011,  9.52999973, 10.63000011],
       [10.43999958, 11.09000015, 10.22999954, 10.81000042],
       [10.52999973, 10.72999954,  9.97000027,  9.97999954]])

In [6]:
# Row position that separates the first 90% of samples from the last 10%.
n_samples = len(y)
index = int(n_samples * 0.9)

Logistic Regression

In [7]:
# Positional split without shuffling: rows before `index` are used for
# training, rows from `index` onward are held out for evaluation.
X_train, X_test = X[:index], X[index:]
y_train, y_test = y[:index], y[index:]

# Logistic regression with default settings; fit() returns the estimator,
# so predictions for the held-out rows are taken from the same expression.
lr = LogisticRegression()
y_pred = lr.fit(X_train, y_train).predict(X_test)

In [8]:
# Precision on the held-out rows.
# NOTE: y_pred is overwritten by every model cell, so this scores whichever
# model cell was executed most recently.
precision_score(y_true=y_test, y_pred=y_pred)

0.43636363636363634

Naive Bayes

In [9]:
# Gaussian Naive Bayes with default settings, trained on the same split as the
# other models. GaussianNB is imported in the notebook's import cell.
nb = GaussianNB()
nb.fit(X_train, y_train)

# Overwrites the shared y_pred with this model's held-out predictions.
y_pred = nb.predict(X_test)

In [10]:
# Precision on the held-out rows.
# NOTE: y_pred is overwritten by every model cell, so this scores whichever
# model cell was executed most recently.
precision_score(y_true=y_test, y_pred=y_pred)

0.4473684210526316

K-Nearest Neighbours

In [11]:
# k-nearest-neighbours classifier voting over 15 neighbours, trained on the
# same split as the other models. KNeighborsClassifier is imported in the
# notebook's import cell.
knn = KNeighborsClassifier(n_neighbors=15)
knn.fit(X_train, y_train)

# Overwrites the shared y_pred with this model's held-out predictions.
y_pred = knn.predict(X_test)

In [12]:
# Precision on the held-out rows.
# NOTE: y_pred is overwritten by every model cell, so this scores whichever
# model cell was executed most recently.
precision_score(y_true=y_test, y_pred=y_pred)

0.44

Decision Tree

In [13]:
# Decision tree capped at depth 10 with at least 15 samples per leaf; the
# fixed random_state keeps the fitted tree reproducible between runs.
# DecisionTreeClassifier is imported in the notebook's import cell.
dtree = DecisionTreeClassifier(
    max_depth=10,
    random_state=101,
    max_features=None,
    min_samples_leaf=15,
)
dtree.fit(X_train, y_train)

# Overwrites the shared y_pred with this model's held-out predictions.
y_pred = dtree.predict(X_test)

In [14]:
# Precision on the held-out rows.
# NOTE: y_pred is overwritten by every model cell, so this scores whichever
# model cell was executed most recently.
precision_score(y_true=y_test, y_pred=y_pred)

0.44642857142857145

Random Forest

In [15]:
# Random forest of 70 trees with at least 30 samples per leaf, built on all
# available cores; the fixed random_state keeps runs reproducible.
# oob_score=True is requested, but this cell does not read the resulting
# score. RandomForestClassifier is imported in the notebook's import cell.
rfm = RandomForestClassifier(
    n_estimators=70,
    oob_score=True,
    n_jobs=-1,
    random_state=101,
    max_features=None,
    min_samples_leaf=30,
)
rfm.fit(X_train, y_train)

# Overwrites the shared y_pred with this model's held-out predictions.
y_pred = rfm.predict(X_test)

In [16]:
# Precision on the held-out rows.
# NOTE: y_pred is overwritten by every model cell, so this scores whichever
# model cell was executed most recently.
precision_score(y_true=y_test, y_pred=y_pred)

0.391304347826087

In [17]:
# Persist the labelled frame (prices, Target and lagged features) to a CSV
# file in the current working directory.
csv_path = "NIO.csv"
df.to_csv(csv_path)