In [1]:
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsRegressor
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score

# Loading Data Set

In [2]:

df = pd.read_csv("google-stock-dataset-Monthly.csv")
df.head(10)

Unnamed: 0.1,Unnamed: 0,Date,Price,High,Low,Close,Volume,Adj Close
0,0,2013-05-01,20.6,23.04,20.43,21.8,1974715308,21.8
1,1,2013-06-01,21.85,22.79,21.2,22.03,1941280776,22.03
2,2,2013-07-01,22.18,23.22,21.91,22.22,2047770180,22.22
3,3,2013-08-01,22.4,22.77,21.16,21.19,1338144516,21.19
4,4,2013-09-01,21.38,22.67,21.37,21.92,1323615060,21.92
5,5,2013-10-01,22.03,26.06,21.1,25.79,2206771020,25.79
6,6,2013-11-01,25.82,26.73,25.15,26.52,1049213736,26.52
7,7,2013-12-01,26.61,28.05,26.25,28.05,1352374272,28.05
8,8,2014-01-01,27.91,29.69,27.08,29.55,2178287532,29.55
9,9,2014-02-01,29.51,30.75,28.23,30.42,1620230148,30.42


In [3]:
df.drop('Unnamed: 0', inplace=True, axis=1)

In [4]:
df.head(10)

Unnamed: 0,Date,Price,High,Low,Close,Volume,Adj Close
0,2013-05-01,20.6,23.04,20.43,21.8,1974715308,21.8
1,2013-06-01,21.85,22.79,21.2,22.03,1941280776,22.03
2,2013-07-01,22.18,23.22,21.91,22.22,2047770180,22.22
3,2013-08-01,22.4,22.77,21.16,21.19,1338144516,21.19
4,2013-09-01,21.38,22.67,21.37,21.92,1323615060,21.92
5,2013-10-01,22.03,26.06,21.1,25.79,2206771020,25.79
6,2013-11-01,25.82,26.73,25.15,26.52,1049213736,26.52
7,2013-12-01,26.61,28.05,26.25,28.05,1352374272,28.05
8,2014-01-01,27.91,29.69,27.08,29.55,2178287532,29.55
9,2014-02-01,29.51,30.75,28.23,30.42,1620230148,30.42


In [5]:
print("Size/Shape of the dataset: ",df.shape)

Size/Shape of the dataset:  (120, 7)


In [6]:
print(df.dtypes)

Date          object
Price        float64
High         float64
Low          float64
Close        float64
Volume         int64
Adj Close    float64
dtype: object


In [7]:
X = df.drop(['Date', 'Close'], axis=1).values
y = np.where(df['Close'].shift(-1) > df['Close'], 1, -1)

In [8]:
print(y)

[ 1  1 -1  1  1  1  1  1  1 -1 -1  1  1 -1  1  1 -1 -1 -1  1  1 -1 -1 -1
 -1  1 -1 -1  1  1  1 -1 -1  1 -1  1 -1  1 -1  1  1 -1  1  1  1  1  1  1
 -1  1  1  1  1  1  1  1 -1 -1 -1  1  1  1  1 -1 -1  1 -1  1  1  1  1 -1
 -1  1 -1  1  1  1  1  1 -1 -1  1  1 -1  1  1 -1  1  1 -1  1  1  1  1  1
  1  1  1 -1  1 -1  1 -1 -1  1 -1 -1 -1  1 -1 -1 -1  1 -1  1 -1  1  1 -1]


# Splitting of data

In [9]:
split = int(0.8 * len(df))
X_train, X_test, y_train, y_test = X[:split], X[split:], y[:split], y[split:]

# SVM Model

In [10]:
svm_model = svm.SVC()
svm_model.fit(X_train, y_train)

SVC()

In [11]:
svm_pred = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_pred)

In [12]:
svm_pred

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1])

In [13]:
print('SVM accuracy:', svm_accuracy)

SVM accuracy: 0.4583333333333333


# Naive Bayes

In [14]:
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

GaussianNB()

In [15]:
nb_pred = nb_model.predict(X_test)
nb_accuracy = accuracy_score(y_test, nb_pred)

In [16]:
nb_pred

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1])

In [17]:
print('Naive Bayes accuracy:', nb_accuracy)

Naive Bayes accuracy: 0.4583333333333333


# K Nearest Neighbour

In [18]:
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train, y_train)

KNeighborsRegressor()

In [19]:
knn_pred = knn_model.predict(X_test)
knn_accuracy = accuracy_score(y_test, np.where(knn_pred > 0, 1, -1))

In [20]:
print('KNN accuracy:', knn_accuracy)

KNN accuracy: 0.5416666666666666


# K Means

In [21]:
kmeans_model = KMeans(n_clusters=2)
kmeans_model.fit(X)

KMeans(n_clusters=2)

In [22]:
kmeans_labels = kmeans_model.labels_

In [23]:
print('K-Means cluster labels:', kmeans_labels)


K-Means cluster labels: [1 1 1 1 1 1 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0]


In [24]:
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score
silhouette = silhouette_score(X, kmeans_model.labels_, metric='euclidean')
print('Silhouette score:', silhouette)

Silhouette score: 0.7617650336807331
