### Importing Libraries

In [169]:
import pandas as pd
import requests
import numpy as np
from bs4 import BeautifulSoup

### Create an empty dataframe to store our data

In [170]:
data = pd.DataFrame()
pd.options.mode.chained_assignment = None #Prevent error Warning of a copy in memory

### Build a simple Web Scraper to import Stock Market data from Ngtradeonline.com 

In [171]:
count = 0 
while count < 20    :
    res = requests.get("http://www.ngtradeonline.com/Home/PriceHistory?page="+str(count)+"&stockName=GUARANTY")
    soup = BeautifulSoup(res.content,'lxml') #Use Beautiful soup to prettify our data
    table = soup.find_all('table')[0] #Find all the html tag named 'table'
    html_data = pd.read_html(str(table))[0] #Read the content of found html tables
    data = data.append(html_data) # Append to the empty Dataframe above
    print("page "+str(count)+" Done.") #Keep track of each web page read
    count = count + 1
data.to_csv('GTB_data.csv') # Export the data to a CSV file

page 0 Done.
page 1 Done.
page 2 Done.
page 3 Done.
page 4 Done.
page 5 Done.
page 6 Done.
page 7 Done.
page 8 Done.
page 9 Done.
page 10 Done.
page 11 Done.
page 12 Done.
page 13 Done.
page 14 Done.
page 15 Done.
page 16 Done.
page 17 Done.
page 18 Done.
page 19 Done.


In [172]:
data.head(20)

Unnamed: 0,Symbol,Low,Open,Price,Volume,High,Change,Date
0,GUARANTY,31.95,34.5,32.5,0,33.35,-2.0,9/11/2018
1,GUARANTY,34.5,35.0,34.5,0,35.0,-0.5,9/10/2018
2,GUARANTY,34.4,34.95,35.0,0,35.3,0.05,9/7/2018
3,GUARANTY,34.95,36.65,34.95,0,36.5,-1.7,9/6/2018
4,GUARANTY,36.65,37.05,36.65,0,36.85,-0.4,9/5/2018
5,GUARANTY,36.5,36.5,37.05,0,37.45,0.55,9/4/2018
6,GUARANTY,36.05,36.0,36.5,0,36.75,0.5,9/3/2018
7,GUARANTY,36.0,38.0,36.0,0,37.85,-2.0,8/31/2018
8,GUARANTY,37.9,39.05,38.0,0,38.0,-1.05,8/30/2018
9,GUARANTY,38.65,39.0,39.05,0,39.05,0.05,8/29/2018


### Data transformation

In [176]:
#Create a column to hold the percentage spread of the closing price
data['HL_PCT'] = (data['High']- data['Low'])/data['Price'] *100.0

#Create a column to hold the Percentage of Price Change
data['PCT_change'] = (data['Price'] - data['Open']) / data['Open'] * 100.0

### Generate a new set of data from the transformation above

In [177]:
new_data = data[['Price', 'HL_PCT', 'PCT_change', 'Volume']]

In [178]:
new_data.head(10)

Unnamed: 0,Price,HL_PCT,PCT_change,Volume
0,32.5,4.307692,-5.797101,0
1,34.5,1.449275,-1.428571,0
2,35.0,2.571429,0.143062,0
3,34.95,4.434907,-4.638472,0
4,36.65,0.545703,-1.079622,0
5,37.05,2.564103,1.506849,0
6,36.5,1.917808,1.388889,0
7,36.0,5.138889,-5.263158,0
8,38.0,0.263158,-2.68886,0
9,39.05,1.024328,0.128205,0


### Import the Machine Learning Algorithms from Scikit Learn library

In [179]:
from sklearn import preprocessing, cross_validation, svm
from sklearn.linear_model import LinearRegression
import math

In [180]:
#new_data.fillna(value=-99999, inplace=True)

### Feature Engineering - Forcasting the stock prices

In [181]:
forcast_value = int(math.ceil(0.01 * len(new_data))) # Deduce the number of times to forcast
# Add a new column that a shifting of the price values based on the size of data
new_data['label'] = new_data['Price'].shift(-forcast_value)

In [182]:
new_data.head()

Unnamed: 0,Price,HL_PCT,PCT_change,Volume,label
0,32.5,4.307692,-5.797101,0,35.0
1,34.5,1.449275,-1.428571,0,34.95
2,35.0,2.571429,0.143062,0,36.65
3,34.95,4.434907,-4.638472,0,37.05
4,36.65,0.545703,-1.079622,0,36.5


In [183]:
new_data.dropna(inplace= True)

### Creating the features and the label for training the model

In [184]:
x = np.array(new_data.drop(['Price'], axis = 1))
y = np.array(new_data['Price'])

### Pre-processing: Scaling the data to normalize it's values

In [185]:
#Pre-processing
x = preprocessing.scale(x)

In [186]:
# Splitting the data into training and test set
x_train, x_test, y_train, y_test = cross_validation.train_test_split(x, y, test_size=0.2)

### Building the Machine Learning Model

### Using Linear Regression Machine Learning Algorithm

In [187]:
linear_model = LinearRegression()
linear_model.fit(x_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

### Evaluating the Accuracy of the model

In [188]:
accuracy = linear_model.score(x_test, y_test)
accuracy = accuracy * 100
print('Percentage Accuracy = ',np.round(accuracy),'%')

Percentage Accuracy =  93.0 %


### Using Support Vector Regression (SVR) Machine Learning Algorithm

In [189]:
clf = svm.SVR() #Creating an instance of SVR model
clf.fit(x_train, y_train)   # Fitting the model on the data

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

### Evaluating the Accuracy of the model

In [190]:
accuracy = clf.score(x_test, y_test)
accuracy = accuracy * 100
print('Percentage Accuracy = ',np.round(accuracy),'%')

Percentage Accuracy =  88.0 %


### Using multiple kernels for the SVR

In [192]:
for k in ['linear','poly','rbf','sigmoid']:
    clf = svm.SVR(kernel=k)
    clf.fit(x_train, y_train)
    confidence = clf.score(x_test, y_test)
    print('Percentage Accuracy = ', k, np.round(accuracy), '%')


Percentage Accuracy =  linear 88.0 %
Percentage Accuracy =  poly 88.0 %
Percentage Accuracy =  rbf 88.0 %
Percentage Accuracy =  sigmoid 88.0 %


### Predicted Values

In [199]:
predictions = linear_model.predict(x_test)

In [200]:
price_pred = pd.DataFrame(predictions, columns = ['Predicted Price'])
price_pred.to_csv('Price Predictions.csv', index_label = ['S/N'])

In [201]:
price_pred.head(10)

Unnamed: 0,Predicted Price
0,43.322709
1,40.542875
2,40.887115
3,46.973188
4,45.131754
5,47.358849
6,40.227388
7,43.365075
8,43.051027
9,42.823897
