#### Import Section

In [1]:
import numpy as np
import pandas as pd
import psycopg2
import matplotlib.pyplot as plt
import datetime
from sklearn.tree import DecisionTreeClassifier
from sklearn.cross_validation import train_test_split
from sklearn.grid_search import GridSearchCV, RandomizedSearchCV
from sklearn import preprocessing
from sklearn import utils
from sklearn.metrics import scorer
from sklearn.metrics import accuracy_score



### Database connection

In [2]:
conn_string = "host='localhost' dbname='stockmarket' user='postgres' password='5265104d'"
conn = psycopg2.connect(conn_string)

## Read dataset

In [3]:
df = pd.read_sql_query(
    '''select i.date ,i.open ,i.high ,i.low ,i.close ,i.adj_close ,i.volume ,c.name
    from dataset_information i 
    join dataset_company c
    on i.company_id=c.id
    where company_id = {}'''.format(3)  , conn, index_col='date')

In [4]:
print(df.head())

                open      high       low     close  adj_close      volume  \
date                                                                        
1986-03-12  0.444196  0.448661  0.441964  0.441964   0.020108  21420000.0   
1986-03-13  0.441964  0.446429  0.435268  0.441964   0.020108  28991200.0   
1986-03-14  0.441964  0.468750  0.441964  0.466518   0.021225  96213600.0   
1986-03-17  0.464286  0.464286  0.453125  0.464286   0.021123  29680000.0   
1986-03-18  0.464286  0.486607  0.462054  0.479911   0.021834  62339200.0   

            name  
date              
1986-03-12  AAPL  
1986-03-13  AAPL  
1986-03-14  AAPL  
1986-03-17  AAPL  
1986-03-18  AAPL  


## Calculate bolingerband 

In [5]:
df["20d"] = np.round(df["close"].rolling(window = 20, center = False).mean(), 2)
# 2. Compute rolling standard deviation
apple_rstd = np.round(df['close'].rolling(window = 20, center = False).std(), 2)

# 3. Compute upper and lower bands
df['upperband'] = df['20d'] + 2 * apple_rstd
df['lowerband'] = df['20d'] - 2 * apple_rstd

## prepairing dataframe with label

In [6]:
#feature_cols = ['high-upperband', 'low-lowerband','dicesion']

df['high-upperband'] = df['high']-df['upperband']
df['low-lowerband'] =df['low']-df['lowerband']
df['decision'] = np.where(df['high-upperband'] > 0.0 , 'Sell',np.where( df['low-lowerband'] < 0.0,'Buy','Buy & Sell'))
df = df.dropna()
print(df.tail())

                  open        high         low       close   adj_close  \
date                                                                     
2018-03-07  174.940002  175.850006  174.270004  175.029999  175.029999   
2018-03-08  175.479996  177.119995  175.070007  176.940002  176.940002   
2018-03-09  177.960007  180.000000  177.389999  179.979996  179.979996   
2018-03-12  180.289993  182.389999  180.210007  181.720001  181.720001   
2018-03-13  182.589996  183.500000  179.240005  179.970001  179.970001   

                volume  name     20d  upperband  lowerband  high-upperband  \
date                                                                         
2018-03-07  31703500.0  AAPL  170.85     185.69     156.01       -9.839994   
2018-03-08  23774100.0  AAPL  171.72     185.78     157.66       -8.660005   
2018-03-09  32185200.0  AAPL  172.97     185.13     160.81       -5.130000   
2018-03-12  32207100.0  AAPL  174.23     184.19     164.27       -1.800001   
2018-03-13  3

## Prepairing Feature columns and label columns

In [7]:
def extractBolingerFeatures(df):
    feature_cols = ['adj_close','20d','upperband','lowerband']
    label_cols = ['decision']
    x = df[feature_cols]
    y = df[label_cols]
    npY = np.array(y)
    return (x,y,npY)

### test extractBolingerFeatures method

In [8]:
x,y,npY = extractBolingerFeatures(df)
print(x,y,npY)

             adj_close     20d  upperband  lowerband
date                                                
1986-04-09    0.022037    0.48       0.52       0.44
1986-04-10    0.022139    0.48       0.52       0.44
1986-04-11    0.021936    0.49       0.51       0.47
1986-04-14    0.021834    0.49       0.51       0.47
1986-04-15    0.022240    0.49       0.51       0.47
1986-04-16    0.022951    0.49       0.51       0.47
1986-04-17    0.023560    0.49       0.51       0.47
1986-04-18    0.024170    0.49       0.51       0.47
1986-04-21    0.024678    0.50       0.54       0.46
1986-04-22    0.024271    0.50       0.54       0.46
1986-04-23    0.024068    0.50       0.54       0.46
1986-04-24    0.025490    0.50       0.54       0.46
1986-04-25    0.026201    0.51       0.57       0.45
1986-04-28    0.025998    0.51       0.57       0.45
1986-04-29    0.025388    0.51       0.57       0.45
1986-04-30    0.024576    0.52       0.58       0.46
1986-05-01    0.024576    0.52       0.58     

### ApplyDecisionTreeModel

In [9]:
def ApplyDecisionTreeModel(fetures,label,predictList):
    dtc = DecisionTreeClassifier(criterion='entropy')
    dtc.fit(fetures,label)
    y_predict = str(dtc.predict(predictList))
    return y_predict

### test ApplyDecisionTreeModel

In [10]:
# date = '2018-03-07'
adj_close = 175.029999
midband = 170.85
upperband = 185.69
lowerband = 156.01
predictCol = [[175.02999,170.85,185.69,156.01]]
result = ApplyDecisionTreeModel(x,y,predictCol)
print(result)

['Buy & Sell']


## prepairing Decesion model

In [11]:
dtc = DecisionTreeClassifier(criterion='entropy')

## Split dataset into test and train parts

In [12]:
X_train, X_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=3)

## basic c4.5 test

In [13]:
dtc.fit(X_train,y_train)
y_predict = dtc.predict(X_test)
print(float(accuracy_score(y_test, y_predict)) *100)

77.95031055900621
