In [1]:
import sqlite3
import os
from datetime import datetime
from datetime import timedelta
from sklearn.datasets import fetch_20newsgroups
import time
from sklearn.model_selection import cross_val_score
from sklearn import svm
import pickle
from threading import Thread


# Wall-clock reference point; the progress prints later in the script report
# elapsed seconds relative to this value.
start_time = time.time()

# Load the 20 Newsgroups training split. In the visible code only its
# target_names attribute is used.
twenty_train = fetch_20newsgroups(subset='train', shuffle=True, random_state=42)
# Ordered list of category names; getXVector uses positions in this list as
# the indices of its feature vector.
cat = list(twenty_train.target_names)


def writeToFile(fName,message):
    """Append ``message`` to the text file at ``fName``.

    The file is created if it does not exist yet.

    Args:
        fName: Path of the file to append to.
        message: Text to write; no newline is added.
    """
    # The context manager closes the handle even when write() raises,
    # which the manual open()/close() pair did not guarantee.
    with open(fName, "a") as out:
        out.write(message)
    

def savePickleFile(d,varList):
    """Pickle ``varList`` into the file at path ``d``, overwriting it.

    Args:
        d: Destination file path.
        varList: Any picklable object (callers in this file pass a list).
    """
    # The with-statement already closes the file; no explicit close() needed.
    with open(d, 'wb') as f:
        pickle.dump(varList, f)
    
def loadPickleFile(d):
    """Load and return the object pickled in the file at path ``d``.

    Args:
        d: Path of a file previously written with pickle.

    Returns:
        The unpickled object.
    """
    # NOTE: pickle.load can execute arbitrary code; only use this on files
    # this project wrote itself, never on untrusted input.
    # The with-statement already closes the file; no explicit close() needed.
    with open(d, 'rb') as f:
        return pickle.load(f)

def makeDirectory(folder_name1,sub_folder):
    """Ensure ``folder_name1/sub_folder`` exists and return its path.

    Missing intermediate directories are created as well.

    Args:
        folder_name1: Parent directory.
        sub_folder: Name (or relative path) of the directory below it.

    Returns:
        The joined directory path.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    directory = os.path.join(folder_name1, sub_folder)
    # exist_ok avoids the check-then-create race: runMLModel calls this from
    # many threads at once, and a separate os.path.exists() test can pass in
    # two threads before either of them has created the directory.
    os.makedirs(directory, exist_ok=True)
    return directory

def getParentDir(cwd):
    """Return the directory component of the path ``cwd``.

    This is ``os.path.dirname`` applied once, so a trailing separator is
    stripped rather than a whole path segment.
    """
    return os.path.dirname(cwd)

def connectDB():
    """Open ``mydatabase.db`` in the parent of the current working directory.

    The ``Stock`` table is created on first use; its (date, stockName) pair is
    unique and a conflicting insert replaces the existing row.

    Returns:
        The open ``sqlite3`` connection. The caller is responsible for
        closing it.
    """
    database_path = os.path.dirname(os.getcwd()) + "/mydatabase.db"
    connection = sqlite3.connect(database_path)
    create_stock_table = "CREATE TABLE IF NOT EXISTS Stock(date TEXT, stockName TEXT, open INTEGER, close INTEGER, volume INTEGER, change INTEGER, UNIQUE(date, stockName) ON CONFLICT REPLACE);"
    connection.cursor().execute(create_stock_table)
    return connection

def extractDB(table_name,predicted,c,company):
    """Insert one six-column row per entry of the module-level ``data``.

    NOTE(review): ``data`` is not a parameter and is not defined in the
    visible part of this file; it must exist at module scope when this runs.
    It is indexed by the keys 'open', 'close' and 'volume', each exposing a
    ``.values`` sequence.

    Args:
        table_name: Name of the target table (interpolated into the SQL).
        predicted: Sequence indexed like ``data``; element ``[i][0]`` goes in
            the first column and ``[i][1]`` in the last.
        c: Object with an ``execute(sql, params)`` method, e.g. a cursor.
        company: Value stored in the second column of every row.
    """
    # The statement text is identical for every row, so build it once.
    insert_sql = "INSERT INTO {tn} VALUES(?,?,?,?,?,?)".format(tn=table_name)
    for row_idx in range(len(data)):
        row = (
            predicted[row_idx][0],
            company,
            data['open'].values[row_idx],
            data['close'].values[row_idx],
            data['volume'].values[row_idx],
            predicted[row_idx][1],
        )
        c.execute(insert_sql, row)

def fetchStock(start,end,conn,stockName):
    """Return all ``Stock`` rows for one ticker within a date range.

    The stored ``date`` text is compacted to ``YYYYMMDD`` with ``substr``
    (characters 1-4, 6-7 and 9-10) and compared as a string, so both bounds
    are inclusive at day granularity.

    Args:
        start: Object with ``strftime`` giving the first day of the range.
        end: Object with ``strftime`` giving the last day of the range.
        conn: Open database connection.
        stockName: Ticker to match against the ``stockName`` column.

    Returns:
        List of row tuples as returned by ``fetchall()``.
    """
    tableName = "Stock"
    startDate = start.strftime('%Y%m%d')
    endDate = end.strftime('%Y%m%d')
    # Values are bound as parameters rather than interpolated into the SQL
    # text, so a ticker containing a quote cannot break or alter the query.
    # The table name is a fixed constant and cannot be bound, hence the %s.
    command = (
        "select * from %s where "
        "substr(date,1,4)||substr(date,6,2)||substr(date,9,2) "
        "BETWEEN ? and ? AND stockName=?" % tableName
    )
    c = conn.cursor()
    c.execute(command, (startDate, endDate, stockName))
    return c.fetchall()

def fetchTweets(start,end,conn):
    """Return all ``Twitter`` rows whose date falls within a date range.

    The stored ``date`` text is compacted to ``YYYYMMDD`` with ``substr``
    (characters 1-4, 6-7 and 9-10) and compared as a string, so both bounds
    are inclusive at day granularity.

    Args:
        start: Object with ``strftime`` giving the first day of the range.
        end: Object with ``strftime`` giving the last day of the range.
        conn: Open database connection.

    Returns:
        List of row tuples as returned by ``fetchall()``.
    """
    tableName = "Twitter"
    startDate = start.strftime('%Y%m%d')
    endDate = end.strftime('%Y%m%d')
    # Bind the bounds as parameters instead of formatting them into the SQL
    # text. The table name is a fixed constant and cannot be bound.
    command = (
        "select * from %s where "
        "substr(date,1,4)||substr(date,6,2)||substr(date,9,2) "
        "BETWEEN ? and ?" % tableName
    )
    c = conn.cursor()
    c.execute(command, (startDate, endDate))
    return c.fetchall()

def getAllStockNames(conn):
    """Return the distinct ``stockName`` values stored in the Stock table.

    Args:
        conn: Open database connection.

    Returns:
        List of one-element tuples, one per distinct name.
    """
    cursor = conn.cursor()
    cursor.execute("SELECT DISTINCT stockName FROM %s" % ("Stock",))
    return cursor.fetchall()

def disconnectDB(conn):
    """Close ``conn`` by calling its ``close()`` method; returns nothing."""
    conn.close()
    
def getPricesForStock(name,startObj,endObj):
    """Fetch the Stock rows for ``name`` between two dates.

    Opens a fresh database connection for the query and always closes it.

    Args:
        name: Ticker to look up.
        startObj: First day of the range (passed to fetchStock as ``start``).
        endObj: Last day of the range (passed to fetchStock as ``end``).

    Returns:
        The list of row tuples returned by fetchStock.
    """
    conn = connectDB()
    try:
        return fetchStock(startObj, endObj, conn, name)
    finally:
        # Release the connection even if the query raises.
        disconnectDB(conn)
def getTweets(startObj,endObj):
    """Fetch the Twitter rows dated between two dates.

    Opens a fresh database connection for the query and always closes it.

    Args:
        startObj: First day of the range (passed to fetchTweets as ``start``).
        endObj: Last day of the range (passed to fetchTweets as ``end``).

    Returns:
        The list of row tuples returned by fetchTweets.
    """
    conn = connectDB()
    try:
        return fetchTweets(startObj, endObj, conn)
    finally:
        # Release the connection even if the query raises.
        disconnectDB(conn)
def getXVector(endObj,daysFromThePast):
    """Build the normalized category histogram of tweets in a look-back window.

    Tweets dated from ``endObj - daysFromThePast`` days up to ``endObj`` are
    counted per category, where the category is read from column index 5 of
    each tweet row and located in the module-level ``cat`` list. The counts
    are then divided by the total so the entries sum to 1.

    Args:
        endObj: ``datetime`` marking the end of the window.
        daysFromThePast: Window length in days.

    Returns:
        A list with one float per entry of ``cat``. When the window holds no
        tweets the vector is all zeros (the previous code raised
        ZeroDivisionError in that case).

    Raises:
        ValueError: If a tweet's category is not present in ``cat``.
    """
    startObj = endObj - timedelta(days=daysFromThePast)
    tweets = getTweets(startObj, endObj)

    counts = [0] * len(cat)
    for tweet in tweets:
        counts[cat.index(tweet[5])] += 1

    total = sum(counts)
    if total == 0:
        # Nothing to normalize by; return an explicit all-zero vector.
        return [0.0] * len(cat)
    return [count / total for count in counts]

def getYVector(name,startObj,endObj):
    """Return the dates and last-column values of a stock's rows.

    Args:
        name: Ticker to look up.
        startObj: First day of the range.
        endObj: Last day of the range.

    Returns:
        A ``(dateList, fluctuationsList)`` pair: the first column and the
        last column of every row from getPricesForStock, in row order.
    """
    rows = getPricesForStock(name, startObj, endObj)
    dateList = [row[0] for row in rows]
    fluctuationsList = [row[-1] for row in rows]
    return dateList, fluctuationsList

def getDateObj(s):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime`` at midnight.

    Raises:
        ValueError: If ``s`` does not match the ``%Y-%m-%d`` format.
    """
    return datetime.strptime(s, '%Y-%m-%d')

def getXandY(name,startObj,endObj,d):
    """Build the feature vectors and target values for one stock.

    For every date on which the stock has a row, a tweet-category vector is
    computed over the ``d`` days leading up to that date.

    Args:
        name: Ticker to look up.
        startObj: First day of the range.
        endObj: Last day of the range.
        d: Look-back window, in days, passed to getXVector.

    Returns:
        ``(xList, yList)`` where ``xList`` has one vector per row date and
        ``yList`` is the value list returned by getYVector.
    """
    row_dates, yList = getYVector(name, startObj, endObj)
    xList = [getXVector(getDateObj(day), d) for day in row_dates]
    return xList, yList

def getXandYMulti(stockArr,startObj,endObj,d):
    """Build one shared feature list and a target list per stock.

    The feature vectors are computed only once, from the dates of the first
    stock in ``stockArr``; every later stock contributes only its targets.
    NOTE(review): this presumes all stocks have rows on the same dates as the
    first one; nothing here checks that the lengths line up.

    Args:
        stockArr: Iterable of tickers.
        startObj: First day of the range.
        endObj: Last day of the range.
        d: Look-back window, in days, used for the feature vectors.

    Returns:
        ``(xVector, yMatrix)`` where ``yMatrix[i]`` holds the targets of the
        i-th stock. Both are empty lists when ``stockArr`` is empty (the
        previous code raised UnboundLocalError in that case).
    """
    xVector = []
    yMatrix = []
    for position, stock in enumerate(stockArr):
        if position == 0:
            xVector, yVector = getXandY(stock, startObj, endObj, d)
        else:
            # Discard the date list into a throwaway name; the old code
            # unpacked it into ``d`` and overwrote the look-back parameter.
            _, yVector = getYVector(stock, startObj, endObj)
        yMatrix.append(yVector)
    return xVector, yMatrix
        
def investWithThreshold(yM,threshold):
    """Replace every value of ``yM`` in place with a Gain/Loss/Neutral label.

    Each entry is converted with ``float`` and labelled "Gain" when it is at
    least ``threshold``, "Loss" when it is at most ``-threshold``, and
    "Neutral" otherwise.

    Args:
        yM: List of mutable rows of numbers (or numeric strings).
        threshold: Magnitude separating Neutral from Gain/Loss.

    Returns:
        The same ``yM`` object, now holding label strings.
    """
    def label_for(change):
        if change >= threshold:
            return "Gain"
        if change <= (-1 * threshold):
            return "Loss"
        return "Neutral"

    for row in yM:
        for col, raw in enumerate(row):
            row[col] = label_for(float(raw))
    return yM

def runMLModel(xV,yV,stockName,d):
    """Cross-validate an SVM for one stock, then save the model and a report.

    A sigmoid-kernel ``SVC`` is scored with 10-fold cross-validation. The mean
    accuracy (+/- two standard deviations) is appended to ``Report.txt`` and
    the classifier is pickled under ``PredictionOutput/MLModels``. Both
    locations are inside the parent of the current working directory.

    Args:
        xV: Feature vectors, one per sample.
        yV: Class labels, one per sample.
        stockName: Ticker; used in the report line and the pickle file name.
        d: Look-back window in days; used only for naming.
    """
    k = 'sigmoid'
    clf = svm.SVC(decision_function_shape='ovo', kernel=k)
    scores = cross_val_score(clf, xV, yV, cv=10)

    run_tag = "_Days_%s" % str(d) + "_Kernel_%s" % k
    message = ("StockName: %s" % stockName) + run_tag + "\n"
    message += ("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2) + "\n")

    OriginPath = getParentDir(os.getcwd())
    subFolderPath = makeDirectory(OriginPath, "PredictionOutput")
    pickleObjPath = makeDirectory(subFolderPath, "MLModels")

    # cross_val_score fits clones of the estimator and leaves ``clf`` itself
    # untrained, so fit it on the full data before persisting it; otherwise
    # the pickled model cannot predict anything.
    clf.fit(xV, yV)

    # The file name used to end in "\n", which produced a path containing a
    # newline character; use a regular extension instead.
    pklFName = pickleObjPath + "/" + stockName + run_tag + ".pkl"
    savePickleFile(pklFName, [clf])

    reportName = subFolderPath + "/Report.txt"
    writeToFile(reportName, message)

def savePickle(varList, d=None, k=None):
    """Pickle ``varList`` as an ``XList`` file under PredictionOutput/MLModels.

    The target directory is created inside the parent of the current working
    directory when missing.

    The previous body read ``d`` and ``k`` without defining them, so every
    call raised NameError. They are now optional parameters that only affect
    the file name.

    Args:
        varList: Picklable object to store.
        d: Optional look-back window; adds ``_Days_<d>`` to the file name.
        k: Optional kernel name; adds ``_Kernel_<k>`` to the file name.
    """
    OriginPath = getParentDir(os.getcwd())
    subFolderPath = makeDirectory(OriginPath, "PredictionOutput")
    pickleObjPath = makeDirectory(subFolderPath, "MLModels")

    name_parts = ["XList"]
    if d is not None:
        name_parts.append("_Days_%s" % str(d))
    if k is not None:
        name_parts.append("_Kernel_%s" % k)

    # No trailing "\n" in the file name (the old code appended one).
    pklFName = pickleObjPath + "/" + "".join(name_parts) + ".pkl"
    savePickleFile(pklFName, varList)

In [2]:
def savePickleFile(d,varList):
    """Pickle ``varList`` into the file at path ``d``, overwriting it.

    Args:
        d: Destination file path.
        varList: Any picklable object (callers in this file pass a list).
    """
    # The with-statement already closes the file; no explicit close() needed.
    with open(d, 'wb') as f:
        pickle.dump(varList, f)
    
def loadPickleFile(d):
    """Load and return the object pickled in the file at path ``d``.

    Args:
        d: Path of a file previously written with pickle.

    Returns:
        The unpickled object.
    """
    # NOTE: pickle.load can execute arbitrary code; only use this on files
    # this project wrote itself, never on untrusted input.
    # The with-statement already closes the file; no explicit close() needed.
    with open(d, 'rb') as f:
        return pickle.load(f)


In [3]:
# Tickers to model; yM below holds one label row per entry, in this order.
pickedStock = ["AAPL","AMZN","FB","JPM","TSLA","BDX","GOOGL","PGR","JNJ","CSCO","CLX","PG","MSFT"
               ,"CHKP","ORCL","DIS","SAFT","ITC",
              "KO","WMT","VZ","TRV","MCD","COST","NFLX","TWTR"]

# Look-back window (in days) for the tweet features.
days = 10

# Cache files are keyed by the look-back window. The old code tested for
# "yMatrix.plk" (a name that is never written) and then tried to load paths
# that still contained a literal, unformatted "%s". Build each path once so
# the existence check, the load and the save all agree.
modelDir = "/home/ubun/Desktop/StockPre/PredictionOutput/MLModels"
xVectorPath = modelDir + "/xVector_%s.plk" % (days)
yMatrixPath = modelDir + "/yMatrix_%s.plk" % (days)

if os.path.isfile(xVectorPath) and os.path.isfile(yMatrixPath):
    ymatrix = loadPickleFile(yMatrixPath)[0]
    xV = loadPickleFile(xVectorPath)[0]
else:
    xV,ymatrix = getXandYMulti(pickedStock,datetime(2016,1,1),datetime(2018,3,20),days)
    savePickleFile(xVectorPath,[xV])
    savePickleFile(yMatrixPath,[ymatrix])

# Convert the cached numeric changes to Gain/Loss/Neutral labels. This mutates
# ymatrix in place, which is why the raw values are pickled before this point.
yM = investWithThreshold(ymatrix,2.0)

print ("--- Just Finish Building X---")
print ("--- %s seconds ----" % (time.time() - start_time))

# One worker thread per stock; each cross-validates and saves its own model.
threads=[]
for idx, s in enumerate(pickedStock):
    t = Thread(target = runMLModel, args = (xV,yM[idx],s,days,))
    threads.append(t)
for t in threads:
    t.start()
for t in threads:
    t.join()
print("Exit main thread")
print("--- %s seconds ---" % (time.time() - start_time))


--- Just Finish Building X---
--- 0.429718017578125 seconds ----


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
Exception in thread Thread-21:
Traceback (most recent call last):
  File "/home/ubun/anaconda3/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/home/ubun/anaconda3/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-1-5e3a85ba14ec>", line 184, in runMLModel
    scores = cross_val_score(clf, xV, yV, cv=10)
  File "/home/ubun/anaconda3/lib/python3.6/site-packages/sklearn/model_selection/_validation.py", line 342, in cross_val_score
    pre_dispatch=pre_dispatch)
  File "/home/ubun/anaconda3/lib/python3.6/site-packag

  **self._backend_args)


Exit main thread
--- 1.5201094150543213 seconds ---




[[0.0034602076124567475,
  0.031141868512110725,
  0.01384083044982699,
  0.010380622837370242,
  0.0034602076124567475,
  0.031141868512110725,
  0.13494809688581316,
  0.04498269896193772,
  0.06228373702422145,
  0.08996539792387544,
  0.09342560553633218,
  0.031141868512110725,
  0.006920415224913495,
  0.04498269896193772,
  0.11764705882352941,
  0.020761245674740483,
  0.06228373702422145,
  0.12110726643598616,
  0.0657439446366782,
  0.010380622837370242],
 [0.0029498525073746312,
  0.02359882005899705,
  0.014749262536873156,
  0.011799410029498525,
  0.0,
  0.0471976401179941,
  0.12979351032448377,
  0.04129793510324484,
  0.07669616519174041,
  0.0855457227138643,
  0.11799410029498525,
  0.029498525073746312,
  0.008849557522123894,
  0.05014749262536873,
  0.10619469026548672,
  0.035398230088495575,
  0.04424778761061947,
  0.10914454277286136,
  0.058997050147492625,
  0.0058997050147492625],
 [0.0,
  0.013888888888888888,
  0.013888888888888888,
  0.01111111111111111

54