In [2]:
import cv2
import numpy as np
import sys
import math
import sklearn
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import LeaveOneOut, KFold
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import itertools


  _nan_object_mask = _nan_object_array != _nan_object_array


In [3]:



def getVals(fileName, isVerbose = False):

      # Create a VideoCapture object and read from input file
      # If the input is the camera, pass 0 instead of the video file name
      cap = cv2.VideoCapture(fileName+'.mp4')
      # Check if camera opened successfully
      if (cap.isOpened()== False): 
            
        print("Error opening video stream or file")
        return -1

      prevFrame = np.zeros((720,1280))

      # Read until video is completed
      counter = -1

      saveToFile = []
      pathThreshold = './vidFrameThreshold/'

      pathNormal = './vidFrameNormal/'

      isBlink = False

      blinkThreshold = 2


      blink_timestamp_array = []
      while(cap.isOpened()):
        # Capture frame-by-frame
        ret, frame = cap.read()

        
        if ret == True:
          
          frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
          # frame = cv2.GaussianBlur(  frame, (19,19), 1000)

          
          
            # saveToFile.append((np.ndarray.flatten(abs(frame-prevFrame) ).astype(np.uint8)))
          counter+=1
          # if counter%2==1:
          frame2 = cv2.absdiff(frame.astype(np.uint8), prevFrame.astype(np.uint8))
          _, frame2 = cv2.threshold(frame2,20, 255, cv2.THRESH_BINARY)
#           name = pathNormal+'vidFrame'+str(counter)+'.png'
#           cv2.imwrite(name, frame)


          # _, frame = cv2.threshold(frame, 250, 255, cv2.THRESH_BINARY)


          if np.mean(frame2)>blinkThreshold and isBlink == False and counter!=0:
            blink_timestamp_array.append(cap.get(cv2.CAP_PROP_POS_MSEC))
            isBlink = True

          elif np.mean(frame2)>blinkThreshold and isBlink == True:
            isBlink = False
          if counter < 150:
              name = pathThreshold+'vidFrame'+str(counter)+'.png'
              cv2.imwrite(name, frame2)

          if isVerbose:
              print counter, np.mean(frame2), len(blink_timestamp_array), isBlink
          
          # Display the resulting frame
          cv2.imshow('Frame', frame2)
          
          # Press Q on keyboard to  exit
          if cv2.waitKey(25) & 0xFF == ord('q'):
            break
          
          #difference between max in previous and current frame
          maxDifference = np.max(prevFrame-frame) 
          # print cap.get(cv2.CAP_PROP_POS_MSEC), counter, maxDifference
          prevFrame = frame
        # Break the loop
        else: 
          break
      
      # When everything done, release the video capture object
      cap.release()
      np.savetxt("./differenceFile.csv", np.asarray(saveToFile))
#       print counter
      if isVerbose:
          print "Number of blinks:", len(blink_timestamp_array)
          print "Blink Timestamp: ", blink_timestamp_array
      # Closes all the frames
      cv2.destroyAllWindows()
      if(len(blink_timestamp_array))==0: return -1
      return np.ndarray.tolist(np.asarray(blink_timestamp_array) - blink_timestamp_array[0])



In [4]:
def arrangeInBuckets(timeStampArray, interval = 500, totalSize = 60000):

    timeStampBuckets = np.zeros((timeStampArray.shape[0], int(totalSize/interval)))
    for  i, timeStampArrayI in enumerate(timeStampArray):
        for j, val in enumerate(timeStampArrayI):
            timeStampBuckets[i][int(math.ceil(val/interval))] += 1 
#         timeStampBuckets[i] = np.reshape(timeStampBuckets[i],newshape=( timeStampBuckets[i].shape[0], 1))
#         print timeStampBuckets[i].shape 
        
#     timeStampBuckets = np.vstack((np.arange(start=0, stop=totalSize, step=interval), timeStampBuckets))
    
    return timeStampBuckets


In [5]:
def loadFromVideo(filePath, numVid):

    timeStampArray = []
    for i in xrange(1,numVid):
        fileName = filePath+str(i)
        print i
        response = getVals(fileName)
        if response == -1: continue
        timeStampArray.append(response)
    timeStampArrayNP = np.asarray(timeStampArray)
    print len(timeStampArray)
    return timeStampArrayNP

In [6]:
def writeToFile(npToWrite, fileName):
    np.savetxt(fname=fileName, X=npToWrite, delimiter=',')

In [7]:
def classifyTrain(data, labels, CVal):
    classifier = LinearSVC(random_state = 0,max_iter = 1000000, C=CVal)
    print data.shape
    
    classifier.fit(data, labels)
    return classifier
    

In [8]:
def randomForest(data, labels):
    model = RandomForestClassifier(random_state=42, n_estimators=200, max_depth=4)
    model.fit(data, labels)
    return randomForest

In [9]:
def saveArray(fileName, numpyArrayName):
    np.save(fileName, numpyArrayName)

In [10]:
def normalizeBlinkTimes(timeStampArray):
    newArray = timeStampArray.copy()
    for i, row in enumerate(timeStampArray):
        rowNP = np.asarray(timeStampArray[i]).copy()
        print np.max(rowNP)
        rowNP = rowNP/np.amax(rowNP)
        newArray[i] = np.ndarray.tolist(rowNP)
    return newArray

In [11]:
def generateDataset(datasetA, datasetB, limitA, limitB):
    trainingSet = np.vstack((datasetA[0:limitA,:], datasetB[0:limitB,:]))
#     print trainingSet.shape
    validationSet = np.vstack((datasetA[limitA+1:,:], datasetB[limitB+1:, :]))
    trainingLabels = np.vstack((np.ones((datasetA[0:limitA,:].shape[0],1)), -1*np.ones((datasetB[0:limitB,:].shape[0],1))))
    validationLabels = np.vstack((np.ones((datasetA[limitA+1:,:].shape[0],1)), -1*np.ones((datasetB[limitB+1:,:].shape[0],1))))
    
    print datasetA[limitA+1,:].shape
#     print validationLabels.shape
    return [trainingSet, validationSet, trainingLabels, validationLabels]

In [12]:
def generateMultiLabelDataset(datasetA, datasetB, dataSetC, dataSetD, dataSetE, limitA, limitB, limitC, limitD, limitE):
    trainingSet = np.vstack((datasetA[0:limitA,:], datasetB[0:limitB,:], dataSetC[0:limitC,:], dataSetD[0:limitD,:], dataSetE[0:limitE,:]))
#     trainingSet = np.vstack((trainingSet, dataSetC[0:limitC,:]))
#     print trainingSet.shape
    validationSet = np.vstack((datasetA[limitA+1:,:], datasetB[limitB+1:, :], dataSetC[limitC+1:,:], dataSetD[limitD+1, :])
                             , dataSetE[limitE+1:,:])
    print validationSet.shape
#     validationSet = np.vstack((validationSet, dataSetC[limitC+1:,:]))
#     print validationSet.shape
    trainingLabels = np.vstack((np.ones((datasetA[0:limitA,:].shape[0],1)), 2*np.ones((datasetB[0:limitB,:].shape[0],1))))
    trainingLabels = np.vstack((trainingLabels, 3*np.ones((dataSetC[0:limitC,:].shape[0], 1)), 4*ones((dataSetD[0:limitD,:].shape[0]))
                               , 4*ones((dataSetE[0:limitE,:].shape[0])) ))
    
    validationLabels = np.vstack((np.ones((datasetA[limitA+1:,:].shape[0],1)), -1*np.ones((datasetB[limitB+1:,:].shape[0],1))))
    validationLabels = np.vstack((validationLabels, 3*np.ones((dataSetC[limitC+1:,:].shape[0])), 4*np.ones((dataSetD[limitD+1:,:].shape[0]))
                                                              , dataSetE[limitE+1:,:].shape[0]))
    
    print datasetA[limitA+1,:].shape
#     print validationLabels.shape
    return [trainingSet, validationSet, trainingLabels, validationLabels]

In [13]:
def run_classifier(trainingSet, trainingLabels, validationSet, validationLabels):
#     print validationSet.shape, validationLabels.shape
    for c in [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]:
        classifier = classifyTrain(trainingSet, trainingLabels, c)
#         classifier = randomForest(trainingSet, trainLabels)
        predictions = classifier.predict(validationSet)
        counter = 0 
        for i in range(len(validationLabels)):
            if predictions[i] == validationLabels[i]:
                counter += 1

        print "Accuracy is: ", float(counter)/len(predictions)*100, "%"

In [14]:
def getMeanBlinks(timeStampArrayNP):
    totalBlinks = 0.0
    for i in range(timeStampArrayNP.shape[0]):
         totalBlinks += len(timeStampArrayNP[i])
#     print totalBlinks/i
    return totalBlinks/i

In [15]:
def runKFold(dataSetA, dataSetB, c = 0.01):
    overallDataSet = np.vstack((dataSetA, dataSetB))
    print overallDataSet.shape
    
    overallLabels = trainLabels = np.vstack((np.ones((dataSetA.shape[0],1)), -1*np.ones((dataSetB.shape[0],1))))
    kFold = KFold(n_splits=5, shuffle=True)
    # print type(loo)
    accuracy = 0.0
    runCount = 0
    for train_index, test_index in kFold.split(overallDataSet):
            X_train, X_test = overallDataSet[train_index], overallDataSet[test_index]
            y_train, y_test = overallLabels[train_index], overallLabels[test_index]

            classifier = classifyTrain(X_train, y_train,c)
            predictions = classifier.predict(X_test)
            counter = 0 
            for i in range(len(y_test)):
                if predictions[i] == y_test[i]:
                    counter += 1
            accuracy += float(counter)/len(predictions)
            runCount += 1
            
#             graph = plt.gcf()
#             graph.set_size_inches(14, 7)
#             plt.plot(range(len(train_error)),train_error)
#             plt.title("Loss function for logistic regression")
#             plt.xlabel("No. of epochs")
#             plt.ylabel("L(Theta)")
    print 'Accuracy: ', accuracy/runCount*100, '%'
    print accuracy
    print runCount

In [16]:
def runMultiKFold(dataSet, dataLabel, c = 0.01):
#     overallDataSet = np.vstack((dataSetA, dataSetB))
#     print overallDataSet.shape
    
#     overallLabels = trainLabels = np.vstack((np.ones((dataSetA.shape[0],1)), -1*np.ones((dataSetB.shape[0],1))))
    kFold = KFold(n_splits=5, shuffle=True)
    # print type(loo)
    accuracy = 0.0
    runCount = 0
    ctr = 0
    for train_index, test_index in kFold.split(dataSet):
            X_train, X_test = dataSet[train_index], dataSet[test_index]
            y_train, y_test = dataLabel[train_index], dataLabel[test_index]

            classifier = classifyTrain(X_train, y_train,c)
            predictions = classifier.predict(X_test)
            counter = 0 
            for i in range(len(y_test)):
                if predictions[i] == y_test[i]:
                    counter += 1
            accuracy += float(counter)/len(predictions)
            runCount += 1
            if ctr == 0:
#                 graph = plt.gcf()
#                 graph.set_size_inches(14, 7)
#                 plt.plot(range(len(train_error)),train_error)
#                 plt.title("Loss function for logistic regression")
#                 plt.xlabel("No. of epochs")
#                 plt.ylabel("L(Theta)")
                
                # Compute confusion matrix
                cnf_matrix = confusion_matrix(y_test, predictions)
                np.set_printoptions(precision=2)

                # Plot non-normalized confusion matrix
#                 plt.figure()
#                 plot_confusion_matrix(cnf_matrix, classes=['google','facebook', 'twitter'],
#                                       title='Confusion matrix, without normalization')

                # Plot normalized confusion matrix
                plt.figure()
                plot_confusion_matrix(cnf_matrix, classes=['google','facebook', 'twitter', 'linkedIn', 'youtube'], normalize=True,
                                      title='Normalized confusion matrix')

                plt.show()
            ctr+=1
#             counter += 1
    print 'Accuracy: ', accuracy/runCount*100, '%'
    print accuracy
    print runCount

In [17]:
def getTotalBlinksAndTime(timeStampArrayNP):
    for i in range(timeStampArrayNP.shape[0]):
         print len(timeStampArrayNP[i]),  timeStampArrayNP[i][len(timeStampArrayNP[i])-1] - timeStampArrayNP[i][0]


In [23]:
diffTwitterTimeStamp256 = loadFromVideo('./data/diffSpeeds/tw256/tw-256-kbps-',11)

1
2
3
4
5
6
7
8
9
10
10


In [50]:
diffFacebookTimeStamp1 = loadFromVideo('./data/diffSpeeds/fb1/fb-1-mbps-', 10)

1
2
3
4
5
6
7
8
9
7


In [26]:
diffFacebookTimeStamp2 = loadFromVideo('./data/diffSpeeds/fb2/fb-2-mbps-', 17)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
16


In [27]:
diffFacebookTimeStamp4 = loadFromVideo('./data/diffSpeeds/fb4/fb-4-mbps-', 23)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
22


In [28]:
diffFacebookTimeStamp8 = loadFromVideo('./data/diffSpeeds/fb8/fb-8-mbps-', 11)

1
2
3
4
5
6
7
8
9
10
10


In [29]:
diffFacebookTimeStamp16 = loadFromVideo('./data/diffSpeeds/fb16/fb-16-mbps-', 7)

1
2
3
4
5
6
6


In [23]:
print type(timeStampArrayYoutubeNP)

<type 'numpy.ndarray'>


In [78]:
# timeStampArrayTwitterNP = loadFromVideo('./data/twitter/TW', 62)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
61


In [130]:
# x = loadFromVideo('./data/google/google', 2)

1
1


In [35]:
saveArray('linkedInTimeStamps.npy', timeStampArrayLinkedInNP)

In [51]:
saveArray('diffTwitterTimeStamp256.npy', diffTwitterTimeStamp256)
saveArray('diffFacebookTimeStamp1.npy', diffFacebookTimeStamp1)
saveArray('diffFacebookTimeStamp2.npy', diffFacebookTimeStamp2)
saveArray('diffFacebookTimeStamp4.npy', diffFacebookTimeStamp4)
saveArray('diffFacebookTimeStamp8.npy', diffFacebookTimeStamp8)
saveArray('diffFacebookTimeStamp16.npy', diffFacebookTimeStamp16)


In [18]:
timeStampArrayNP = np.load('googleTimeStamps.npy')
timeStampArrayFBNP = np.load('facebookTimeStamps.npy')
timeStampArrayTwitterNP = np.load('twitterTimeStamps.npy')
timeStampArrayYoutubeNP = np.load('youtubeTimeStamps.npy')
timeStampArrayLinkedInNP = np.load('linkedInTimeStamps.npy')


In [19]:
diffFacebookTimeStamp1 = np.load('diffFacebookTimeStamp1.npy')
diffFacebookTimeStamp2 = np.load('diffFacebookTimeStamp2.npy')
diffFacebookTimeStamp4 = np.load('diffFacebookTimeStamp4.npy')
diffFacebookTimeStamp8 = np.load('diffFacebookTimeStamp8.npy')
diffFacebookTimeStamp16 = np.load('diffFacebookTimeStamp16.npy')
diffTwitterTimeStamp256 = np.load('diffTwitterTimeStamp256.npy')


In [20]:
timeStampBuckets = arrangeInBuckets(np.asarray(timeStampArrayNP))
timeStampBucketsFB = arrangeInBuckets(np.asarray(timeStampArrayFBNP))
timeStampBucketsTwitter = arrangeInBuckets(np.asarray(timeStampArrayTwitterNP))
timeStampBucketsYoutube = arrangeInBuckets(np.asarray(timeStampArrayYoutubeNP))
timeStampBucketsLinkedIn = arrangeInBuckets(np.asarray(timeStampArrayLinkedInNP))


In [21]:
diffTwitterTimeStampBucket256 = arrangeInBuckets(np.asarray(diffTwitterTimeStamp256))
diffFacebookTimeStampBucket1  = arrangeInBuckets(np.asarray(diffFacebookTimeStamp1))
diffFacebookTimeStampBucket2 = arrangeInBuckets(np.asarray(diffFacebookTimeStamp2))
diffFacebookTimeStampBucket4 = arrangeInBuckets(np.asarray(diffFacebookTimeStamp4))
diffFacebookTimeStampBucket8 = arrangeInBuckets(np.asarray(diffFacebookTimeStamp8))
diffFacebookTimeStampBucket16 = arrangeInBuckets(np.asarray(diffFacebookTimeStamp16))

In [44]:
writeToFile(timeStampBucketsLinkedIn, 'timeStampBucketsLinkedIn500.csv')
writeToFile(timeStampBucketsYoutube, 'timeStampBucketsYoutube500.csv')
print "Write Complete"

Write Complete


In [36]:

# timeStampArrayFacebook = []
# for i in xrange(1,59):
#     fileName = './data/facebook/FB'+str(i)
#     print i
#     iterVal = getVals(fileName)
#     if iterVal==-1: 
#         print 'problem for this i'
#         continue
#     timeStampArrayFacebook.append(iterVal)
    
    

timeStampArrayFBNP = np.asarray(timeStampArrayFacebook)
timeStampBucketsFB = arrangeInBuckets(np.asarray(timeStampArrayFBNP))

1
2
3
4
5
6
7
8
problem for this i
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58


In [113]:
trainingSet = np.vstack((timeStampBuckets[0:48,:], timeStampBucketsFB[0:48,:]))

In [114]:
validationSet = np.vstack((timeStampBuckets[49:,:], timeStampBucketsFB[49:, :]))

In [115]:
trainLabels = np.vstack((np.ones((timeStampBuckets[0:48,:].shape[0],1)), -1*np.ones((timeStampBucketsFB[0:48,:].shape[0],1))))

In [116]:

print np.shape(trainLabels)
print trainingSet.shape
print validationLabels.shape
print validationSet.shape

(96, 1)
(96, 120)
(14, 1)
(15, 120)


In [117]:
validationLabels = np.vstack((np.ones((timeStampBuckets[49:,:].shape[0],1)), -1*np.ones((timeStampBucketsFB[49:,:].shape[0],1))))

In [118]:
predictions = classifier.predict(validationSet)

In [120]:
print np.shape(trainingSet)

(96, 120)


In [51]:
print np.shape(trainLabels)

(2, 48)


0.866666666667
0.866666666667
0.866666666667
0.866666666667
0.866666666667


In [237]:
writeToFile(timeStampBuckets, 'timeStampBucketGoogle.csv')
writeToFile(timeStampBucketsFB, 'timeStampBucketFB500.csv')
writeToFile(timeStampBucketsTwitter, 'timeStampBucketTwitter500.csv')

In [124]:

np.save('googleTimeStamps.npy',timeStampArrayNP)
np.save('facebookTimeStamps.npy', timeStampArrayFBNP)

In [125]:
temp = np.load('googleTimeStamps.npy')

In [126]:
print temp.shape
print timeStampArrayNP.shape

(56,)
(56,)


In [22]:
# print type(timeStampArrayNP)
googleNormalizedTimeStamp = normalizeBlinkTimes(timeStampArrayNP)
facebookNormalizedTimeStamp = normalizeBlinkTimes(timeStampArrayFBNP) 
twitterNormalizedTimeStamp = normalizeBlinkTimes(timeStampArrayTwitterNP)
youtubeNormalizedTimeStamp = normalizeBlinkTimes(timeStampArrayYoutubeNP)
linkedInNormalizedTimeStamp = normalizeBlinkTimes(timeStampArrayLinkedInNP)

8733.33333333
10100.0
9000.0
12100.0
10300.0
14233.3333333
12666.6666667
10800.0
11133.3333333
20266.6666667
12233.3333333
15166.6666667
11200.0
11966.6666667
11733.3333333
13300.0
12366.6666667
12766.6666667
14033.3333333
12133.3333333
13866.6666667
11333.3333333
14533.3333333
13300.0
10900.0
11266.6666667
13833.3333333
14033.3333333
11633.3333333
10366.6666667
20733.3333333
16466.6666667
11366.6666667
15833.3333333
10933.3333333
19966.6666667
20100.0
16700.0
12300.0
23466.6666667
12900.0
12333.3333333
14100.0
11766.6666667
10733.3333333
11433.3333333
11666.6666667
12033.3333333
16133.3333333
14133.3333333
11833.3333333
11800.0
11700.0
18133.3333333
12233.3333333
15800.0
30833.3333333
30733.3333333
33833.3333333
26300.0
30533.3333333
25433.3333333
1800.0
30433.3333333
29966.6666667
24966.6666667
32233.3333333
26600.0
25533.3333333
24700.0
32433.3333333
30400.0
25800.0
25066.6666667
25766.6666667
25000.0
26400.0
29800.0
25133.3333333
30033.3333333
31866.6666667
30833.3333333
24933.3333

In [24]:
diffTwitterNormalizedTimeStamp256 = normalizeBlinkTimes(diffTwitterTimeStamp256)
diffFacebookNormalizedTimeStamp1 = normalizeBlinkTimes(diffFacebookTimeStamp1)
diffFacebookNormalizedTimeStamp2 = normalizeBlinkTimes(diffFacebookTimeStamp2)
diffFacebookNormalizedTimeStamp4 = normalizeBlinkTimes(diffFacebookTimeStamp4)
diffFacebookNormalizedTimeStamp8 = normalizeBlinkTimes(diffFacebookTimeStamp8)
diffFacebookNormalizedTimeStamp16 = normalizeBlinkTimes(diffFacebookTimeStamp16)


26685.5381367
25439.2757851
26508.1375862
26488.8137055
26594.1994008
25472.3310093
25534.2094118
25424.7583333
27597.0125604
26472.5073238
11684.709422
10602.5826232
9577.37487335
11727.2401594
12769.0770787
12670.1686891
12802.2189266
6634.09372146
7702.08733766
9513.11794872
7753.82302189
7588.77847659
7555.82186495
6477.42307692
8479.73910534
6344.64920128
9638.08642735
7447.58503401
9601.64120654
7505.471157
8493.41203136
7490.03654244
8611.77025316
5415.98918919
5286.86443381
4402.72628205
6350.91445428
4292.87709751
4355.94680484
5152.6745098
3303.11230585
6449.68188737
5352.44016393
5385.22169935
3299.57586619
5420.58834244
4363.8245283
4398.39194313
5180.25480043
4336.03791469
5337.07984749
3181.9738437
3266.51707317
3139.21765272
3246.86677402
4336.2788951
2246.93415133
2178.52077295
2082.13333333
3266.43225806
3275.28757764
3173.09223744
2188.27594937
2185.55238095
3233.52965155
3206.71413544
3234.06091686
3309.92592593
3137.72315593
4255.95023474
3035.54874875


In [79]:
print (diffTwitterNormalizedTimeStamp256[0])

[0.0, 0.04207920792079208, 0.04455445544554455, 0.04702970297029702, 0.08539603960396039, 0.12128712871287128, 0.12623762376237624, 0.1621287128712871, 0.16460396039603958, 0.20297029702970293, 0.2054455445544554, 0.24381188118811878, 0.28217821782178215, 0.28465346534653463, 0.3205445544554455, 0.323019801980198, 0.3613861386138614, 0.36386138613861385, 0.3997524752475247, 0.4022277227722772, 0.43935643564356436, 0.48019801980198024, 0.4826732673267327, 0.5198019801980198, 0.5222772277227723, 0.5581683168316831, 0.5631188118811881, 0.5990099009900989, 0.6373762376237624, 0.6423267326732673, 0.6782178217821782, 0.7165841584158416, 0.719059405940594, 0.7574257425742573, 0.7599009900990099, 0.8007425742574258, 0.8391089108910891, 0.8762376237623762, 0.8787128712871287, 0.917079207920792, 0.9195544554455446, 0.9591584158415841, 0.995049504950495, 1.0]


In [25]:
googleNormalizedBuckets = arrangeInBuckets(timeStampArray=googleNormalizedTimeStamp, interval=0.01, totalSize=1.1)
facebookNormalizedBuckets = arrangeInBuckets(timeStampArray=facebookNormalizedTimeStamp, interval=0.01, totalSize=1.1)
twitterNormalizedBuckets = arrangeInBuckets(timeStampArray=twitterNormalizedTimeStamp, interval=0.01, totalSize=1.1)
youtubeNormalizedBuckets = arrangeInBuckets(timeStampArray=youtubeNormalizedTimeStamp, interval=0.01, totalSize=1.1)
linkedInNormalizedBuckets = arrangeInBuckets(timeStampArray=linkedInNormalizedTimeStamp, interval=0.01, totalSize=1.1)

In [88]:
print (diffFacebookNormalizedTimeStamp2[7])

[0.0, 0.007782101167315174, 0.12062256809338519, 0.1284046692607004, 0.13618677042801555, 0.25680933852140075, 0.3696498054474708, 0.377431906614786, 0.3852140077821012, 0.49805447470817116, 0.5058365758754862, 0.6186770428015563, 0.6264591439688716, 0.6342412451361867, 0.7470817120622567, 0.7548638132295719, 0.7626459143968871, 0.8715953307392995, 0.8793774319066147, 0.8871595330739299, 1.0]


In [43]:
diffTwitterNormalizedBuckets256 = arrangeInBuckets(timeStampArray=diffTwitterNormalizedTimeStamp256, interval=0.01, totalSize=1.2)
diffFacebookNormalizedBuckets1 = arrangeInBuckets(timeStampArray=diffFacebookNormalizedTimeStamp1, interval=0.01, totalSize=1.2)
diffFacebookNormalizedBuckets2 = arrangeInBuckets(timeStampArray=diffFacebookNormalizedTimeStamp2, interval=0.01, totalSize=1.2)
diffFacebookNormalizedBuckets4 = arrangeInBuckets(timeStampArray=diffFacebookNormalizedTimeStamp4, interval=0.01, totalSize=1.2)
diffFacebookNormalizedBuckets8 = arrangeInBuckets(timeStampArray=diffFacebookNormalizedTimeStamp8, interval=0.01, totalSize=1.2)
diffFacebookNormalizedBuckets16 = arrangeInBuckets(timeStampArray=diffFacebookNormalizedTimeStamp16, interval=0.01, totalSize=1.2)


# print googleNormalizedBuckets.shape
# print facebookNormalizedBuckets[:56,].shape
print googleNormalizedBuckets.shape
print facebookNormalizedBuckets.shape

In [182]:
normalizedDataSet = generateDataset(googleNormalizedBuckets, twitterNormalizedBuckets, 48, 48)
run_classifier(trainingSet=normalizedDataSet[0], validationSet=normalizedDataSet[1] , 
               trainingLabels= normalizedDataSet[2],validationLabels=normalizedDataSet[3])
# trainSet, validationSet, trainLabels, validationLabels]

(110,)
(96, 110)
Accuracy is:  52.6315789474 %
(96, 110)
Accuracy is:  78.9473684211 %
(96, 110)
Accuracy is:  63.1578947368 %
(96, 110)
Accuracy is:  57.8947368421 %
(96, 110)
Accuracy is:  57.8947368421 %
(96, 110)
Accuracy is:  57.8947368421 %
(96, 110)
Accuracy is:  57.8947368421 %


In [171]:
twitterGoogleDataset = generateDataset(timeStampBuckets, timeStampBucketsTwitter[:56,], 10, 10)
run_classifier(trainingSet=twitterGoogleDataset[0], validationSet=twitterGoogleDataset[1] , 
               trainingLabels= twitterGoogleDataset[2],validationLabels=twitterGoogleDataset[3])

(120,)
(20, 120)
Accuracy is:  80.0 %
(20, 120)
Accuracy is:  78.8888888889 %
(20, 120)
Accuracy is:  78.8888888889 %
(20, 120)
Accuracy is:  78.8888888889 %
(20, 120)
Accuracy is:  78.8888888889 %


# With a normalized bucket size of 0.01

In [232]:
#Timestamp bucket google vs facebook, 500ms bucket
runKFold(timeStampBuckets, timeStampBucketsFB)

(113, 120)
(90, 120)
(90, 120)
(90, 120)
(91, 120)
(91, 120)
Accuracy:  96.4822134387 %
4.82411067194
5


#### Timestamp bucket google vs twitter 500ms bucket
runKFold(timeStampBuckets, timeStampBucketsTwitter)

In [235]:
#Timestamp bucket facebook vs twitter, 500ms bucket
runKFold(timeStampBucketsFB, timeStampBucketsTwitter)

(118, 120)
(94, 120)
(94, 120)
(94, 120)
(95, 120)
(95, 120)
Accuracy:  99.1666666667 %
4.95833333333
5


In [264]:
#Normalized twitter vs google, 0.01 bucket
runKFold(twitterNormalizedBuckets, googleNormalizedBuckets, 0.01)

(117, 110)
(93, 110)
(93, 110)
(94, 110)
(94, 110)
(94, 110)
Accuracy:  65.0 %
3.25
5


In [229]:
#Normalized facebook vs twitter, 0.01 bucket
runKFold(twitterNormalizedBuckets, facebookNormalizedBuckets)

(118, 110)
(94, 110)
(94, 110)
(94, 110)
(95, 110)
(95, 110)
Accuracy:  85.5797101449 %
4.27898550725
5


In [230]:
#Normalized facebook vs google, 0.01 bucket
runKFold(googleNormalizedBuckets, facebookNormalizedBuckets)

(113, 110)
(90, 110)
(90, 110)
(90, 110)
(91, 110)
(91, 110)
Accuracy:  76.7984189723 %
3.83992094862
5


In [31]:
runKFold(googleNormalizedBuckets, youtubeNormalizedBuckets)

(81, 110)
(64, 110)
(65, 110)
(65, 110)
(65, 110)
(65, 110)
Accuracy:  83.8235294118 %
4.19117647059
5


  y = column_or_1d(y, warn=True)


In [32]:
runKFold(facebookNormalizedBuckets, youtubeNormalizedBuckets)


(82, 110)
(65, 110)
(65, 110)
(66, 110)
(66, 110)
(66, 110)
Accuracy:  72.2794117647 %
3.61397058824
5


In [38]:
runKFold(linkedInNormalizedBuckets, twitterNormalizedBuckets)

(118, 110)
(94, 110)
(94, 110)
(94, 110)
(95, 110)
(95, 110)
Accuracy:  78.8043478261 %
3.9402173913
5


In [41]:
runKFold(linkedInNormalizedBuckets, googleNormalizedBuckets)

(113, 110)
(90, 110)
(90, 110)
(90, 110)
(91, 110)
(91, 110)
Accuracy:  81.4229249012 %
4.07114624506
5


# Changing Normalized Bucket Size to 0.1

In [337]:
googleNormalizedBuckets01 = arrangeInBuckets(googleNormalizedTimeStamp,  0.1, 1.1)
facebookNormalizedBuckets01 = arrangeInBuckets(facebookNormalizedTimeStamp, 0.1, 1.1) 
twitterNormalizedBuckets01 = arrangeInBuckets(twitterNormalizedTimeStamp, 0.1, 1.1)

In [373]:
#Normalized twitter vs google, 0.01 bucket
runKFold(googleNormalizedBuckets01, twitterNormalizedBuckets01, 1)

(117, 11)
(93, 11)
(93, 11)
(94, 11)
(94, 11)
(94, 11)
Accuracy:  66.6304347826 %
3.33152173913
5


In [381]:
#Normalized facebook vs twitter, 0.01 bucket
runKFold(facebookNormalizedBuckets01, twitterNormalizedBuckets01, 1)

(118, 11)
(94, 11)
(94, 11)
(94, 11)
(95, 11)
(95, 11)
Accuracy:  86.4130434783 %
4.32065217391
5


In [382]:
#Normalized facebook vs google, 0.01 bucket
runKFold(facebookNormalizedBuckets01, googleNormalizedBuckets01, 1)

(113, 11)
(90, 11)
(90, 11)
(90, 11)
(91, 11)
(91, 11)
Accuracy:  85.8102766798 %
4.29051383399
5


# Changing Normalized bucket size to 0.001


In [383]:
googleNormalizedBuckets0001 = arrangeInBuckets(googleNormalizedTimeStamp,  0.001, 1.1)
facebookNormalizedBuckets0001 = arrangeInBuckets(facebookNormalizedTimeStamp, 0.001, 1.1) 
twitterNormalizedBuckets0001 = arrangeInBuckets(twitterNormalizedTimeStamp, 0.001, 1.1)

In [445]:
#Normalized twitter vs google, 0.0001 bucket
runKFold(googleNormalizedBuckets0001, twitterNormalizedBuckets0001, 1)

(117, 1100)
(93, 1100)
(93, 1100)
(94, 1100)
(94, 1100)
(94, 1100)
Accuracy:  70.7608695652 %
3.53804347826
5


In [454]:
#Normalized facebook vs twitter, 0.0001 bucket
runKFold(facebookNormalizedBuckets0001, twitterNormalizedBuckets0001, 1)

(118, 1100)
(94, 1100)
(94, 1100)
(94, 1100)
(95, 1100)
(95, 1100)
Accuracy:  70.4347826087 %
3.52173913043
5


In [470]:
#Normalized facebook vs google, 0.0001 bucket
runKFold(facebookNormalizedBuckets0001, googleNormalizedBuckets0001, 0.1)

(113, 1100)
(90, 1100)
(90, 1100)
(90, 1100)
(91, 1100)
(91, 1100)
Accuracy:  69.209486166 %
3.4604743083
5


# Changing normalized bucket size to 0.2


In [476]:
googleNormalizedBuckets02 = arrangeInBuckets(googleNormalizedTimeStamp,  0.2, 1.4)
facebookNormalizedBuckets02 = arrangeInBuckets(facebookNormalizedTimeStamp, 0.2, 1.4) 
twitterNormalizedBuckets02 = arrangeInBuckets(twitterNormalizedTimeStamp, 0.2, 1.4)

In [490]:
#Normalized twitter vs google, 0.2 bucket
runKFold(googleNormalizedBuckets02, twitterNormalizedBuckets02, 1)

(117, 6)
(93, 6)
(93, 6)
(94, 6)
(94, 6)
(94, 6)
Accuracy:  68.2608695652 %
3.41304347826
5


In [485]:
#Normalized facebook vs twitter, 0.2 bucket
runKFold(facebookNormalizedBuckets02, twitterNormalizedBuckets02, 1)

(118, 6)
(94, 6)
(94, 6)
(94, 6)
(95, 6)
(95, 6)
Accuracy:  88.9492753623 %
4.44746376812
5


In [488]:
#Normalized facebook vs google, 0.2 bucket
runKFold(facebookNormalizedBuckets02, googleNormalizedBuckets02, 1)

(113, 6)
(90, 6)
(90, 6)
(90, 6)
(91, 6)
(91, 6)
Accuracy:  87.628458498 %
4.3814229249
5


# Changing normalized bucket size to 0.5

In [491]:
googleNormalizedBuckets05 = arrangeInBuckets(googleNormalizedTimeStamp,  0.5, 1.5)
facebookNormalizedBuckets05 = arrangeInBuckets(facebookNormalizedTimeStamp, 0.5, 1.5) 
twitterNormalizedBuckets05 = arrangeInBuckets(twitterNormalizedTimeStamp, 0.5, 1.5)

In [492]:
#Normalized twitter vs google, 0.5 bucket
runKFold(googleNormalizedBuckets05, twitterNormalizedBuckets05, 1)

(117, 3)
(93, 3)
(93, 3)
(94, 3)
(94, 3)
(94, 3)
Accuracy:  60.7608695652 %
3.03804347826
5


In [493]:
#Normalized facebook vs twitter, 0.5 bucket
runKFold(facebookNormalizedBuckets05, twitterNormalizedBuckets05, 1)

(118, 3)
(94, 3)
(94, 3)
(94, 3)
(95, 3)
(95, 3)
Accuracy:  90.652173913 %
4.53260869565
5


In [494]:
#Normalized facebook vs twitter, 0.5 bucket
runKFold(facebookNormalizedBuckets05, twitterNormalizedBuckets05, 1)

(118, 3)
(94, 3)
(94, 3)
(94, 3)
(95, 3)
(95, 3)
Accuracy:  90.5797101449 %
4.52898550725
5


# Changing normalized bucket size to 1

In [497]:
googleNormalizedBuckets1 = arrangeInBuckets(googleNormalizedTimeStamp,  1, 2)
facebookNormalizedBuckets1 = arrangeInBuckets(facebookNormalizedTimeStamp, 1, 2) 
twitterNormalizedBuckets1 = arrangeInBuckets(twitterNormalizedTimeStamp, 1, 2)

In [503]:
#Normalized twitter vs google, 1 bucket
runKFold(googleNormalizedBuckets1, twitterNormalizedBuckets1, 100)

(117, 2)
(93, 2)
(93, 2)
(94, 2)
(94, 2)
(94, 2)
Accuracy:  51.231884058 %
2.5615942029
5


In [504]:
#Normalized facebook vs twitter, 1 bucket
runKFold(facebookNormalizedBuckets1, twitterNormalizedBuckets1, 1)

(118, 2)
(94, 2)
(94, 2)
(94, 2)
(95, 2)
(95, 2)
Accuracy:  90.652173913 %
4.53260869565
5


In [505]:
#Normalized facebook vs twitter, 1 bucket
runKFold(facebookNormalizedBuckets1, twitterNormalizedBuckets1, 1)

(118, 2)
(94, 2)
(94, 2)
(94, 2)
(95, 2)
(95, 2)
Accuracy:  90.7246376812 %
4.53623188406
5


### Inference: Total number of packets gives better distinction for websites with different  no. of packets

# Total Number of Blinks


In [521]:
print 'Mean number of blinks for google is:', getMeanBlinks(timeStampArrayNP)

Mean number of blinks for google is: 117.236363636


In [523]:
print 'Mean number of blinks for facebook is:', getMeanBlinks(timeStampArrayFBNP)

Mean number of blinks for facebook is: 193.964285714


In [524]:
print 'Mean number of blinks for twitter is:', getMeanBlinks(timeStampArrayTwitterNP)

Mean number of blinks for twitter is: 121.566666667


In [67]:
print 'Mean number of blinks for linkedIn is:', getMeanBlinks(timeStampArrayLinkedInNP)

Mean number of blinks for linkedIn is: 82.9642857143


In [66]:
print 'Mean number of blinks for youtube is:', getMeanBlinks(timeStampArrayYoutubeNP)

Mean number of blinks for youtube is: 343.5


# Mean time taken for page load

In [69]:
def meanTimeTaken(timeStampArray):
    timeDiffSum = 0.0
    for i in range(timeStampArray.shape[0]):
        timeDiffSum += timeStampArray[i][len(timeStampArray[i])-1] - timeStampArray[i][0]
    return timeDiffSum/(i*1000)




In [541]:
print 'Mean time to load google is:', meanTimeTaken(timeStampArrayNP), 's'

Mean number to load google is: 13.5981818182 s


In [542]:
print 'Mean time to load facebook is:', meanTimeTaken(timeStampArrayFBNP), 's'

Mean time to load facebook is: 26.6416666667 s


In [543]:
print 'Mean time to load twitter is:', meanTimeTaken(timeStampArrayTwitterNP), 's'

Mean time to load twitter is: 15.5272222222 s


In [73]:
print 'Mean time to load linkedIn is:', meanTimeTaken(timeStampArrayLinkedInNP), 's'

 Mean time to load linkedIn is: 9.53452380952 s


In [71]:
print 'Mean time to load youtubre is:', meanTimeTaken(timeStampArrayYoutubeNP), 's'

 Mean time to load youtubre is: 31.0027777778 s


# Get total blinks

In [551]:
# Total Blinks and Time for FB
getTotalBlinksAndTime(timeStampArrayFBNP)

182 30833.3333333
225 30733.3333333
232 33833.3333333
194 26300.0
217 30533.3333333
165 25433.3333333
6 1800.0
221 30433.3333333
186 29966.6666667
185 24966.6666667
233 32233.3333333
152 26600.0
191 25533.3333333
185 24700.0
208 32433.3333333
195 30400.0
165 25800.0
184 25066.6666667
195 25766.6666667
185 25000.0
133 26400.0
129 29800.0
226 25133.3333333
274 30033.3333333
272 31866.6666667
274 30833.3333333
223 24933.3333333
133 24066.6666667
277 31266.6666667
234 26266.6666667
241 25733.3333333
206 24533.3333333
270 29900.0
222 25100.0
218 25466.6666667
210 25533.3333333
264 32733.3333333
238 26400.0
275 31666.6666667
210 26233.3333333
278 30700.0
270 30366.6666667
186 24900.0
174 30366.6666667
12 1800.0
13 1733.33333333
178 25500.0
215 31966.6666667
186 25100.0
175 24966.6666667
173 24400.0
142 25066.6666667
161 25533.3333333
67 16966.6666667
91 25533.3333333
127 29633.3333333
179 25133.3333333


In [549]:
# Total Blinks and Time for Twitter
getTotalBlinksAndTime(timeStampArrayTwitterNP)

115 16133.3333333
124 16500.0
127 16633.3333333
119 16233.3333333
129 16933.3333333
122 16500.0
121 17000.0
124 15400.0
138 16533.3333333
120 15100.0
129 15833.3333333
114 15300.0
120 15033.3333333
119 14600.0
113 13800.0
121 14966.6666667
118 14166.6666667
121 16066.6666667
121 15300.0
118 14900.0
110 15066.6666667
116 14900.0
120 15566.6666667
124 15200.0
121 14866.6666667
123 15466.6666667
110 15566.6666667
120 14966.6666667
120 14566.6666667
114 14533.3333333
121 15066.6666667
117 14933.3333333
120 14966.6666667
129 15033.3333333
110 15000.0
118 15633.3333333
114 13566.6666667
113 15100.0
117 14600.0
111 14333.3333333
137 15966.6666667
119 13966.6666667
113 15000.0
116 14966.6666667
116 15000.0
116 15466.6666667
123 15766.6666667
111 15966.6666667
114 14166.6666667
115 14466.6666667
127 15266.6666667
125 15066.6666667
125 15766.6666667
117 15900.0
124 15400.0
118 15500.0
110 14600.0
129 15333.3333333
123 15566.6666667
125 15300.0
110 15333.3333333


In [552]:
# Total Blinks and Time for Google
getTotalBlinksAndTime(timeStampArrayNP)

74 8733.33333333
21 10100.0
12 9000.0
114 12100.0
106 10300.0
120 14233.3333333
128 12666.6666667
114 10800.0
110 11133.3333333
195 20266.6666667
116 12233.3333333
117 15166.6666667
110 11200.0
116 11966.6666667
111 11733.3333333
120 13300.0
124 12366.6666667
118 12766.6666667
119 14033.3333333
116 12133.3333333
122 13866.6666667
113 11333.3333333
132 14533.3333333
123 13300.0
114 10900.0
109 11266.6666667
117 13833.3333333
127 14033.3333333
111 11633.3333333
111 10366.6666667
120 20733.3333333
122 16466.6666667
56 11366.6666667
115 15833.3333333
113 10933.3333333
127 19966.6666667
125 20100.0
130 16700.0
113 12300.0
186 23466.6666667
118 12900.0
116 12333.3333333
122 14100.0
120 11766.6666667
111 10733.3333333
112 11433.3333333
113 11666.6666667
113 12033.3333333
131 16133.3333333
119 14133.3333333
118 11833.3333333
122 11800.0
117 11700.0
122 18133.3333333
121 12233.3333333
126 15800.0


# Run multiclass SVM


In [570]:
multiClassDataSet = generateMultiLabelDataset(timeStampBuckets, timeStampBucketsFB[:57,:], timeStampBucketsTwitter[:57,:]
                                              timeStampBucketsLinkedIn timeStampBuckets,  48, 48, 48, 48, )
run_classifier(trainingSet=multiClassDataSet[0], validationSet=multiClassDataSet[1] , 
               trainingLabels= multiClassDataSet[2],validationLabels=multiClassDataSet[3])

(15, 120)
(23, 120)
(120,)
(144, 120)
Accuracy is:  52.1739130435 %
(144, 120)
Accuracy is:  52.1739130435 %
(144, 120)
Accuracy is:  56.5217391304 %
(144, 120)
Accuracy is:  60.8695652174 %
(144, 120)
Accuracy is:  60.8695652174 %
(144, 120)
Accuracy is:  60.8695652174 %
(144, 120)
Accuracy is:  60.8695652174 %
(144, 120)
Accuracy is:  60.8695652174 %
(144, 120)
Accuracy is:  60.8695652174 %


In [562]:
print multiClassDataSet[0].shape
print multiClassDataSet[1].shape
print multiClassDataSet[2].shape
print multiClassDataSet[3].shape

(144, 120)
(152, 120)
(152, 1)
(15, 1)


In [36]:
multiData = np.vstack((timeStampBuckets, timeStampBucketsFB, timeStampBucketsTwitter, timeStampBucketsLinkedIn, timeStampBucketsYoutube))
multiLabels = np.vstack((np.ones((timeStampBuckets.shape[0], 1)) , 2*np.ones((timeStampBucketsFB.shape[0],1 ))
                        ,3*np.ones((timeStampBucketsTwitter.shape[0], 1)), 4*np.ones((timeStampBucketsLinkedIn.shape[0], 1))
                                   , 5*np.ones((timeStampBucketsYoutube.shape[0], 1))
                        ))
print multiData.shape
print multiLabels.shape

(256, 120)
(256, 1)


In [31]:
# One vs all SVM, non-normalized
runMultiKFold(multiData, multiLabels, c = 10)

(204, 120)
Normalized confusion matrix
[[ 0.67  0.11  0.11  0.11  0.  ]
 [ 0.    1.    0.    0.    0.  ]
 [ 0.08  0.    0.92  0.    0.  ]
 [ 0.    0.    0.1   0.9   0.  ]
 [ 0.    0.2   0.    0.2   0.6 ]]
(205, 120)
(205, 120)
(205, 120)
(205, 120)
Accuracy:  81.2292609351 %
4.06146304676
5


In [None]:
multiNormalizedData = np.vstack((googleNormalizedBuckets, facebookNormalizedBuckets, twitterNormalizedBuckets))
multiNormalizedLabels = np.vstack((np.ones((googleNormalizedBuckets.shape[0], 1)) , 2*np.ones((facebookNormalizedBuckets.shape[0],1 ))
                        ,3*np.ones((twitterNormalizedBuckets.shape[0], 1))
                        ))

In [None]:
runMultiKFold(multiNormalizedData, multiNormalizedLabels, c = 1)

In [28]:
 
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()




# Compare the normalized twitter 256 dataset with the existing set


In [31]:
def getClassifyModel(trainData, trainLabels, c):
    classifier = classifyTrain(trainData, trainLabels, c)
    return c

In [54]:
model = classifyTrain(multiData, multiLabels, 100)

(256, 120)


In [55]:
# evaluate accuracy of twitter predictions at 256kbps
model = classifyTrain(multiData, multiLabels, 100)
preds = model.predict(diffTwitterNormalizedBuckets256)
print preds
acc = 0.0
for i in preds:
    if i == 3: acc+=1
print 'Accuracy = ', (acc/len(preds))*100, '%'

[ 1.  2.  2.  1.  2.  1.  2.  1.  1.  3.]
Accuracy =  10.0 %


In [59]:
# evaluate accuracy of facebook predictions at 1Mbps
model = classifyTrain(multiData, multiLabels, 10)
# print model.predict(diffFacebookNormalizedBuckets1)
preds = model.predict(diffFacebookNormalizedBuckets1)
print preds
acc = 0.0
for i in preds:
    if i == 2: acc+=1
print 'Accuracy = ', (acc/len(preds))*100, '%'

(256, 120)
[ 2.  1.  4.  5.  1.  1.  2.]
Accuracy =  28.5714285714 %


In [61]:
# evaluate accuracy of facebook predictions at 2Mbps
# print model.predict(diffFacebookNormalizedBuckets2)
model = classifyTrain(multiData, multiLabels, 10)
preds = model.predict(diffFacebookNormalizedBuckets2)
print preds
acc = 0.0
for i in preds:
    if i == 2: acc+=1
print 'Accuracy = ', (acc/len(preds))*100, '%'

(256, 120)
[ 1.  2.  1.  2.  2.  2.  1.  5.  1.  2.  2.  2.  2.  2.  2.  2.]
Accuracy =  68.75 %


In [63]:
model = classifyTrain(multiData, multiLabels, 10)
preds = model.predict(diffFacebookNormalizedBuckets4)
print preds
acc = 0.0
for i in preds:
    if i == 2: acc+=1
print 'Accuracy = ', (acc/len(preds))*100, '%'

(256, 120)
[ 2.  2.  2.  1.  2.  2.  2.  1.  1.  2.  2.  2.  2.  2.  2.  2.  2.  2.
  1.  1.  1.  1.]
Accuracy =  68.1818181818 %


In [64]:
model = classifyTrain(multiData, multiLabels, 10)
preds = model.predict(diffFacebookNormalizedBuckets8)
print preds
acc = 0.0
for i in preds:
    if i == 2: acc+=1
print 'Accuracy = ', (acc/len(preds))*100, '%'

(256, 120)
[ 2.  1.  2.  2.  1.  1.  1.  2.  2.  1.]
Accuracy =  50.0 %


In [65]:
model = classifyTrain(multiData, multiLabels, 10)
preds = model.predict(diffFacebookNormalizedBuckets16)
print preds
acc = 0.0
for i in preds:
    if i == 2: acc+=1
print 'Accuracy = ', (acc/len(preds))*100, '%'

(256, 120)
[ 1.  2.  1.  2.  2.  1.]
Accuracy =  50.0 %


In [75]:

# writeToFile(facebookNormalizedBuckets, 'facebookNormalizedTimeStamp512.csv')          
# writeToFile(diffFacebookNormalizedBuckets2, 'facebookNormalizedTimeStamp2.csv')
 
# writeToFile(diffFacebookNormalizedBuckets4, 'facebookNormalizedTimeStamp4.csv')
# writeToFile(diffFacebookNormalizedBuckets8, 'facebookNormalizedTimeStamp8.csv')
# writeToFile(diffFacebookNormalizedBuckets16, 'facebookNormalizedTimeStamp16.csv')

writeToFile(diffTwitterNormalizedBuckets256, 'twitterNormalizedTimeStamps256.csv')
writeToFile(twitterNormalizedBuckets, 'twitterNormalizedTimeStamps512.csv')

       
