In [1]:
import json
import os
import csv
import pandas as pd

import numpy as np
import scipy
from fastdtw import fastdtw
from scipy.spatial.distance import euclidean
from sklearn import preprocessing
from sklearn import model_selection
from sklearn.model_selection import KFold
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import GaussianNB


%matplotlib inline
import matplotlib.pyplot as plt

import time

In [2]:
# Absolute, machine-specific locations of the experiment data: adjust them before running elsewhere.
# Directory with the downloaded human result files ("Result<N>t.txt") read by createCSVFile.
humanResultDir = "C:/Users/ZhanYuan/Desktop/polimi/magistrale/Tesi/MyLocalProject/human-and-robotic-exploration/DownloadedResults"
# Robot result directories (not referenced by the cells visible in this part of the notebook).
robortResultDir = "C:/Users/ZhanYuan/Desktop/polimi/magistrale/Tesi/MyLocalProject/human-and-robotic-exploration/Unity/Project Arena/Assets/Results/ExperimentSamplesMultyTarget1"
robortResultDir2 = "C:/Users/ZhanYuan/Desktop/polimi/magistrale/Tesi/MyLocalProject/human-and-robotic-exploration/Unity/Project Arena/Assets/Results/ExperimentSamples2"
# Directory with the "<map>PythonFormat.map.txt" grid files loaded by analyzeHumanPath.
originalMapDir = "C:/Users/ZhanYuan/Desktop/polimi/magistrale/Tesi/MyLocalProject/human-and-robotic-exploration/Python"

In [3]:
# convert the x and y saved in database into corresponding coordinate of map
def rotate(x, y, origin, mapName):
    """Convert a coordinate saved in the database into the coordinate system of a map.

    The point is turned by a quarter turn, ``(x, y) -> (y, -x)``, and the new y is
    then shifted by an offset that depends on the map.

    Parameters
    ----------
    x, y : numbers
        Coordinate as stored in the database.
    origin : any
        Kept for interface compatibility; it is not used by the conversion.
    mapName : str
        One of "uffici1.map", "uffici2.map", "open1.map", "open2.map".

    Returns
    -------
    tuple
        ``(x3, y3)``, the converted coordinate.

    Raises
    ------
    ValueError
        If ``mapName`` is not one of the known maps (previously this surfaced as an
        UnboundLocalError on the return statement).
    """
    # y shift to apply after the rotation, per map
    yShiftByMap = {
        "uffici2.map": 53,
        "open2.map": 57,
        "open1.map": 48,
        "uffici1.map": 54,
    }
    if mapName not in yShiftByMap:
        raise ValueError("unknown map name: %r" % (mapName,))

    # rotate: (x, y) -> (y, -x), then shift y back into the map
    return y, -x + yShiftByMap[mapName]

In [4]:
# convert stringArray saved in json to normal list of array of coordinates
def stringArray2listArrayOfMap(target, origin, mapName):
    """Convert the "x,y" strings saved in the JSON result into map coordinates.

    Parameters
    ----------
    target : sequence of str
        Positions encoded as "x,y".
    origin : any
        Forwarded unchanged to ``rotate``.
    mapName : str
        Name of the map, forwarded to ``rotate``.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(len(target), 2)``; one ``[x, y]`` row per position.
        The result is always an array, also for zero or one position.
    """
    coordinates = []
    for item in target:
        a, b = item.split(",")
        a, b = rotate(int(a), int(b), origin, mapName)

        # translate one cell below as well, because 'rotate' considers y starting from index 1
        coordinates.append([int(a), int(b) - 1])

    if not coordinates:
        # keep the two-column shape so that callers can still use len() and row indexing
        return np.empty((0, 2), dtype=int)

    # build the array once instead of re-allocating it with np.append at every step
    return np.array(coordinates, dtype=int)

In [5]:
def getOriginalMap(mapName):
    """Return the file name of the Python-format grid that belongs to ``mapName``.

    Parameters
    ----------
    mapName : str
        One of "open1.map", "open2.map", "uffici1.map", "uffici2.map".

    Returns
    -------
    str
        The matching "<name>PythonFormat.map.txt" file name (no directory).

    Raises
    ------
    ValueError
        If ``mapName`` is not a known map (previously this surfaced as an
        UnboundLocalError on the return statement).
    """
    mapFileByName = {
        "open1.map": "open1PythonFormat.map.txt",
        "open2.map": "open2PythonFormat.map.txt",
        "uffici1.map": "uffici1PythonFormat.map.txt",
        "uffici2.map": "uffici2PythonFormat.map.txt",
    }
    if mapName not in mapFileByName:
        raise ValueError("unknown map name: %r" % (mapName,))

    return mapFileByName[mapName]

In [6]:
# Short direction codes -> direction labels.
# The labels are used as keys of the direction dictionaries built below and are the
# values returned as "next direction" (and therefore written to the CSV file).
vocabulary = {
    "SR" : "straight-right",
    "S" : "straight",
    "SL" : "straight-left",
    "R" : "right",
    "L" : "left",
    "B" : "behind"
}

# Analyze human path 

## find free direction

In [7]:
# Build the contour (outer ring of cells) of the square of half-side n centred on (0, 0),
# starting from cell (0, n) and proceeding clockwise.
def computeContour(n):
    """Return the cells on the border of the (2n+1) x (2n+1) square centred on the origin.

    Parameters
    ----------
    n : int
        Half-side of the square (distance of the ring from the centre).

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(8 * n, 2)`` with one ``[x, y]`` row per cell. The walk
        starts at ``(0, n)`` and goes through ``(n, n)``, ``(n, -n)``, ``(-n, -n)`` and
        ``(-n, n)`` before coming back next to the start. For ``n == 0`` the single
        cell ``[0, 0]`` is returned.
    """
    cells = []

    # first half of the top edge: from the starting cell (0, n) to the corner (n, n)
    cells.extend([x, n] for x in range(0, n + 1))
    # right edge, downwards to the corner (n, -n)
    cells.extend([n, y] for y in range(n - 1, -n - 1, -1))
    # bottom edge, leftwards to the corner (-n, -n)
    cells.extend([x, -n] for x in range(n - 1, -n - 1, -1))
    # left edge, upwards to the corner (-n, n)
    cells.extend([-n, y] for y in range(-n + 1, n + 1))
    # second half of the top edge, stopping one cell before the starting cell
    cells.extend([x, n] for x in range(-n + 1, 0))

    # plain `int` instead of the `np.int` alias, which no longer exists in recent NumPy
    return np.array(cells, dtype=int)
                    

In [8]:
def computeCoefficientOfLine(origin, prev):
    """Return the slope k of the line y = k * x + b through ``prev`` and ``origin``.

    k = (y2 - y1) / (x2 - x1); when the two points share the same x (vertical line,
    or identical points) ``np.inf`` is returned instead.
    """
    delta_x = origin[0] - prev[0]
    delta_y = origin[1] - prev[1]

    return np.inf if delta_x == 0 else delta_y / delta_x

In [9]:
# compute the straight point from origin on contour which have same direction from prev to origin
def computeStraightPointOnContour(origin, prev, contour):
    """Find the contour cell that continues the movement ``prev -> origin`` straight ahead.

    Parameters
    ----------
    origin : sequence of two numbers
        Current position; the contour is interpreted as centred on it.
    prev : sequence of two numbers
        Previous position.
    contour : sequence of [x, y] offsets
        Ring of cells around (0, 0), e.g. the result of ``computeContour``.

    Returns
    -------
    tuple or None
        ``(cell, index)`` where ``cell`` is ``contour[index]``: the first contour cell
        whose slope is closest to the slope of the movement and which lies on the side
        opposite to ``prev``. ``None`` if no candidate lies on that side.
    """
    # work relative to origin, so that origin coincides with the centre of the contour
    prev = [prev[0] - origin[0], prev[1] - origin[1]]
    origin = [0, 0]

    coefficient = computeCoefficientOfLine(origin, prev)

    # indices of the contour cells whose slope is nearest to the movement slope
    indices = []
    difference = np.inf

    for index in range(len(contour)):
        coefficient_elem = computeCoefficientOfLine(np.array([0, 0]), contour[index])

        if coefficient == np.inf and coefficient_elem == np.inf:
            # vertical movement: every vertical contour cell is a candidate
            indices.append(index)
        elif coefficient != np.inf and abs(coefficient - coefficient_elem) <= difference:
            if abs(coefficient - coefficient_elem) < difference:
                # strictly better slope: forget the previous candidates
                indices = []
                difference = abs(coefficient - coefficient_elem)
            indices.append(index)

    # among the candidates, return the one on the opposite side of prev
    for index in indices:
        if contour[index][0] * prev[0] <= 0 and contour[index][1] * prev[1] <= 0:
            return contour[index], index

    return None

In [10]:
def computeDirectionPointOnContour(contour, n, straight):
    """Map every direction label to its cell(s) on ``contour``.

    ``straight`` is a ``(cell, index)`` pair: ``cell`` becomes the "straight" entry and
    the other seven directions are read every ``n`` cells along the contour starting
    from ``index`` (wrapping around at the end). The three middle steps are collected,
    in order, into one array stored under "behind".
    """
    # same key insertion order as before: straight, behind, then the remaining labels
    direction = {vocabulary["S"]: straight[0], vocabulary["B"]: []}

    # code of the direction met at each of the 7 steps
    stepCodes = ("SR", "R", "B", "B", "B", "L", "SL")
    behindCells = []
    position = straight[1]
    contourLength = len(contour)

    for code in stepCodes:
        position = position + n
        if position >= contourLength:
            position = position % contourLength

        if code == "B":
            behindCells.append(contour[position])
            direction[vocabulary["B"]] = np.array(behindCells)
        else:
            direction[vocabulary[code]] = contour[position]

    return direction

In [11]:
def createDirectionBoolDict():
    """Return a dict with the five forward direction labels (no "behind"), all set to True."""
    return {vocabulary[code]: True for code in ("S", "SR", "R", "L", "SL")}
    

In [12]:
def checkFreePosition(coordinate, mapMatrix):
    """Tell whether the cell at ``coordinate`` is free.

    Parameters
    ----------
    coordinate : sequence of two ints
        ``[x, y]``; x selects the column and y the row of ``mapMatrix``.
    mapMatrix : numpy.ndarray
        Two-dimensional grid; a cell equal to 1 is not free.

    Returns
    -------
    bool
        False when the cell equals 1 or lies outside the grid, True otherwise.
    """
    x, y = coordinate[0], coordinate[1]
    rowCount, columnCount = np.shape(mapMatrix)[:2]

    # A cell outside the grid is never free. Without this check a negative index would
    # silently read a cell from the opposite edge and a too large one would raise IndexError.
    if not (0 <= y < rowCount and 0 <= x < columnCount):
        return False

    return bool(mapMatrix[y, x] != 1)

In [13]:
def isOnYourDirection(current, nextPosition, directions):
    """Classify the move ``current -> nextPosition`` as one of the direction labels.

    ``directions`` maps direction labels to [x, y] offsets relative to ``current``
    (see computeDirectionPointOnContour). The label whose offset has the slope closest
    to the slope of the move is selected; if that offset does not point to the same
    side as the move, the "behind" label is returned instead.
    """
    
    shift_x = current[0]
    shift_y = current[1]
    
    # translate both points so that current becomes the origin
    current = [current[0] - shift_x, current[1] - shift_y]
    nextPosition = [nextPosition[0] - shift_x, nextPosition[1] - shift_y]
    # slope of the move (np.inf when the move is vertical)
    coefficient = computeCoefficientOfLine(nextPosition,current)
    difference = np.inf
    max_abs = -np.inf
    
    for key in directions:
        # "behind" is skipped: the comparison is done on the other directions only
        if key != vocabulary["B"]:
            # slope of the line from current to the offset of this direction
            coefficient_elem = computeCoefficientOfLine(current, [current[0] + directions[key][0], current[1] + directions[key][1]])
            if coefficient == np.inf and coefficient_elem == np.inf:
                # both vertical: accept immediately if they point to the same side
                nearest = directions[key]
                if nearest[0] * nextPosition[0] >= 0 and nearest[1] * nextPosition[1] >= 0:
                    return key              
            elif coefficient == np.inf and abs(coefficient_elem) > max_abs:
                # vertical move, non-vertical candidate: remember the steepest one
                max_abs = abs(coefficient_elem)
                nearest = directions[key]
                res_key = key
            elif coefficient == 0 and coefficient_elem == 0:
                # both horizontal: accept immediately if they point to the same side
                nearest = directions[key]
                if nearest[0] * nextPosition[0] >= 0 and nearest[1] * nextPosition[1] >= 0:
                    return key
            elif coefficient != np.inf and abs(coefficient - coefficient_elem) < difference:
                # general case: remember the candidate with the closest slope
                difference = abs(coefficient - coefficient_elem)
                nearest = directions[key]
                res_key = key

    # NOTE(review): the two exact-match branches above assign `nearest` without updating
    # `res_key`, so here `nearest` and `res_key` may refer to different directions (and
    # `res_key` may not be assigned at all) - confirm that this is intended.
    if nearest[0] * nextPosition[0] >= 0 and nearest[1] * nextPosition[1] >= 0:
        return res_key
    else:
        return vocabulary["B"]
        

## already passed cell

In [14]:
def createMatrix(cell, radius):
    """Return every cell of the square of half-side ``radius`` centred on ``cell``.

    The result is an array with one ``[x, y]`` row per cell, ordered by x and then by y.
    """
    offsets = range(-radius, radius + 1)
    return np.array([[dx + cell[0], dy + cell[1]] for dx in offsets for dy in offsets])

In [15]:
# find and mark all cell between current and prev
def passedCell(current, prev):
    """Return the cells shared by the squares built around ``current`` and ``prev``.

    Both squares have a half-side equal to the largest coordinate difference between
    the two positions.

    Returns
    -------
    numpy.ndarray
        The rows of ``createMatrix(prev, radius)`` that also appear in
        ``createMatrix(current, radius)``, in their original order.
    """
    radius = max(abs(current[0] - prev[0]), abs(current[1] - prev[1]))
    currentMatrix = createMatrix(current, radius)
    prevMatrix = createMatrix(prev, radius)

    # all cells in the intersection of the two sets; rows are compared as tuples so
    # that the result does not depend on the integer width of the arrays
    currentCells = set(map(tuple, currentMatrix.tolist()))
    inBoth = np.array([tuple(cell) in currentCells for cell in prevMatrix.tolist()], dtype=bool)

    return prevMatrix[inBoth]

## analyze single file

In [16]:
def _loadMapMatrix(mapPath):
    """Load a comma-separated integer grid file as an object matrix (last file line = row 0)."""
    if not os.path.isfile(mapPath):
        raise FileNotFoundError("map file not found: " + mapPath)

    with open(mapPath) as f:
        rows = [[int(value) for value in line.strip().split(',')] for line in f]

    if not rows:
        raise ValueError("map file is empty: " + mapPath)

    # the matrix is stored bottom-up: the last line of the file becomes row 0
    rows.reverse()
    # object dtype, so that passed cells can later hold the step index as a string
    return np.array(rows).astype(object)


def analyzeHumanPath(filePath, n, passedThreshold=0.75):
    """Analyze one human result file step by step.

    Parameters
    ----------
    filePath : str
        Path of a JSON result file with the keys "ip", "mapName" and "position".
    n : int
        Number of contours (rings at distance 1..n) checked around every position;
        must be at least 1.
    passedThreshold : float, optional
        A step counts as "already passed" when more than this fraction of the cells
        between the previous and the current position was already marked.

    Returns
    -------
    tuple
        ``(ip, mapName, array_position, listFreeDirection, listNextDirection,
        listPassedBool, listNearIndex)``. The four lists hold one entry per position;
        entries that cannot be computed (first and last position, steps without
        movement) are the string "null".

    Raises
    ------
    FileNotFoundError
        If the result file or the map file does not exist.
    ValueError
        If the map file is empty.
    """
    if not os.path.isfile(filePath):
        raise FileNotFoundError("result file not found: " + str(filePath))

    with open(filePath) as path:
        pathData = json.load(path)
    ip = pathData["ip"]
    mapName = pathData["mapName"]
    array_position = pathData["position"]

    # create map matrix
    mapMatrix = _loadMapMatrix(originalMapDir + "/" + getOriginalMap(mapName))
    rowCount, columnCount = mapMatrix.shape

    # convert array into right format
    origin = (0.0, 0.0)
    array_position = stringArray2listArrayOfMap(array_position, origin, mapName)
    positionCount = len(array_position)

    listContour = [computeContour(i + 1) for i in range(n)]

    # the first position has no previous one: nothing can be computed for it
    prev = array_position[0] if positionCount > 0 else None
    listNearIndex = ["null"]
    listPassedBool = ["null"]
    listFreeDirection = ["null"]
    listNextDirection = ["null"]

    # from second action to penultimate
    for index in range(1, positionCount - 1):
        current = array_position[index]

        if not any(current != prev):
            # the player did not move: nothing to analyze for this step
            listNearIndex.append("null")
            listPassedBool.append("null")
            listFreeDirection.append("null")
            listNextDirection.append("null")
            continue

        # direction cells on every contour
        listDirection = []
        for c_index in range(len(listContour)):
            straight = computeStraightPointOnContour(current, prev, listContour[c_index])
            listDirection.append(computeDirectionPointOnContour(listContour[c_index], c_index + 1, straight))

        # a direction is free only if its cell is free on every contour
        freeDirection = createDirectionBoolDict()
        for key in freeDirection:
            for cont in listDirection:
                freeDirection[key] = freeDirection[key] and checkFreePosition([current[0] + cont[key][0], current[1] + cont[key][1]], mapMatrix)

        # next direction: look for the first later position that differs from the
        # current one, without running past the end of the path
        nextIndex = index + 1
        while nextIndex < positionCount and all(array_position[nextIndex] == current):
            nextIndex += 1
        if nextIndex < positionCount:
            nextDirection = isOnYourDirection(current, array_position[nextIndex], listDirection[-1])
        else:
            # the player never moves again: there is no next action to label
            nextDirection = "null"

        # change mapMatrix based on passed cells
        passed = passedCell(current, prev)
        countPassed = 0
        passedIndexList = []
        for p in passed:
            # skip cells outside the grid (a negative index would silently wrap around
            # and mark a cell on the opposite edge of the map)
            if not (0 <= p[1] < rowCount and 0 <= p[0] < columnCount):
                continue
            check = mapMatrix[p[1]][p[0]]

            if type(check) is str:
                # already marked by an earlier step: remember which one
                countPassed += 1
                if int(check) not in passedIndexList:
                    passedIndexList.append(int(check))
            elif check != 1 and check != 4:
                # neither wall (1) nor target (4): mark the cell with the current step
                mapMatrix[p[1]][p[0]] = str(index)

        if countPassed / len(passed) > passedThreshold:
            passedBool = True
            listNearIndex.append(sorted(passedIndexList))
        else:
            passedBool = False
            listNearIndex.append("null")

        listPassedBool.append(passedBool)
        listFreeDirection.append(freeDirection)
        listNextDirection.append(nextDirection)

        prev = current

    # for last one
    listNearIndex.append("null")
    listPassedBool.append("null")
    listFreeDirection.append("null")
    listNextDirection.append("null")

    return ip, mapName, array_position, listFreeDirection, listNextDirection, listPassedBool, listNearIndex

## create csv file

In [17]:
def createCSVFile():
    """Analyze Result1t.txt, Result2t.txt, ... and write one CSV row per recorded position.

    The files are read from ``humanResultDir`` in increasing order and the scan stops at
    the first missing number. The output file is "humanPathAnalysis.csv" in the current
    working directory.
    """
    fileName = "humanPathAnalysis.csv"
    header = ["ip", "mapName", "resultFile", "index", "x", "y",
              "freeLeft", "freeRight", "freeStraight", "freeStraightLeft", "freeStraightRight",
              "nextDirection", "alreadyPassed", "nearIndex"]
    # direction codes in the same order as the five "free..." columns of the header
    freeColumnCodes = ("L", "R", "S", "SL", "SR")

    with open(fileName,'w') as csvoutput:
        w = csv.writer(csvoutput, delimiter=',', lineterminator='\n')
        w.writerow(header)

        index = 1
        while True:
            resultFile = "Result" + str(index) + "t.txt"
            humanPath = humanResultDir + "/" + resultFile

            if not os.path.isfile(humanPath):
                break

            ip, mapName, array_position, listFreeDirection, listNextDirection, listPassed, listNearIndex = analyzeHumanPath(humanPath,5)
            for i in range(len(array_position)):
                row = [ip, mapName, resultFile, i, array_position[i][0], array_position[i][1]]

                freeDirection = listFreeDirection[i]
                if freeDirection == "null":
                    row.extend(["null"] * 5)
                else:
                    # a direction that is missing from the dictionary is reported as False
                    row.extend(bool(freeDirection.get(vocabulary[code], False)) for code in freeColumnCodes)

                row.append(listNextDirection[i])
                row.append(listPassed[i])

                nearIndex = listNearIndex[i]
                if type(nearIndex) is str:
                    row.append(nearIndex)
                else:
                    # step indices separated (and preceded) by a blank
                    row.append("".join(" " + str(elem) for elem in nearIndex))

                w.writerow(row)

            index = index + 1
            
   

In [18]:
# (Re)generate humanPathAnalysis.csv from all the available human result files
createCSVFile()

## analyze csv

In [19]:
# Column 12 of the CSV is "alreadyPassed": count how many steps fall in each of its values
allpath = (
    pd.read_csv('humanPathAnalysis.csv', usecols=[12])
    .groupby(['alreadyPassed'])
    .size()
    .reset_index(name='counts')
)
allpath

Unnamed: 0,alreadyPassed,counts
0,False,3392
1,True,1348


##### Now we consider only the relation between the free directions and the action that was actually taken next

In [20]:
# Columns 6-11 of the CSV: the five free-direction flags followed by nextDirection
paths = pd.read_csv('humanPathAnalysis.csv', usecols=range(6,12))
print(paths.shape[0], " lines")
paths.head(5)

5288  lines


Unnamed: 0,freeLeft,freeRight,freeStraight,freeStraightLeft,freeStraightRight,nextDirection
0,,,,,,
1,True,True,True,False,False,straight
2,False,False,True,False,False,straight
3,False,False,True,False,False,straight
4,False,False,True,True,True,straight


##### Eliminate the rows that contain null elements

In [21]:
# Drop every row with at least one missing value (the "null" fields of the CSV show up as NaN above)
paths_delete_rows = paths.dropna(axis=0)
paths_delete_rows.head(5)

Unnamed: 0,freeLeft,freeRight,freeStraight,freeStraightLeft,freeStraightRight,nextDirection
1,True,True,True,False,False,straight
2,False,False,True,False,False,straight
3,False,False,True,False,False,straight
4,False,False,True,True,True,straight
5,True,True,True,False,False,straight-right


##### Convert all variables to numerical values

In [22]:
# encoded values of every column
path_dict = {}

# keep all the label encoders used (one per column), so that the codes can be decoded later
label_encoders = {}

for v in paths_delete_rows.columns:
    label_encoders[v] = preprocessing.LabelEncoder()
    # learn the classes of the column and encode it in one step
    path_dict[v] = label_encoders[v].fit_transform(paths_delete_rows[v])

path_numerical = pd.DataFrame(path_dict)

path_numerical.head()

Unnamed: 0,freeLeft,freeRight,freeStraight,freeStraightLeft,freeStraightRight,nextDirection
0,1,1,1,0,0,3
1,0,0,1,0,0,3
2,0,0,1,0,0,3
3,0,0,1,1,1,3
4,1,1,1,0,0,5


nextDirection: 0 -> behind, 1 -> left, 2 -> right, 3 -> straight, 4 -> straight-left, 5 -> straight-right

### overview 

In [23]:
path_values = paths_delete_rows.values

# number of rows in which all five free-direction flags are set
countAllFree = 0
# number of rows per nextDirection label
directionCounts = {label: 0 for label in vocabulary.values()}

for row in path_values:
    if all(row[:5]):
        countAllFree += 1
    if row[5] in directionCounts:
        directionCounts[row[5]] += 1

countBehind = directionCounts[vocabulary["B"]]
countStraight = directionCounts[vocabulary["S"]]
countRight = directionCounts[vocabulary["R"]]
countLeft = directionCounts[vocabulary["L"]]
countStraightRight = directionCounts[vocabulary["SR"]]
countStraightLeft = directionCounts[vocabulary["SL"]]

# one (message, count) pair per printed line; percentages are relative to all the rows
report = [
    ("there are %d rows which have all directions free, percentage = %.2f%%", countAllFree),
    ("there are %d actions that players choose to go back, percentage = %.2f%%", countBehind),
    ("there are %d actions that players choose to go Straight, percentage = %.2f%%", countStraight),
    ("there are %d actions that players choose to go Right, percentage = %.2f%%", countRight),
    ("there are %d actions that players choose to go Left, percentage = %.2f%%", countLeft),
    ("there are %d actions that players choose to go StraightRight, percentage = %.2f%%", countStraightRight),
    ("there are %d actions that players choose to go StraightLeft, percentage = %.2f%%", countStraightLeft),
]
for message, count in report:
    print(message % (count, (count / len(path_values)) * 100))


there are 33 rows which have all directions free, percentage = 0.70%
there are 399 actions that players choose to go back, percentage = 8.42%
there are 2214 actions that players choose to go Straight, percentage = 46.71%
there are 205 actions that players choose to go Right, percentage = 4.32%
there are 75 actions that players choose to go Left, percentage = 1.58%
there are 964 actions that players choose to go StraightRight, percentage = 20.34%
there are 883 actions that players choose to go StraightLeft, percentage = 18.63%


### subcases

#### behind

In [24]:
# keep only the rows whose nextDirection is "behind" (code 0), drop the nextDirection column,
# count every combination of the five free-direction flags and order the combinations
# from the most to the least frequent
behind_group = (
    path_numerical[path_numerical.nextDirection == 0]
    .drop('nextDirection', axis=1)
    .groupby(['freeLeft','freeRight','freeStraight','freeStraightLeft','freeStraightRight'])
    .size()
    .reset_index(name='counts')
    .sort_values(by='counts', ascending=False)
)
behind_group.head(5)

Unnamed: 0,freeLeft,freeRight,freeStraight,freeStraightLeft,freeStraightRight,counts
0,0,0,0,0,0,124
16,1,0,0,0,0,48
4,0,0,1,0,0,31
2,0,0,0,1,0,30
1,0,0,0,0,1,19


#### left

In [25]:
# most frequent free-direction combinations among the steps followed by "left" (code 1)
left_group = (
    path_numerical[path_numerical.nextDirection == 1]
    .drop('nextDirection', axis=1)
    .groupby(['freeLeft','freeRight','freeStraight','freeStraightLeft','freeStraightRight'])
    .size()
    .reset_index(name='counts')
    .sort_values(by='counts', ascending=False)
)
left_group.head(5)

Unnamed: 0,freeLeft,freeRight,freeStraight,freeStraightLeft,freeStraightRight,counts
12,1,1,1,0,0,15
5,1,0,0,0,0,14
11,1,1,0,0,0,11
8,1,0,1,0,0,8
0,0,0,0,0,0,6


#### right

In [26]:
# most frequent free-direction combinations among the steps followed by "right" (code 2)
right_group = (
    path_numerical[path_numerical.nextDirection == 2]
    .drop('nextDirection', axis=1)
    .groupby(['freeLeft','freeRight','freeStraight','freeStraightLeft','freeStraightRight'])
    .size()
    .reset_index(name='counts')
    .sort_values(by='counts', ascending=False)
)
right_group.head(5)

Unnamed: 0,freeLeft,freeRight,freeStraight,freeStraightLeft,freeStraightRight,counts
7,0,1,0,0,0,34
0,0,0,0,0,0,23
23,1,1,0,0,0,16
1,0,0,0,0,1,13
11,0,1,1,0,0,13


#### straight

In [27]:
# most frequent free-direction combinations among the steps followed by "straight" (code 3)
straight_group = (
    path_numerical[path_numerical.nextDirection == 3]
    .drop('nextDirection', axis=1)
    .groupby(['freeLeft','freeRight','freeStraight','freeStraightLeft','freeStraightRight'])
    .size()
    .reset_index(name='counts')
    .sort_values(by='counts', ascending=False)
)
straight_group.head(5)

Unnamed: 0,freeLeft,freeRight,freeStraight,freeStraightLeft,freeStraightRight,counts
4,0,0,1,0,0,927
0,0,0,0,0,0,176
6,0,0,1,1,0,132
5,0,0,1,0,1,113
20,1,0,1,0,0,112


#### straight_left

In [28]:
# most frequent free-direction combinations among the steps followed by "straight-left" (code 4)
straight_l_group = (
    path_numerical[path_numerical.nextDirection == 4]
    .drop('nextDirection', axis=1)
    .groupby(['freeLeft','freeRight','freeStraight','freeStraightLeft','freeStraightRight'])
    .size()
    .reset_index(name='counts')
    .sort_values(by='counts', ascending=False)
)
straight_l_group.head(5)

Unnamed: 0,freeLeft,freeRight,freeStraight,freeStraightLeft,freeStraightRight,counts
2,0,0,0,1,0,168
0,0,0,0,0,0,117
4,0,0,1,0,0,100
6,0,0,1,1,0,90
18,1,0,0,1,0,44


#### straight_right

In [29]:
# most frequent free-direction combinations among the steps followed by "straight-right" (code 5)
straight_r_group = (
    path_numerical[path_numerical.nextDirection == 5]
    .drop('nextDirection', axis=1)
    .groupby(['freeLeft','freeRight','freeStraight','freeStraightLeft','freeStraightRight'])
    .size()
    .reset_index(name='counts')
    .sort_values(by='counts', ascending=False)
)
straight_r_group.head(5)

Unnamed: 0,freeLeft,freeRight,freeStraight,freeStraightLeft,freeStraightRight,counts
1,0,0,0,0,1,154
0,0,0,0,0,0,115
4,0,0,1,0,0,109
5,0,0,1,0,1,69
17,1,0,0,0,1,44


## Naive Bayes

In [30]:
# split the encoded table into the target column and the feature columns
target_attr = 'nextDirection'
is_target = path_numerical.columns == target_attr
path_target = path_numerical.columns[is_target]
path_variables = path_numerical.columns[~is_target]
# values of the single target column and of the remaining (feature) columns
path_target_values = path_numerical[path_target].values
path_variables_values = path_numerical[path_variables].values

In [31]:
# feature matrix, columns in the order: freeLeft, freeRight, freeStraight, freeStraightLeft, freeStraightRight
path_variables_values

array([[1, 1, 1, 0, 0],
       [0, 0, 1, 0, 0],
       [0, 0, 1, 0, 0],
       ...,
       [0, 0, 1, 1, 1],
       [1, 0, 0, 1, 0],
       [1, 0, 0, 1, 0]], dtype=int64)

In [32]:
# flatten the single-column target matrix into a one-dimensional vector
path_target_values = path_target_values.reshape(path_target_values.shape[0])
path_target_values

array([3, 3, 3, ..., 2, 0, 3], dtype=int64)

###### prior probability of class

In [33]:
# number of rows per class
prior_class = pd.crosstab(index=path_numerical['nextDirection'], columns="count")
# the encoded classes 0..5 are relabelled, in order, with their direction labels
prior_class.index = [vocabulary[code] for code in ('B', 'L', 'R', 'S', 'SL', 'SR')]
prior_class

col_0,count
behind,399
left,75
right,205
straight,2214
straight-left,883
straight-right,964


###### prior probability of predictor

In [34]:
# freeLeft (rows) against the next direction (columns)
left_tab = pd.crosstab(index=path_numerical['freeLeft'], columns=path_numerical['nextDirection'])
# the encoded classes 0..5 are relabelled, in order, with their direction labels
left_tab.columns = [vocabulary[code] for code in ('B', 'L', 'R', 'S', 'SL', 'SR')]
left_tab

Unnamed: 0_level_0,behind,left,right,straight,straight-left,straight-right
freeLeft,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,282,17,132,1841,704,699
1,117,58,73,373,179,265


In [35]:
# freeRight (rows) against the next direction (columns)
right_tab = pd.crosstab(index=path_numerical['freeRight'], columns=path_numerical['nextDirection'])
# the encoded classes 0..5 are relabelled, in order, with their direction labels
right_tab.columns = [vocabulary[code] for code in ('B', 'L', 'R', 'S', 'SL', 'SR')]
right_tab

Unnamed: 0_level_0,behind,left,right,straight,straight-left,straight-right
freeRight,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,333,42,89,1844,696,747
1,66,33,116,370,187,217


In [36]:
# freeStraight (rows) against the next direction (columns)
straight_tab = pd.crosstab(index=path_numerical['freeStraight'], columns=path_numerical['nextDirection'])
# the encoded classes 0..5 are relabelled, in order, with their direction labels
straight_tab.columns = [vocabulary[code] for code in ('B', 'L', 'R', 'S', 'SL', 'SR')]
straight_tab

Unnamed: 0_level_0,behind,left,right,straight,straight-left,straight-right
freeStraight,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,296,37,134,504,498,541
1,103,38,71,1710,385,423


In [37]:
# freeStraightLeft (rows) against the next direction (columns)
straight_left_tab = pd.crosstab(index=path_numerical['freeStraightLeft'], columns=path_numerical['nextDirection'])
# the encoded classes 0..5 are relabelled, in order, with their direction labels
straight_left_tab.columns = [vocabulary[code] for code in ('B', 'L', 'R', 'S', 'SL', 'SR')]
straight_left_tab

Unnamed: 0_level_0,behind,left,right,straight,straight-left,straight-right
freeStraightLeft,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,291,67,157,1727,393,718
1,108,8,48,487,490,246


In [38]:
# freeStraightRight (rows) against the next direction (columns)
straight_right_tab = pd.crosstab(index=path_numerical['freeStraightRight'], columns=path_numerical['nextDirection'])
# the encoded classes 0..5 are relabelled, in order, with their direction labels
straight_right_tab.columns = [vocabulary[code] for code in ('B', 'L', 'R', 'S', 'SL', 'SR')]
straight_right_tab

Unnamed: 0_level_0,behind,left,right,straight,straight-left,straight-right
freeStraightRight,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,329,71,137,1759,696,440
1,70,4,68,455,187,524


We notice that there are no zero frequencies here: every value of every predictor occurs at least once with every class.

#### use library

In [39]:
# two Naive Bayes variants, both with their default parameters, evaluated on the same data below
model_B = BernoulliNB()
model_G = GaussianNB()

Remember that the feature columns are ordered as: left, right, straight, straight_left, straight_right,
and that these directions correspond to the output values 1, 2, 3, 4, 5 respectively (0 is behind).

In [40]:
# 10-fold cross-validation (shuffled, fixed seed) of the Bernoulli model;
# the cell shows the mean and the standard deviation of the fold scores
xval_B = model_selection.cross_val_score(
    model_B,
    path_variables_values,
    path_target_values,
    cv=KFold(n_splits=10, shuffle=True, random_state=1234),
)
xval_B.mean(), xval_B.std()

(0.5362869198312236, 0.015554690794233564)

In [41]:
# same 10-fold cross-validation (same shuffling seed) for the Gaussian model;
# the cell shows the mean and the standard deviation of the fold scores
xval_G = model_selection.cross_val_score(
    model_G,
    path_variables_values,
    path_target_values,
    cv=KFold(n_splits=10, shuffle=True, random_state=1234),
)
xval_G.mean(), xval_G.std()

(0.5116033755274261, 0.020546617019687907)

In [42]:
# fit both models on all the available rows
model_B.fit(path_variables_values, path_target_values)
model_G.fit(path_variables_values, path_target_values)

# one hand-made sample, in the feature order
# freeLeft, freeRight, freeStraight, freeStraightLeft, freeStraightRight
test_data = [0, 1, 1, 1, 1]
predicted_B= model_B.predict([test_data])
predicted_G= model_G.predict([test_data])
# the printed values are encoded nextDirection classes
print(predicted_B, predicted_G)

[3] [5]
