In [1]:
import re
import statistics
import numpy as np
import matplotlib.pyplot as plt
import itertools
import operator
from hmmlearn import hmm
from datetime import datetime
from pandas import DataFrame
from hmmlearn.hmm import GaussianHMM
from matplotlib import cm, pyplot as plt
np.random.seed(1)

In [2]:
# Hidden states
states = ['Passenger', 'Vehicle', 'Stoplight']
n_states = len(states)

In [3]:
# Oberservable states
observations = ['ChangeLaneLeft', 'ChangeLaneRight', 'Stop', 'Go', 'Load', 'Unload']

In [4]:
# Load event file
my_file = open('data.csv', 'r')
# Get everything from file
states = [] # Passenger, Vehicle, Stoplight
state_changes = [] # P (Passenger), V (Vehicle), S (Stoplight), N = No Change, this takes note of the changes
events = [] # Order of annotated events

for line in my_file:
    l = [i.strip() for i in line.split(',')]
    states.append(l[0]) # i.e. ['Passenger', 'P', 'Stop', datetime], l[0] = states, l[1] = state (if changed/no change), 
    state_changes.append(l[1]) # l[2] = event
    events.append(l[2])

In [5]:
# sp = start probability
total_count = len(states)
sp_passenger = states.count('Passenger') / total_count
sp_vehicle = states.count('Vehicle') / total_count
sp_stoplight = states.count('Stoplight') / total_count

print('Start probabilities')
print(sp_passenger, sp_vehicle, sp_stoplight)

# Convert state_changes to transition state i.e. change S N N N N S to SS, SV, VP, etc.
temp = ''
transition_state = []
for cur_state in state_changes:
    if temp == '':
        temp = cur_state
    else:
        if cur_state != 'N':
            word = temp+cur_state
            transition_state.append(word)
            temp = cur_state

Start probabilities
0.25539906103286386 0.643661971830986 0.09953051643192488


In [6]:
# tp = transition probability 
# p = passenger, v = vehicle, s = stoplight
# pp = passenger x passenger, pv = passenger x vehicle, ps = passenger x stoplight
total_transition = len(transition_state)
total_p = transition_state.count('PP') + transition_state.count('PV') + transition_state.count('PS')
total_v = transition_state.count('VP') + transition_state.count('VV') + transition_state.count('VS')
total_s = transition_state.count('SP') + transition_state.count('SV') + transition_state.count('SS')

# transition probability for p
tp_pp = transition_state.count('PP') / total_p
tp_pv = transition_state.count('PV') / total_p
tp_ps = transition_state.count('PS') / total_p

# transition probability for v
tp_vp = transition_state.count('VP') / total_v
tp_vv = transition_state.count('VV') / total_v
tp_vs = transition_state.count('VS') / total_v

# transition probability for s
tp_sp = transition_state.count('SP') / total_s
tp_sv = transition_state.count('SV') / total_s
tp_ss = transition_state.count('SS') / total_s

print('------------------------')
print('Transition probabilities')
print(tp_pp, tp_pv, tp_ps)
print(tp_vp, tp_vv, tp_vs)
print(tp_sp, tp_sv, tp_ss)

------------------------
Transition probabilities
0.7775735294117647 0.20772058823529413 0.014705882352941176
0.07804522246535375 0.8541210795040116 0.06783369803063458
0.06542056074766354 0.40654205607476634 0.5280373831775701


In [7]:
events_num = []

for event in events:
    if event == 'ChangeLaneLeft':
        events_num.append(0)
    elif event == 'ChangeLaneRight':
        events_num.append(1)
    elif event == 'Stop':
        events_num.append(2)
    elif event == 'Go':
        events_num.append(3)
    elif event == 'Load':
        events_num.append(4)
    elif event == 'Unload':
        events_num.append(5)

print(len(states))
print(len(events_num))
emission = []
for i in range(0, total_count):
    emission.append(states[i]+str(events_num[i]))   

2130
2130


In [8]:
# emission probabilities
# emission probability of passenger to an event X
total_ep = emission.count('Passenger0') + emission.count('Passenger1') + emission.count('Passenger2') + emission.count('Passenger3') + emission.count('Passenger4') + emission.count('Passenger5')
ep_p0 = emission.count('Passenger0') / total_ep
ep_p1 = emission.count('Passenger1') / total_ep
ep_p2 = emission.count('Passenger2') / total_ep
ep_p3 = emission.count('Passenger3') / total_ep
ep_p4 = emission.count('Passenger4') / total_ep
ep_p5 = emission.count('Passenger5') / total_ep

# emission probability of vehicle to an event X
total_ev = emission.count('Vehicle0') + emission.count('Vehicle1') + emission.count('Vehicle2') + emission.count('Vehicle3') + emission.count('Vehicle4') + emission.count('Vehicle5')
ep_v0 = emission.count('Vehicle0') / total_ev
ep_v1 = emission.count('Vehicle1') / total_ev
ep_v2 = emission.count('Vehicle2') / total_ev
ep_v3 = emission.count('Vehicle3') / total_ev
ep_v4 = emission.count('Vehicle4') / total_ev
ep_v5 = emission.count('Vehicle5') / total_ev

# emission probability of stoplight to an event X
total_es = emission.count('Stoplight0') + emission.count('Stoplight1') + emission.count('Stoplight2') + emission.count('Stoplight3') + emission.count('Stoplight4') + emission.count('Stoplight5')
ep_s0 = emission.count('Stoplight0') / total_es
ep_s1 = emission.count('Stoplight1') / total_es
ep_s2 = emission.count('Stoplight2') / total_es
ep_s3 = emission.count('Stoplight3') / total_es
ep_s4 = emission.count('Stoplight4') / total_es
ep_s5 = emission.count('Stoplight5') / total_es

print('------------------------')
print('Emission probabilities')
print(ep_p0, ep_p1, ep_p2, ep_p3, ep_p4, ep_p5)
print(ep_v0, ep_v1, ep_v2, ep_v3, ep_v4, ep_v5)
print(ep_s0, ep_s1, ep_s2, ep_s3, ep_s4, ep_s5)

------------------------
Emission probabilities
0.0 0.1213235294117647 0.26286764705882354 0.2702205882352941 0.16176470588235295 0.18382352941176472
0.47191830780452226 0.4573304157549234 0.034281546316557256 0.03574033552151714 0.0007293946024799417 0.0
0.0047169811320754715 0.018867924528301886 0.49056603773584906 0.4858490566037736 0.0 0.0


In [9]:
# Needed variables: emission (i.e. Stoplight0 -> Vehicle1, etc.)

# create new array with next_state
patterns = []

for i in range(0, len(emission)-1):
    patterns.append(emission[i]+str(emission[i+1]))

def most_common(L):
  # get an iterable of (item, iterable) pairs
  SL = sorted((x, i) for i, x in enumerate(L))
  # print 'SL:', SL
  groups = itertools.groupby(SL, key=operator.itemgetter(0))
  # auxiliary function to get "quality" for an item
  def _auxfun(g):
    item, iterable = g
    count = 0
    min_index = len(L)
    for _, where in iterable:
      count += 1
      min_index = min(min_index, where)
    # print 'item %r, count %r, minind %r' % (item, count, min_index)
    print(item, count, min_index)
    return count, -min_index
  # pick the highest-count/earliest item
  return max(groups, key=_auxfun)[0]

print("Most common event: ", most_common(emission))
print("Most common pattern: ", most_common(patterns))

Passenger1 66 20
Passenger2 143 3
Passenger3 147 2
Passenger4 88 1
Passenger5 100 101
StopLight2 1 174
StopLight3 1 175
Stoplight0 1 1366
Stoplight1 4 133
Stoplight2 104 68
Stoplight3 103 69
Vehicle0 647 18
Vehicle1 627 19
Vehicle2 47 6
Vehicle3 49 8
Vehicle4 1 7
ï»¿Passenger2 1 0
Most common event:  Vehicle0
Passenger1Passenger1 1 283
Passenger1Passenger2 63 20
Passenger1Vehicle0 1 705
Passenger1Vehicle1 1 1125
Passenger2Passenger2 1 1581
Passenger2Passenger3 7 44
Passenger2Passenger4 48 3
Passenger2Passenger5 87 100
Passenger3Passenger1 9 128
Passenger3Passenger2 19 2
Passenger3Passenger4 7 858
Passenger3Stoplight1 1 132
Passenger3Stoplight2 6 67
Passenger3Vehicle0 91 34
Passenger3Vehicle1 9 287
Passenger3Vehicle2 4 5
Passenger3Vehicle3 1 509
Passenger4Passenger3 58 1
Passenger4Passenger4 26 1529
Passenger4Vehicle1 1 236
Passenger4Vehicle3 3 1788
Passenger5Passenger3 81 101
Passenger5Passenger4 4 235
Passenger5Passenger5 11 1776
Passenger5Stoplight3 1 1160
Passenger5Vehicle3 2 312
St