In [2]:
import itertools
import operator
import re
import statistics
from collections import Counter
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
from hmmlearn import hmm
from hmmlearn.hmm import GaussianHMM
from matplotlib import cm, pyplot as plt
from pandas import DataFrame
# Seed NumPy's global random number generator so that NumPy-based randomness
# in later cells is repeatable from one run to the next.
np.random.seed(1)

In [3]:
# Hidden states of the model.
# NOTE(review): the data-loading cell further down rebinds `states` to the
# list of per-row state labels read from data.csv, so after a top-to-bottom
# run only `n_states` still reflects this three-state definition.
states = ['Passenger', 'Vehicle', 'Stoplight']
n_states = len(states)

In [4]:
# Observable states
# NOTE(review): these labels differ from the event names that are encoded as
# integers later in the notebook ('Stop', 'Go', 'Wait', 'Load', 'Unload',
# '1LaneLeft', '2LaneLeft', '1LaneRight', '2LaneRight') - confirm which
# vocabulary data.csv actually uses.
observations = ['ChangeLaneLeft', 'ChangeLaneRight', 'Stop', 'Go', 'Load', 'Unload']

In [None]:
# Load event file
# Each row is comma separated, e.g. Passenger, P, Stop, <datetime>:
#   column 0 = state, column 1 = state change (or N for no change), column 2 = event
states = []         # Passenger, Vehicle, Stoplight
state_changes = []  # P (Passenger), V (Vehicle), S (Stoplight), N = No Change, this takes note of the changes
events = []         # Order of annotated events

# The context manager closes the file even when a row fails to parse.
with open('data.csv', 'r') as my_file:
    for line_number, line in enumerate(my_file, start=1):
        if not line.strip():
            # Blank lines (typically a trailing newline at end of file) carry no row.
            continue

        fields = [field.strip() for field in line.split(',')]
        if len(fields) < 3:
            raise ValueError(
                'data.csv line %d: expected at least 3 comma-separated fields, got %d'
                % (line_number, len(fields))
            )

        states.append(fields[0])
        state_changes.append(fields[1])
        events.append(fields[2])

In [None]:
# sp = start probability
# NOTE(review): each value is the share of ALL rows labelled with that state,
# not the share of sequences that begin in it - confirm this is the intended
# estimate for the HMM start distribution.
total_count = len(states)
if total_count == 0:
    # Fail with an explicit message instead of an opaque ZeroDivisionError.
    raise ValueError('states is empty: load the event file before estimating start probabilities')

state_counts = Counter(states)
sp_passenger = state_counts['Passenger'] / total_count
sp_vehicle = state_counts['Vehicle'] / total_count
sp_stoplight = state_counts['Stoplight'] / total_count

print('Start probabilities')
print(sp_passenger, sp_vehicle, sp_stoplight)

# Convert state_changes to transition state i.e. change S N N N N S to SS, SV, VP, etc.
# 'N' (no change) entries are skipped everywhere, including at the very start,
# so a leading 'N' can no longer produce a bogus pair such as 'NS'.
transition_state = []
previous_state = None
for cur_state in state_changes:
    if cur_state == 'N':
        continue
    if previous_state is not None:
        transition_state.append(previous_state + cur_state)
    previous_state = cur_state

In [None]:
# tp = transition probability
# p = passenger, v = vehicle, s = stoplight
# pp = passenger x passenger, pv = passenger x vehicle, ps = passenger x stoplight
total_transition = len(transition_state)

# Tally every two-letter transition once instead of rescanning the list per pair.
transition_counts = Counter(transition_state)
STATE_CODES = ('P', 'V', 'S')

# Number of observed transitions leaving each state (row totals).
outgoing_totals = {
    source: sum(transition_counts[source + target] for target in STATE_CODES)
    for source in STATE_CODES
}
total_p = outgoing_totals['P']
total_v = outgoing_totals['V']
total_s = outgoing_totals['S']

# Row-normalise the counts. A state that never appears as a source gets an
# all-zero row instead of raising ZeroDivisionError; such a row is not a valid
# probability distribution and must be handled before it is fed to a model.
transition_rows = {
    source: [
        transition_counts[source + target] / outgoing_totals[source]
        if outgoing_totals[source] else 0.0
        for target in STATE_CODES
    ]
    for source in STATE_CODES
}

# transition probability for p
tp_pp, tp_pv, tp_ps = transition_rows['P']

# transition probability for v
tp_vp, tp_vv, tp_vs = transition_rows['V']

# transition probability for s
tp_sp, tp_sv, tp_ss = transition_rows['S']

print('------------------------')
print('Transition probabilities')
print(tp_pp, tp_pv, tp_ps)
print(tp_vp, tp_vv, tp_vs)
print(tp_sp, tp_sv, tp_ss)

In [None]:
# Integer code assigned to every annotated event name.
EVENT_CODES = {
    'Stop': 0,
    'Go': 1,
    'Wait': 2,
    'Load': 3,
    'Unload': 4,
    '1LaneLeft': 5,
    '2LaneLeft': 6,
    '1LaneRight': 7,
    '2LaneRight': 8,
}

# An event without a code used to be dropped silently, which left events_num
# shorter than states and broke the pairing below. Report such events up front.
unknown_events = sorted(set(events).difference(EVENT_CODES))
if unknown_events:
    raise ValueError('Events without an integer code: %s' % ', '.join(map(repr, unknown_events)))

events_num = [EVENT_CODES[event] for event in events]

print(len(states))
print(len(events_num))

# Pair each row's state with its event code, e.g. 'Passenger' + '3' -> 'Passenger3'.
# zip() walks both lists in step, so no row count from another cell is needed.
emission = [state + str(code) for state, code in zip(states, events_num)]

In [None]:
# emission probabilities
# ep_<state initial><event code>, e.g. ep_p3 = share of Passenger rows whose event code is 3
emission_counts = Counter(emission)  # one pass instead of one list scan per label
N_EVENT_CODES = 9                    # event codes run from 0 to 8

emission_totals = {}
emission_rows = {}
for state_name in ('Passenger', 'Vehicle', 'Stoplight'):
    code_counts = [emission_counts[state_name + str(code)] for code in range(N_EVENT_CODES)]
    state_total = sum(code_counts)
    emission_totals[state_name] = state_total
    # A state with no recorded emissions gets an all-zero row instead of
    # raising ZeroDivisionError; such a row is not a valid distribution.
    emission_rows[state_name] = [
        count / state_total if state_total else 0.0 for count in code_counts
    ]

# emission probability of passenger to an event X
total_ep = emission_totals['Passenger']
ep_p0, ep_p1, ep_p2, ep_p3, ep_p4, ep_p5, ep_p6, ep_p7, ep_p8 = emission_rows['Passenger']

# emission probability of vehicle to an event X
total_ev = emission_totals['Vehicle']
ep_v0, ep_v1, ep_v2, ep_v3, ep_v4, ep_v5, ep_v6, ep_v7, ep_v8 = emission_rows['Vehicle']

# emission probability of stoplight to an event X
total_es = emission_totals['Stoplight']
ep_s0, ep_s1, ep_s2, ep_s3, ep_s4, ep_s5, ep_s6, ep_s7, ep_s8 = emission_rows['Stoplight']

print('------------------------')
print('Emission probabilities')
print(ep_p0, ep_p1, ep_p2, ep_p3, ep_p4, ep_p5, ep_p6, ep_p7, ep_p8)
print(ep_v0, ep_v1, ep_v2, ep_v3, ep_v4, ep_v5, ep_v6, ep_v7, ep_v8)
print(ep_s0, ep_s1, ep_s2, ep_s3, ep_s4, ep_s5, ep_s6, ep_s7, ep_s8)

In [None]:
# emission probabilities
# NOTE(review): this cell computes and prints the same emission probabilities
# as the cell directly before it; it is kept so every name stays defined, but
# one of the two cells can be removed.
emission_counts = Counter(emission)  # one pass instead of one list scan per label
N_EVENT_CODES = 9                    # event codes run from 0 to 8

emission_totals = {}
emission_rows = {}
for state_name in ('Passenger', 'Vehicle', 'Stoplight'):
    code_counts = [emission_counts[state_name + str(code)] for code in range(N_EVENT_CODES)]
    state_total = sum(code_counts)
    emission_totals[state_name] = state_total
    # A state with no recorded emissions gets an all-zero row instead of
    # raising ZeroDivisionError; such a row is not a valid distribution.
    emission_rows[state_name] = [
        count / state_total if state_total else 0.0 for count in code_counts
    ]

# emission probability of passenger to an event X
total_ep = emission_totals['Passenger']
ep_p0, ep_p1, ep_p2, ep_p3, ep_p4, ep_p5, ep_p6, ep_p7, ep_p8 = emission_rows['Passenger']

# emission probability of vehicle to an event X
total_ev = emission_totals['Vehicle']
ep_v0, ep_v1, ep_v2, ep_v3, ep_v4, ep_v5, ep_v6, ep_v7, ep_v8 = emission_rows['Vehicle']

# emission probability of stoplight to an event X
total_es = emission_totals['Stoplight']
ep_s0, ep_s1, ep_s2, ep_s3, ep_s4, ep_s5, ep_s6, ep_s7, ep_s8 = emission_rows['Stoplight']

print('------------------------')
print('Emission probabilities')
print(ep_p0, ep_p1, ep_p2, ep_p3, ep_p4, ep_p5, ep_p6, ep_p7, ep_p8)
print(ep_v0, ep_v1, ep_v2, ep_v3, ep_v4, ep_v5, ep_v6, ep_v7, ep_v8)
print(ep_s0, ep_s1, ep_s2, ep_s3, ep_s4, ep_s5, ep_s6, ep_s7, ep_s8)

In [None]:
# Input: emission (one label per row, e.g. Stoplight0, Vehicle1, ...)

# Join every emission label with the label that follows it, giving one
# pattern string per pair of consecutive rows.
patterns = [
    current + str(following)
    for current, following in zip(emission, emission[1:])
]

def most_common(L, verbose=True):
    """Return the item that occurs most often in ``L``.

    Ties are broken in favour of the item whose first occurrence comes
    earliest in ``L``.

    Parameters
    ----------
    L : sequence
        Items to tally. They must be mutually orderable, because the items
        are sorted before being grouped.
    verbose : bool, optional
        When true (the default, which matches the earlier behaviour) print
        ``item count first_index`` for every distinct item.

    Returns
    -------
    object
        The most frequent item.

    Raises
    ------
    ValueError
        If ``L`` is empty.
    """
    if len(L) == 0:
        raise ValueError('most_common() requires a non-empty sequence')

    # Sorting (item, position) pairs puts equal items next to each other so
    # that groupby can collect them in a single pass.
    indexed_items = sorted((item, position) for position, item in enumerate(L))
    groups = itertools.groupby(indexed_items, key=operator.itemgetter(0))

    def _score(group):
        """Rank a group by (occurrence count, earliest position)."""
        item, members = group
        positions = [position for _, position in members]
        count = len(positions)
        first_index = min(positions)
        if verbose:
            print(item, count, first_index)
        # Negating the index makes an earlier first occurrence win a tie.
        return count, -first_index

    # max() scores each group as soon as groupby yields it, before the shared
    # underlying iterator advances to the next group.
    return max(groups, key=_score)[0]

# Report the most frequent single emission label first, then the most
# frequent pair of consecutive labels. Each result is printed right after it
# is computed so the output order stays the same.
top_event = most_common(emission)
print("Most common event: ", top_event)

top_pattern = most_common(patterns)
print("Most common pattern: ", top_pattern)