# UV Driver Behavior Modeling Using HMM

## Import Dependencies

In [1]:
import re
import statistics
import numpy as np
import matplotlib.pyplot as plt
import itertools
import operator
from hmmlearn import hmm
from datetime import datetime
from pandas import DataFrame
from hmmlearn.hmm import GaussianHMM
from matplotlib import cm, pyplot as plt
np.random.seed(1)

## Training the Hidden Markov Model

### Create List of Possible Hidden States

In [2]:
states = ['Passenger', 'Vehicle', 'Stoplight']
n_states = len(states)

### Create a List of Possible Observable States 

In [3]:
# Oberservable states
observations = ['ChangeLaneLeft', 'ChangeLaneRight', 'Stop', 'Go', 'Load', 'Unload']

### Load Collected Data

In [4]:
# Load event file
my_file = open('data.csv', 'r', encoding='utf-8-sig')
# Get everything from file
observed_states = [] # Passenger, Vehicle, Stoplight
observed_state_changes = [] # P (Passenger), V (Vehicle), S (Stoplight), N = No Change, this takes note of the changes
observed_events = [] # Order of annotated events

for line in my_file:
    l = [i.strip() for i in line.split(',')]
    observed_states.append(l[0]) # i.e. ['Passenger', 'P', 'Stop', datetime], l[0] = states, l[1] = state (if changed/observed_no change), 
    observed_state_changes.append(l[1]) # l[2] = event
    observed_events.append(l[2])

### Calculate State Probability

In [5]:
# sp = start probability
total_count = len(observed_states)
sp_passenger = observed_states.count('Passenger') / total_count
sp_vehicle = observed_states.count('Vehicle') / total_count
sp_stoplight = observed_states.count('Stoplight') / total_count

print('Start probabilities')
print(sp_passenger, sp_vehicle, sp_stoplight)
state_probability = np.array([sp_passenger, sp_vehicle, sp_stoplight])

Start probabilities
0.25586854460093894 0.643661971830986 0.10046948356807511


### Convert State Changes to Transition States 

In [6]:
# Convert state_changes to transition state i.e. change S N N N N S to SS, SV, VP, etc.
temp = ''
transition_state = []
for cur_state in observed_state_changes:
    if temp == '':
        temp = cur_state
    else:
        if cur_state != 'N':
            word = temp+cur_state
            transition_state.append(word)
            temp = cur_state

### Calculate Transition Probabilities

In [7]:
# tp = transition probability 
# p = passenger, v = vehicle, s = stoplight
# pp = passenger x passenger, pv = passenger x vehicle, ps = passenger x stoplight
total_transition = len(transition_state)
total_p = transition_state.count('PP') + transition_state.count('PV') + transition_state.count('PS')
total_v = transition_state.count('VP') + transition_state.count('VV') + transition_state.count('VS')
total_s = transition_state.count('SP') + transition_state.count('SV') + transition_state.count('SS')

# transition probability for p
tp_pp = transition_state.count('PP') / total_p
tp_pv = transition_state.count('PV') / total_p
tp_ps = transition_state.count('PS') / total_p

# transition probability for v
tp_vp = transition_state.count('VP') / total_v
tp_vv = transition_state.count('VV') / total_v
tp_vs = transition_state.count('VS') / total_v

# transition probability for s
tp_sp = transition_state.count('SP') / total_s
tp_sv = transition_state.count('SV') / total_s
tp_ss = transition_state.count('SS') / total_s

transition_probability = np.array([[tp_pp, tp_pv, tp_ps],
                                   [tp_vp, tp_vv, tp_vs],
                                   [tp_sp, tp_sv, tp_ss]])

In [8]:
print('------------------------')
print('Transition probabilities')
print(tp_pp, tp_pv, tp_ps)
print(tp_vp, tp_vv, tp_vs)
print(tp_sp, tp_sv, tp_ss)

------------------------
Transition probabilities
0.7775735294117647 0.20772058823529413 0.014705882352941176
0.07804522246535375 0.8541210795040116 0.06783369803063458
0.06542056074766354 0.40654205607476634 0.5280373831775701


### Get Emissions 

In [9]:
temp_events_num = []

for event in observed_events:
    if event == 'ChangeLaneLeft':
        temp_events_num.append(0)
    elif event == 'ChangeLaneRight':
        temp_events_num.append(1)
    elif event == 'Stop':
        temp_events_num.append(2)
    elif event == 'Go':
        temp_events_num.append(3)
    elif event == 'Load':
        temp_events_num.append(4)
    elif event == 'Unload':
        temp_events_num.append(5)

print(len(observed_states))
print(len(temp_events_num))
emission = []
for i in range(0, total_count):
    emission.append(observed_states[i]+str(temp_events_num[i]))   

events_num = np.array([temp_events_num]).reshape(-1,1)

2130
2130


### Get Emission Probabilities

In [10]:
# emission probabilities
# emission probability of passenger to an event X
total_ep = emission.count('Passenger0') + emission.count('Passenger1') + emission.count('Passenger2') + emission.count('Passenger3') + emission.count('Passenger4') + emission.count('Passenger5')
ep_p0 = emission.count('Passenger0') / total_ep
ep_p1 = emission.count('Passenger1') / total_ep
ep_p2 = emission.count('Passenger2') / total_ep
ep_p3 = emission.count('Passenger3') / total_ep
ep_p4 = emission.count('Passenger4') / total_ep
ep_p5 = emission.count('Passenger5') / total_ep

# emission probability of vehicle to an event X
total_ev = emission.count('Vehicle0') + emission.count('Vehicle1') + emission.count('Vehicle2') + emission.count('Vehicle3') + emission.count('Vehicle4') + emission.count('Vehicle5')
ep_v0 = emission.count('Vehicle0') / total_ev
ep_v1 = emission.count('Vehicle1') / total_ev
ep_v2 = emission.count('Vehicle2') / total_ev
ep_v3 = emission.count('Vehicle3') / total_ev
ep_v4 = emission.count('Vehicle4') / total_ev
ep_v5 = emission.count('Vehicle5') / total_ev

# emission probability of stoplight to an event X
total_es = emission.count('Stoplight0') + emission.count('Stoplight1') + emission.count('Stoplight2') + emission.count('Stoplight3') + emission.count('Stoplight4') + emission.count('Stoplight5')
ep_s0 = emission.count('Stoplight0') / total_es
ep_s1 = emission.count('Stoplight1') / total_es
ep_s2 = emission.count('Stoplight2') / total_es
ep_s3 = emission.count('Stoplight3') / total_es
ep_s4 = emission.count('Stoplight4') / total_es
ep_s5 = emission.count('Stoplight5') / total_es

emission_probability = np.array([[ep_p0, ep_p1, ep_p2, ep_p3, ep_p4, ep_p5],
                                 [ep_v0, ep_v1, ep_v2, ep_v3, ep_v4, ep_v5],
                                 [ep_s0, ep_s1, ep_s2, ep_s3, ep_s4, ep_s5]])

In [11]:
print('------------------------')
print('Emission probabilities')
print(ep_p0, ep_p1, ep_p2, ep_p3, ep_p4, ep_p5)
print(ep_v0, ep_v1, ep_v2, ep_v3, ep_v4, ep_v5)
print(ep_s0, ep_s1, ep_s2, ep_s3, ep_s4, ep_s5)

------------------------
Emission probabilities
0.0 0.12110091743119267 0.26422018348623855 0.26972477064220185 0.1614678899082569 0.1834862385321101
0.47191830780452226 0.4573304157549234 0.034281546316557256 0.03574033552151714 0.0007293946024799417 0.0
0.004672897196261682 0.018691588785046728 0.49065420560747663 0.48598130841121495 0.0 0.0


### Training the HMM Model Using the Calculated Prbbilities

In [12]:
model = hmm.CategoricalHMM(n_components=len(states), n_features=len(observations))
model.startprob_ = state_probability
model.transmat_ = transition_probability
model.emissionprob_ = emission_probability

In [13]:
model.fit(events_num)

Even though the 'startprob_' attribute is set, it will be overwritten during initialization because 'init_params' contains 's'
Even though the 'transmat_' attribute is set, it will be overwritten during initialization because 'init_params' contains 't'
Even though the 'emissionprob_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'e'


In [14]:
log_probability, hidden_states = model.decode(events_num)
hidden_states = model.predict(events_num)
print("Most likely hidden states:", hidden_states)

Most likely hidden states: [0 2 1 ... 2 1 2]


In [15]:
element_counts = {}

for element in hidden_states:
    if element in element_counts:
        element_counts[element] += 1
    else:
        element_counts[element] = 1

# Print the counts
for element, count in element_counts.items():
    print(f"{element}: {count}")

0: 247
2: 932
1: 951


### Getting the Log-Likelihood Score of the Model

In [16]:
log_likelihood = model.score(events_num)
log_likelihood

-3112.9795782720657