<a href="https://colab.research.google.com/github/Hadjadj-Benakmoum/MobilityPredictionUsingML/blob/main/Hidden_Markov_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import time
import os

# Mount Google drive

In [None]:
# Mount Google Drive inside the Colab runtime; later cells read their input
# traces from '/content/drive/My Drive/Mobility test/'.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Load/split sequences functions

In [None]:
def Load_Data(file):
  """Read a mobility trace and return its locations and weekday names.

  Every usable line must hold at least two whitespace-separated fields: an
  integer Unix timestamp followed by a location identifier. Blank lines,
  lines with fewer than two fields and lines whose first field is not an
  integer are skipped, so the two returned lists always have the same length
  and stay aligned index by index.

  Args:
    file: Path of the trace file to read.

  Returns:
    A tuple ``(locations, days)``; ``days[k]`` is the weekday name
    (``time.strftime("%A")`` applied to the local time of the timestamp) of
    the visit stored in ``locations[k]``.
  """
  locations = []
  days = []
  with open(file) as f:
    for line in f:
      fields = line.split()
      if len(fields) < 2:
        # Blank or truncated line: nothing to record.
        continue
      try:
        timestamp = int(fields[0])
      except ValueError:
        # Non-numeric first field (e.g. a header line). Skipping the whole
        # line keeps locations and days aligned.
        continue
      locations.append(fields[1])
      days.append(time.strftime("%A", time.localtime(timestamp)))
  return locations,days

def Split_sequence(sequence,train_percentage):
  """Cut ``sequence`` into a leading training part and a trailing test part.

  Args:
    sequence: Any sliceable sequence (list, string, ...).
    train_percentage: Fraction of the sequence, as a number between 0 and 1,
      that goes into the training part; the cut index is truncated with
      ``int``.

  Returns:
    A tuple ``(train_sequence, test_sequence)``.
  """
  cut = int(len(sequence) * train_percentage)
  return sequence[:cut], sequence[cut:]

# Simple Markov training function

In [None]:
def train_markov_model(locations, w,model=None):
  """Count, for every window of ``w`` locations, which location follows it.

  Args:
    locations: Sequence of location identifiers (strings).
    w: Context length, i.e. number of consecutive locations forming a context.
    model: Optional existing model to update in place; a new one is created
      when omitted.

  Returns:
    The model: a dict mapping a context (the ``w`` locations joined with
    ``','``) to a dict ``{next_location: occurrence_count}``.
  """
  if model is None:
    model = {}

  # A window starting at i needs locations[i + w] as its successor, so the
  # valid starts are 0 .. len(locations) - w - 1. This includes the very last
  # transition of the sequence.
  for i in range(len(locations) - w):
    context = ','.join(locations[i:i+w])
    next_location = locations[i+w]
    next_counts = model.setdefault(context, {})
    next_counts[next_location] = next_counts.get(next_location, 0) + 1
  return model

# Simple prediction function based on the Markov model

In [None]:
def predict_next_location(model,context):
  """Return the most frequent successor of ``context``, or ``None`` if unseen.

  Args:
    model: Dict mapping a context string to ``{next_location: count}``.
    context: Context string to look up.

  Returns:
    The location with the highest count for this context (the first one
    encountered wins a tie), or ``None`` when the context is not in the model.
  """
  if context not in model.keys():
    return None
  candidates = model[context]
  return max(candidates, key=lambda location: candidates.get(location))

# Function to train HMM on one user

In [None]:
def extract_one_day_sequence(locations,days,i):
  """Return the run of locations that share the weekday found at index ``i``.

  Starting at ``i``, the run is extended while consecutive entries of ``days``
  are equal.

  Args:
    locations: Sequence of location identifiers.
    days: Weekday name of each entry of ``locations`` (same length).
    i: Index at which the run starts.

  Returns:
    A tuple ``(run, next_index)`` where ``run`` is the slice of ``locations``
    covering the run and ``next_index`` is the index just past it.
  """
  last_index = len(days) - 1
  end = i
  while end < last_index and days[end + 1] == days[end]:
    end += 1
  end += 1
  return locations[i:end], end

def train_HMM(locations,days,w,models=None):
  """Train one Markov model per weekday from a single location sequence.

  The sequence is walked one run of identical weekday names at a time; each
  run longer than ``w`` is fed to ``train_markov_model`` to update the model
  of its weekday. Runs of ``w`` entries or fewer are ignored.

  Args:
    locations: Sequence of location identifiers.
    days: Weekday name of each entry of ``locations``.
    w: Context length.
    models: Optional dict ``{weekday_name: markov_model}`` to keep updating;
      when omitted, a dict with one empty model per weekday is created.

  Returns:
    The dict of per-weekday models.
  """
  if models is None:
    weekday_names = ('Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday')
    models = {name: {} for name in weekday_names}

  cursor = 0
  limit = len(days) - w
  while cursor < limit:
    weekday = days[cursor]
    run, cursor = extract_one_day_sequence(locations, days, cursor)
    # Only runs holding more than w locations are used for training.
    if len(run) > w:
      models[weekday] = train_markov_model(run, w, model=models[weekday])
  return models

# Convert an HMM model to a simple Markov model

In [None]:
def convert_hmm_to_markov(models):
  """Merge the per-day models into a single day-independent Markov model.

  The counts of all days are summed per context and per next location. The
  result can be used as a fallback when a day model does not contain a
  context that exists in other days.

  The merged model is built from fresh dictionaries only, so ``models`` is
  never modified and shares no inner dictionary with the result.

  Args:
    models: Dict ``{day: {context: {next_location: count}}}``.

  Returns:
    Dict ``{context: {next_location: total_count_over_all_days}}``.
  """
  markov_model = {}
  for day_model in models.values():
    for context, next_counts in day_model.items():
      # setdefault creates a new inner dict for unseen contexts instead of
      # reusing the day's own dict, which later additions would corrupt.
      merged_counts = markov_model.setdefault(context, {})
      for next_location, count in next_counts.items():
        merged_counts[next_location] = merged_counts.get(next_location, 0) + count
  return markov_model

# Function to test HMM on one day

In [None]:
def test_one_day(model,day_locations,w,otherwise_markov=False,markov_model=None):
  sequence_lenght = len(day_locations)
  correct=0
  incorrect=0
  undefined=0
  for i in range(sequence_lenght-w):
    context = ','.join(day_locations[i:i+w])
    prediction = predict_next_location(model,context)

    if prediction is None :
      if otherwise_markov:
        prediction = predict_next_location(markov_model,context)

    real_location = day_locations[i+w]

    if (prediction is None ):
        undefined +=1
    else:
        if (prediction == real_location ):
          correct +=1
        else:
          incorrect +=1
  return correct,incorrect,undefined  

# Function to test HMM on one user 

In [None]:
def test_user_HMM(models,test_locations,test_days,w,otherwise_markov=False,markov_model=None,activate_HMM=True):
  correct=0
  incorrect=0
  undefined=0
  short_days = 0
  i=0
  while (i < len(test_locations)):
      #print(i,len(test_days),len(test_locations))
      day = test_days[i]
      day_locations,i=extract_one_day_sequence(test_locations,test_days,i)
      
      if(len(day_locations)<=w):
        #print('Short day')
        short_days +=1
        undefined +=len(day_locations)
        continue
      
      if (activate_HMM):
        model = models[day]
      else:
        model = markov_model

      day_correct,day_incorrect,day_undefined  = test_one_day(model,day_locations,w,otherwise_markov,markov_model)
      correct += day_correct
      incorrect += day_incorrect
      undefined += day_undefined

  accuracy = correct/len(test_locations)
  return correct,incorrect,undefined,short_days,accuracy  

# HMM training verification

In [None]:
# Sanity check on a single user: train the per-day models on the first 80% of
# the trace, then evaluate three prediction strategies on the remaining 20%.
w = 8
file_name = '/content/drive/My Drive/Mobility test/0000c5112528.log.mv'
locations,days = Load_Data(file_name)

train_locations,test_locations = Split_sequence(locations,train_percentage=0.8)
train_days,test_days = Split_sequence(days,train_percentage=0.8)

print(len(train_locations),len(test_locations))
print(len(train_days),len(test_days))

# The context length is taken from `w` everywhere, so changing it above is
# enough to re-run the check with another window size.
models = train_HMM(train_locations,train_days,w=w)
markov_model = convert_hmm_to_markov(models)
# 1) merged Markov model only
print(test_user_HMM(models,test_locations,test_days,w=w,otherwise_markov=False,markov_model=markov_model,activate_HMM=False))
# 2) per-day models only
print(test_user_HMM(models,test_locations,test_days,w=w,otherwise_markov=False,markov_model=markov_model))
# 3) per-day models with the merged Markov model as fallback
print(test_user_HMM(models,test_locations,test_days,w=w,otherwise_markov=True,markov_model=markov_model))

1524 382
1524 382
(369, 3, 2, 1, 0.9659685863874345)
(362, 2, 10, 1, 0.9476439790575916)
(369, 3, 2, 1, 0.9659685863874345)


# **Test Scenario 1**: Train and test on each user separately


In [None]:
# Scenario 1 settings
w = 1                      # context length
train_percentage = 0.8     # fraction of each sequence used for training
threshold_lenght = 1000    # minimum sequence length for a user to be kept
method = 'HMM'
path = '/content/drive/My Drive/Mobility test/'

# What to do when the day model has no prediction: 'Markov' or 'None'
#undefined_strategy = 'None'
undefined_strategy = 'Markov'

activate_HMM = True
# The fallback is enabled only for the 'Markov' strategy.
otherwise_test_markov = (undefined_strategy == 'Markov')

# Name of the results file; it encodes the settings of this run.
test_configuration = 'Scenario:{0} W:{1} Method:{2} undefined_strategy:{3}.csv'.format(1, w, method, undefined_strategy)
print('Test strategy : ', test_configuration)

Test strategy :  Scenario:1 W:1 Method:HMM undefined_strategy:Markov.csv


In [None]:
import csv
def save_results_csv(output_file,users_files,Corrects,Incorrects,Undefined,Shortdays,Accuracies):
  """Write the per-user results to a CSV file, one row per user.

  The header names the six columns that are actually written. The argument
  lists are combined with ``zip``, so rows stop at the shortest list; callers
  should pass lists of equal length whose entries refer to the same user.

  Args:
    output_file: Path of the CSV file to create (overwritten if it exists).
    users_files: File name of each user.
    Corrects: Number of correct predictions per user.
    Incorrects: Number of incorrect predictions per user.
    Undefined: Number of undefined predictions per user.
    Shortdays: Number of short days per user.
    Accuracies: Accuracy per user.
  """
  header = ['users_files','Corrects','Incorrects','Undefined','Shortdays','Accuracies']
  # newline='' lets the csv module control line endings itself, as the csv
  # documentation requires for files handed to csv.writer.
  with open(output_file, 'w', newline='') as csvfile:
      writer = csv.writer(csvfile, delimiter=',')
      writer.writerow(header)
      writer.writerows(zip(users_files,Corrects,Incorrects,Undefined,Shortdays,Accuracies))

In [None]:
# Scenario 1: for every user, train on the first part of the trace and test
# on the rest, then report per-user and global results.
Accuracies=[]
Corrects=[]
Incorrects=[]
Undefined=[]
Shortdays=[]
# Names of the users actually evaluated, kept aligned with the result lists
# (files that are skipped below must not appear in the saved results).
tested_users=[]
users_files =  os.listdir(path)
print('Number of files :',len(users_files))
for filename in users_files:
    if not filename.endswith("mv"):
        continue

    #Read sequence
    locations,days = Load_Data(os.path.join(path,filename))

    #Ignore sequence if the length is less than threshold_lenght
    if(len(locations)<threshold_lenght):
        continue

    #Split the sequence Train/Test
    train_locations,test_locations = Split_sequence(locations,train_percentage)
    train_days,test_days = Split_sequence(days,train_percentage)

    #Train the per-day models
    models = train_HMM(train_locations,train_days,w)

    #Train a plain Markov model on the whole training sequence. It is used
    #as a fallback when a context is absent from the day model.
    markov_model = train_markov_model(train_locations,w)

    #Test the model
    user_correct,user_incorrect,user_undefined,user_short_day,user_accuracy = test_user_HMM(
        models,
        test_locations,
        test_days,
        w,
        otherwise_markov=otherwise_test_markov,
        markov_model=markov_model,
        activate_HMM=activate_HMM)

    #Collect results
    tested_users.append(filename)
    Corrects.append(user_correct)
    Incorrects.append(user_incorrect)
    Undefined.append(user_undefined)
    Shortdays.append(user_short_day)
    Accuracies.append(user_accuracy)

    #Display results
    print('User : ', filename)
    print('User lenght : ',len(locations))
    print('Correct : ',user_correct )
    print('Incorrect : ',user_incorrect)
    print('Undefined : ',user_undefined)
    print('User short days : ',user_short_day)
    print('Accuracy : ',user_accuracy)
    print("======================================================================================================")

#Save final results in a csv file whose name contains information about the test (w, method ...). The name of the file is : Configuration.csv
#Only the evaluated users are written, so every row pairs a user with its own results.
save_results_csv(test_configuration,tested_users,Corrects,Incorrects,Undefined,Shortdays,Accuracies)

print('Global results : ')
print("======================================================================================================")
print('Corrects : ',sum(Corrects))
print('Incorrects : ',sum(Incorrects))
print('Short days : ',sum(Shortdays))
print('Undefined_Sequences : ',sum(Undefined))
#Guard against an empty result set (no file long enough or with the right extension)
if Accuracies:
    print('Average Acc : ',sum(Accuracies)/len(Accuracies))
else:
    print('Average Acc : ','n/a (no user evaluated)')



[1;30;43mLe flux de sortie a été tronqué et ne contient que les 5000 dernières lignes.[0m
User :  000750615ac5.log.mv
User lenght :  1022
Correct :  33
Incorrect :  63
Undefined :  77
User short days :  0
Accuracy :  0.16097560975609757
User :  00070e661235.log.mv
User lenght :  1795
Correct :  92
Incorrect :  215
Undefined :  12
User short days :  2
Accuracy :  0.2562674094707521
User :  000625a559b5.log.mv
User lenght :  1844
Correct :  288
Incorrect :  49
Undefined :  14
User short days :  1
Accuracy :  0.7804878048780488
User :  00053cb1ad35.log.mv
User lenght :  2856
Correct :  229
Incorrect :  299
Undefined :  3
User short days :  0
Accuracy :  0.40034965034965037
User :  00070e703c25.log.mv
User lenght :  9966
Correct :  915
Incorrect :  1001
Undefined :  50
User short days :  7
Accuracy :  0.458876629889669
User :  000625cb41a5.log.mv
User lenght :  3999
Correct :  782
Incorrect :  0
Undefined :  0
User short days :  0
Accuracy :  0.9775
User :  0002b37d5ad5.log.mv
User lengh

# **Test Scenario 2**: Train on some users and test on other users

In [None]:
# Scenario 2 settings
w = 5                          # context length
train_percentage_users = 0.5   # fraction of the user files used for training
threshold_lenght = 1000        # minimum sequence length for a user to be kept
method = 'HMM'
path = '/content/drive/My Drive/Mobility test/'

# What to do when the day model has no prediction: 'Markov' or 'None'
#undefined_strategy = 'None'
undefined_strategy = 'Markov'

activate_HMM = True
# The fallback is enabled only for the 'Markov' strategy.
otherwise_test_markov = (undefined_strategy == 'Markov')

# Name of the results file; it encodes the settings of this run.
test_configuration = 'Scenario:{0} W:{1} Method:{2} undefined_strategy:{3}.csv'.format(2, w, method, undefined_strategy)
print(test_configuration)

Scenario:2 W:5 Method:HMM undefined_strategy:Markov.csv


In [None]:
def train_on_many_users(train_files):
  """Train one set of per-day models on the traces of several users.

  Only file names ending in "mv" are used. Each one is loaded from the
  notebook-level ``path`` directory and passed to ``train_HMM`` with the
  notebook-level context length ``w``, accumulating into the same models.

  Args:
    train_files: Iterable of file names located in ``path``.

  Returns:
    The accumulated per-day models, or ``None`` when no file name ends in
    "mv".
  """
  accumulated_models = None
  for name in train_files:
    if not name.endswith("mv"):
      continue
    user_locations, user_days = Load_Data(os.path.join(path, name))
    accumulated_models = train_HMM(user_locations, user_days, w, accumulated_models)
  return accumulated_models

In [None]:
# Scenario 2: train shared per-day models on one group of users and test them
# on the remaining users.

# os.listdir returns names in arbitrary order; sorting makes the split of
# users into train and test groups reproducible.
list_of_files=sorted(os.listdir(path))
train_files,test_files =  Split_sequence(list_of_files,train_percentage_users)


print('Start model training')
global_models = train_on_many_users(train_files)
print('End model training')
if global_models is None:
    raise ValueError('No training file ending in "mv" was found in ' + path)

# Day-independent fallback model built from the shared per-day models. A copy
# is merged so the per-day models used for testing are left untouched.
global_models_copy = {day: {context: dict(next_counts) for context, next_counts in day_model.items()}
                      for day, day_model in global_models.items()}
markov_model = convert_hmm_to_markov(global_models_copy)

Accuracies=[]
Corrects=[]
Incorrects=[]
Undefined=[]
Shortdays=[]
# Names of the users actually evaluated, kept aligned with the result lists.
tested_users=[]
for filename in test_files :
    if not filename.endswith("mv"):
        continue

    #Read sequence
    locations,days = Load_Data(os.path.join(path,filename))

    #Ignore sequence if the length is less than threshold_lenght
    if(len(locations)<threshold_lenght):
        continue

    #Keep the same test portion as in Scenario 1 (the training portion of
    #this user is not used: the models come from the other users).
    train_locations,test_locations = Split_sequence(locations,train_percentage)
    train_days,test_days = Split_sequence(days,train_percentage)

    #Test the models trained on the other users
    user_correct,user_incorrect,user_undefined,user_short_day,user_accuracy = test_user_HMM(
        global_models,
        test_locations,
        test_days,
        w,
        otherwise_markov=otherwise_test_markov,
        markov_model=markov_model,
        activate_HMM=activate_HMM)

    #Collect results
    tested_users.append(filename)
    Corrects.append(user_correct)
    Incorrects.append(user_incorrect)
    Undefined.append(user_undefined)
    Shortdays.append(user_short_day)
    Accuracies.append(user_accuracy)

    #Display results
    print('User : ', filename)
    print('User lenght : ',len(locations))
    print('Correct : ',user_correct )
    print('Incorrect : ',user_incorrect)
    print('Undefined : ',user_undefined)
    print('User short days : ',user_short_day)
    print('Accuracy : ',user_accuracy)
    print("======================================================================================================")

#Save final results in a csv file whose name contains information about the test (w, method ...). The name of the file is : Configuration.csv
#Only the evaluated users are written, so every row pairs a user with its own results.
save_results_csv(test_configuration,tested_users,Corrects,Incorrects,Undefined,Shortdays,Accuracies)

print('Global results : ')
print("======================================================================================================")
print('Corrects : ',sum(Corrects))
print('Incorrects : ',sum(Incorrects))
print('Short days : ',sum(Shortdays))
print('Undefined_Sequences : ',sum(Undefined))
#Guard against an empty result set (no file long enough or with the right extension)
if Accuracies:
    print('Average Acc : ',sum(Accuracies)/len(Accuracies))
else:
    print('Average Acc : ','n/a (no user evaluated)')


[1;30;43mLe flux de sortie a été tronqué et ne contient que les 5000 dernières lignes.[0m
User :  000750615ac5.log.mv
User lenght :  1022
Correct :  4
Incorrect :  8
Undefined :  128
User short days :  19
Accuracy :  0.01951219512195122
User :  00070e661235.log.mv
User lenght :  1795
Correct :  0
Incorrect :  6
Undefined :  213
User short days :  14
Accuracy :  0.0
User :  000625a559b5.log.mv
User lenght :  1844
Correct :  229
Incorrect :  44
Undefined :  31
User short days :  6
Accuracy :  0.6205962059620597
User :  00053cb1ad35.log.mv
User lenght :  2856
Correct :  111
Incorrect :  114
Undefined :  192
User short days :  10
Accuracy :  0.19405594405594406
User :  00070e703c25.log.mv
User lenght :  9966
Correct :  1587
Incorrect :  229
Undefined :  118
User short days :  23
Accuracy :  0.7958876629889668
User :  000625cb41a5.log.mv
User lenght :  3999
Correct :  713
Incorrect :  0
Undefined :  2
User short days :  1
Accuracy :  0.89125
User :  0002b37d5ad5.log.mv
User lenght :  2701