# MMC et reconnaissance de lettres

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pickle as pkl

## Chargement de données

In [4]:
# Friendlier display of NumPy matrices.
np.set_printoptions(precision=2, linewidth=320)
plt.close('all')

# NOTE(security): unpickling can execute arbitrary code; only load this file
# from a trusted source.
with open('ressources/lettres.pkl', 'rb') as f:
    data = pkl.load(f, encoding='latin1')

# Letter signals. If the sequences have different lengths, recent NumPy
# versions refuse to build the array implicitly and raise ValueError, so fall
# back to an explicit 1-D object array (one entry per sequence), which is what
# older NumPy versions produced silently.
try:
    X = np.array(data.get('letters'))
except ValueError:
    X = np.array(data.get('letters'), dtype=object)
Y = np.array(data.get('labels'))  # label associated with each sequence

nCl = 26
print('X0 = ', X[0])
print('Y = ', Y)

X0 =  [  36.21  347.72  322.09  312.23  314.85  315.49  313.56  326.53  141.29  167.61  199.32  217.91  226.44  235.    252.35  270.05  291.67  350.93   17.89   20.28   28.21   43.88   53.46]
Y =  ['a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'b' 'b' 'b' 'b' 'b' 'b' 'b' 'b' 'b' 'b' 'b' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'd' 'd' 'd' 'd' 'd' 'd' 'd' 'd' 'd' 'd' 'd' 'e' 'e' 'e' 'e' 'e' 'e' 'e' 'e' 'e' 'e' 'e' 'f' 'f' 'f' 'f' 'f' 'f' 'f' 'f' 'f' 'f' 'f' 'g' 'g' 'g' 'g' 'g' 'g' 'g' 'g' 'g' 'g' 'g' 'h' 'h' 'h'
 'h' 'h' 'h' 'h' 'h' 'h' 'h' 'h' 'i' 'i' 'i' 'i' 'i' 'i' 'i' 'i' 'i' 'i' 'j' 'j' 'j' 'j' 'j' 'j' 'j' 'j' 'j' 'j' 'k' 'k' 'k' 'k' 'k' 'k' 'k' 'k' 'k' 'k' 'l' 'l' 'l' 'l' 'l' 'l' 'l' 'l' 'l' 'l' 'm' 'm' 'm' 'm' 'm' 'm' 'm' 'm' 'm' 'm' 'n' 'n' 'n' 'n' 'n' 'n' 'n' 'n' 'n' 'n' 'o' 'o' 'o' 'o' 'o' 'o' 'o' 'o' 'o' 'o' 'p' 'p'
 'p' 'p' 'p' 'p' 'p' 'p' 'p' 'p' 'q' 'q' 'q' 'q' 'q' 'q' 'q' 'q' 'q' 'q' 'r' 'r' 'r' 'r' 'r' 'r' 'r' 'r' 'r' 'r' 's' 's' 's' 's' 's' 's' 's' 's' 's' 's' 't' 't' 

## Apprentissage d'un modèle connaissant les états

### Hypothèse Gauche-Droite

In [6]:
def initGD(X,N):
    """Build a left-to-right state labelling for every sequence in X.

    Each sequence of length L gets L state values obtained by flooring L
    evenly spaced points of [0, N - 1e-8], i.e. values 0 .. N-1 in
    non-decreasing order, spread uniformly over the sequence.

    Parameters
    ----------
    X : iterable of sized sequences (only their lengths are used).
    N : number of states.

    Returns
    -------
    numpy.ndarray
        A 2-D float array when all sequences have the same length (or X is
        empty); otherwise a 1-D object array holding one float array per
        sequence.
    """
    S = [np.floor(np.linspace(0, N - .00000001, len(x))) for x in X]

    # Equal-length sequences stack cleanly into a regular 2-D array.
    if len({len(s) for s in S}) <= 1:
        return np.array(S)

    # Ragged case: recent NumPy versions raise ValueError when asked to infer
    # an array from sequences of different lengths, so fill an object array
    # one element at a time.
    ragged = np.empty(len(S), dtype=object)
    for i, s in enumerate(S):
        ragged[i] = s
    return ragged

# Label every sequence with a 4-state left-to-right segmentation and show the
# length of the first state sequence.
S = initGD(X, 4)
print(len(S[0]))

23


In [7]:
def discretise(X,d):
    """Quantise angle sequences into d equal-width bins over 360 degrees.

    Every value v is replaced by floor(v / (360 / d)). Values in [0, 360)
    therefore map to bins 0 .. d-1; a value of exactly 360 maps to bin d.

    Parameters
    ----------
    X : array of sequences (each entry must support division by a float).
    d : number of bins.

    Returns
    -------
    A copy of X (made with np.array) whose entries hold the bin indices; the
    input itself is not modified.
    """
    bin_width = 360. / d
    Xd = np.array(X)
    for idx, sequence in enumerate(X):
        Xd[idx] = np.floor(sequence / bin_width)
    return Xd


# Quantise the angles into K bins and show the first sequence next to its
# state labelling.
K = 10
Xd = discretise(X, K)
print('X0 = ', Xd[0])
print('S0 = ', S[0])

X0 =  [ 1.  9.  8.  8.  8.  8.  8.  9.  3.  4.  5.  6.  6.  6.  7.  7.  8.  9.  0.  0.  0.  1.  1.]
S0 =  [ 0.  0.  0.  0.  0.  0.  1.  1.  1.  1.  1.  1.  2.  2.  2.  2.  2.  3.  3.  3.  3.  3.  3.]


### Apprentissage

In [8]:
def learnHMM(allx, allS, N, K, initTo0=False):
    """Estimate HMM parameters by counting, given known state sequences.

    Parameters
    ----------
    allx : indexable collection of observation sequences (symbols 0 .. K-1).
    allS : collection of state sequences (states 0 .. N-1); allS[i] labels
        allx[i] position by position.
    N : number of states.
    K : number of distinct observation symbols.
    initTo0 : if True, counts start at 0; otherwise every count starts at
        1e-8 so that no probability is exactly zero.

    Returns
    -------
    (Pi, A, B)
        Pi : (N,) initial-state distribution.
        A  : (N, N) transition matrix, A[i, j] ~ P(next state j | state i).
        B  : (N, K) emission matrix, B[i, k] ~ P(symbol k | state i).

    Notes
    -----
    States and symbols may be stored as floats (e.g. 3.0); they are cast to
    int before being used as indices, because NumPy rejects float indices.
    Empty state sequences are skipped.
    Each row is divided by max(row sum, 1): a row whose total is below 1
    (for instance a state that never occurs) is left as is rather than being
    rescaled, which also avoids dividing by zero.
    """
    if initTo0:
        A = np.zeros((N, N))
        B = np.zeros((N, K))
        Pi = np.zeros(N)
    else:
        eps = 1e-8
        A = np.ones((N, N)) * eps
        B = np.ones((N, K)) * eps
        Pi = np.ones(N) * eps

    for i, state_seq in enumerate(allS):
        states = np.asarray(state_seq).astype(int)
        obs = np.asarray(allx[i]).astype(int)
        if states.size == 0:
            continue  # nothing to count for an empty sequence

        Pi[states[0]] += 1.
        # Transition counts between consecutive states.
        for src, dst in zip(states[:-1], states[1:]):
            A[src, dst] += 1.
        # Emission counts: one observation per time step.
        for t, state in enumerate(states):
            B[state, obs[t]] += 1.

    # Turn counts into probabilities (divisor clamped at 1, see Notes).
    for i in range(N):
        A[i] = A[i] / max(A[i].sum(), 1)
        B[i] = B[i] / max(B[i].sum(), 1)
    Pi = Pi / max(Pi.sum(), 1)
    return Pi, A, B

K = 10  # number of bins, i.e. 10 possible observation symbols
N = 5   # 5 possible states (numbered 0 to 4)

# Xd holds the quantised angles, S the left-to-right state labelling.
Xd = discretise(X, K)
S = initGD(X, N)

# Fit a model on the sequences labelled 'a', starting from zero counts.
Pi, A, B = learnHMM(Xd[Y == 'a'], S[Y == 'a'], N, K, initTo0=True)

print('Pi = ', Pi)
print('A = ', A)
print('B = ', B)

Pi =  [ 1.  0.  0.  0.  0.]
A =  [[ 0.79  0.21  0.    0.    0.  ]
 [ 0.    0.76  0.24  0.    0.  ]
 [ 0.    0.    0.77  0.23  0.  ]
 [ 0.    0.    0.    0.76  0.24]
 [ 0.    0.    0.    0.    1.  ]]
B =  [[ 0.06  0.02  0.    0.    0.    0.    0.    0.04  0.49  0.4 ]
 [ 0.    0.04  0.    0.13  0.09  0.13  0.02  0.09  0.41  0.09]
 [ 0.    0.    0.    0.02  0.12  0.5   0.31  0.04  0.    0.  ]
 [ 0.07  0.    0.    0.    0.    0.    0.26  0.33  0.2   0.15]
 [ 0.73  0.12  0.    0.    0.    0.    0.    0.02  0.02  0.12]]




### Viterbi (en log)

In [9]:
def viterbi(x,Pi,A,B):
    """Find the most likely state sequence for x (Viterbi, in log space).

    Parameters
    ----------
    x : sequence of observation symbols (column indices of B). Float-valued
        symbols such as 3.0 are accepted and cast to int.
    Pi : (n,) initial-state probabilities.
    A : (n, n) transition probabilities, A[i, j] = P(state j | state i).
    B : (n, K) emission probabilities, B[j, k] = P(symbol k | state j).

    Returns
    -------
    (s, logp)
        s : float array of length len(x) holding the best state at each time
            step (kept as floats, as in the previous implementation).
        logp : log-probability of that state path together with x.

    Raises
    ------
    IndexError
        If x is empty.
    """
    obs = np.asarray(x).astype(int)  # NumPy rejects float indices
    T = len(obs)
    n = len(Pi)

    # Take the logs once. Zero probabilities legitimately become -inf, so
    # silence the divide-by-zero warning np.log would emit for them.
    with np.errstate(divide='ignore'):
        log_pi = np.log(Pi)
        log_A = np.log(A)
        log_B = np.log(B)

    sigma = np.zeros((T, n))            # best log-score ending in each state
    phi = -np.ones((T, n), dtype=int)   # back-pointers (-1 = unset, row 0)
    sigma[0] = log_pi + log_B[:, obs[0]]

    for t in range(1, T):
        # scores[i, j]: best score reaching state j at time t through state i.
        scores = sigma[t - 1][:, np.newaxis] + log_A
        phi[t] = scores.argmax(axis=0)
        sigma[t] = scores.max(axis=0) + log_B[:, obs[t]]

    # Backtrack from the best final state.
    path = np.zeros(T, dtype=int)
    path[T - 1] = sigma[T - 1].argmax()
    for t in range(T - 2, -1, -1):
        path[t] = phi[t + 1, path[t + 1]]

    return path.astype(float), sigma[T - 1].max()

# Decode the first quantised sequence with the model learned above and show
# the estimated state path and its log-probability.
s_est, p_est = viterbi(Xd[0], Pi, A, B)

print('s_est = ', s_est)
print('p_est = ', p_est)

s_est =  [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  2.  2.  2.  2.  2.  3.  3.  3.  3.  4.  4.  4.  4.  4.]
p_est =  -38.0935655456


