In [12]:
import pandas as pd
import numpy as np

from numpy import array, zeros, argmin, inf, equal, ndim
from scipy.spatial.distance import cdist

from sklearn.metrics.pairwise import euclidean_distances
from nltk.metrics.distance import edit_distance

def dtw(x, y, dist):
    """
    Computes Dynamic Time Warping (DTW) of two sequences.

    :param array x: N1*M array
    :param array y: N2*M array
    :param func dist: distance used as cost measure; called as dist(x[i], y[j])
    :returns: tuple ``(distance, C, D1, path)``: the accumulated cost of the
        best alignment divided by ``len(x) + len(y)``, the pairwise cost
        matrix, the accumulated cost matrix, and the warp path as a pair of
        integer index arrays (indices into x, indices into y).
    :raises AssertionError: if x or y is empty
    """
    assert len(x)
    assert len(y)
    r, c = len(x), len(y)
    # D0 has one extra leading row and column: D0[0, 0] = 0 is the starting
    # point and the rest of that border is inf, so the recurrence below never
    # picks a predecessor outside the real matrix.
    D0 = zeros((r + 1, c + 1))
    D0[0, 1:] = inf
    D0[1:, 0] = inf
    D1 = D0[1:, 1:]  # view into D0: writes to D1 are visible through D0
    for i in range(r):
        for j in range(c):
            D1[i, j] = dist(x[i], y[j])
    C = D1.copy()  # keep the raw pairwise costs before accumulating in place
    for i in range(r):
        for j in range(c):
            # D0[i, j], D0[i, j+1], D0[i+1, j] are the diagonal, upper and
            # left neighbours of D1[i, j] respectively.
            D1[i, j] += min(D0[i, j], D0[i, j+1], D0[i+1, j])
    if r == 1:
        # Single-element x: every y[j] is matched to x[0]. Integer arrays are
        # returned so the path has the same form as _traceback's result and
        # can be used directly for indexing.
        path = zeros(c, dtype=int), np.arange(c)
    elif c == 1:
        # Single-element y: every x[i] is matched to y[0].
        path = np.arange(r), zeros(r, dtype=int)
    else:
        path = _traceback(D0)
    # The distance is normalised by len(x) + len(y) (the sum of D1's
    # dimensions); the notebook author was unsure of the rationale for this.
    return D1[-1, -1] / sum(D1.shape), C, D1, path
    #return D1[-1, -1], C, D1, path  # un-normalised variant, for comparing against plain DTW

def _traceback(D):
    """
    Recovers the warp path from an accumulated cost matrix.

    :param array D: accumulated cost matrix carrying one extra leading row
        and column (the walk starts at index ``shape - 2`` and reads the
        neighbours at ``[i, j]``, ``[i, j+1]`` and ``[i+1, j]``)
    Returns two index arrays (row indices, column indices) ordered from the
    start of the path to its end.
    """
    row, col = array(D.shape) - 2
    # Collect the path end-to-start and flip it once at the end.
    rows_reversed, cols_reversed = [row], [col]
    while not (row <= 0 and col <= 0):
        # argmin returns the first minimum, so ties favour the diagonal
        # step, then the row step, then the column step.
        step = argmin((D[row, col], D[row, col + 1], D[row + 1, col]))
        if step == 0:
            row, col = row - 1, col - 1
        elif step == 1:
            row = row - 1
        else:
            col = col - 1
        rows_reversed.append(row)
        cols_reversed.append(col)
    return array(rows_reversed[::-1]), array(cols_reversed[::-1])

if __name__ == '__main__':
    # Pairwise DTW between the rows of a spreadsheet: every row is treated as
    # one series and the distances are written to out.csv.

    base = pd.read_excel("liquid_rates.xls")  # read the data from the file
    #base = pd.read_excel("qqq.xls")  # alternative input file

    base = base.values  # switch from a pandas DataFrame to a NumPy array

    def _values_before_first_nan(series):
        """Return, as a list, the entries of `series` that precede its first NaN.

        Unlike an unbounded `while not isnan(series[i])` scan, this stops
        cleanly at the end of a series that contains no NaN at all.
        """
        kept = []
        for value in series:
            if np.isnan(value):
                break
            kept.append(value)
        return kept

    def dist_fun(a, b):
        """Distance between two scalar samples.

        For scalars the Euclidean distance is simply |a - b|.
        sklearn's euclidean_distances is documented for 2-D array inputs,
        so it is not applied to individual scalars here.
        """
        return abs(a - b)

    # Alternative inputs kept from earlier experiments.
    #
    # distance between lists of words:
    #x = ['we', 'shelled', 'clams', 'for', 'the', 'chowder']
    #y = ['class', 'too']
    #dist_fun = edit_distance
    #
    #x = ['i', 'soon', 'found', 'myself', 'muttering', 'to', 'the', 'walls']
    #y = ['see', 'drown', 'himself']
    #dist_fun = edit_distance
    #
    #x = 'we talked about the situation'.split()
    #y = 'we talked about the situation'.split()
    #dist_fun = edit_distance
    #
    # distance between 2-D arrays:
    #x = [[0, 0], [0, 1], [1, 1], [1, 2], [2, 2], [4, 3], [2, 3], [1, 1], [2, 2], [0, 1]]
    #y = [[1, 0], [1, 1], [1, 1], [2, 1], [4, 3], [4, 3], [2, 3], [3, 1], [1, 2], [1, 0]]
    #dist_fun = euclidean_distances
    #
    # distance between 1-D arrays:
    #x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 90]
    #y = [0, 1, 2, 3, 4, 5, 6, 7, 8]

    # results_[n][m] receives the DTW distance between rows n and m. Only the
    # strictly lower triangle (m < n) is computed; all other entries stay 0.
    results_ = np.zeros((base.shape[0], base.shape[0]))

    for n in range(base.shape[0]):
        # Column 0 is skipped (presumably a label/identifier column -- confirm
        # against the spreadsheet). Row n is prepared once per outer iteration.
        x = base[n, 1:]
        x_notnan = _values_before_first_nan(x)  # drop the NaN tail

        for m in range(n):
            y = base[m, 1:]
            y_notnan = _values_before_first_nan(y)  # drop the NaN tail

            dist, cost, acc, path = dtw(x_notnan, y_notnan, dist_fun)
            results_[n][m] = dist
            print('n ', n, 'm ', m)

    # visualize
    print(results_)
    #np.savetxt('test.out',results_, delimiter=',', newline = ';')
    df = pd.DataFrame(results_)
    df.to_csv('out.csv')

    # Disabled plotting code, kept as an inert string literal.
    """
    from matplotlib import pyplot as plt
    plt.imshow(cost.T, origin='lower', cmap=plt.cm.Reds, interpolation='nearest')
    plt.plot(path[0], path[1], '-o') # relation
    plt.xticks(range(len(x)), x)
    plt.yticks(range(len(y)), y)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.axis('tight')
    plt.title('Minimum distance: {}'.format(dist))
    plt.show()
    """
    

n  1 m  0
n  2 m  0
n  2 m  1
n  3 m  0
n  3 m  1
n  3 m  2
n  4 m  0
n  4 m  1
n  4 m  2
n  4 m  3
n  5 m  0
n  5 m  1
n  5 m  2
n  5 m  3
n  5 m  4
n  6 m  0
n  6 m  1
n  6 m  2
n  6 m  3
n  6 m  4
n  6 m  5
n  7 m  0
n  7 m  1
n  7 m  2
n  7 m  3
n  7 m  4
n  7 m  5
n  7 m  6
n  8 m  0
n  8 m  1
n  8 m  2
n  8 m  3
n  8 m  4
n  8 m  5
n  8 m  6
n  8 m  7
n  9 m  0
n  9 m  1
n  9 m  2
n  9 m  3
n  9 m  4
n  9 m  5
n  9 m  6
n  9 m  7
n  9 m  8
n  10 m  0
n  10 m  1
n  10 m  2
n  10 m  3
n  10 m  4
n  10 m  5
n  10 m  6
n  10 m  7
n  10 m  8
n  10 m  9
n  11 m  0
n  11 m  1
n  11 m  2
n  11 m  3
n  11 m  4
n  11 m  5
n  11 m  6
n  11 m  7
n  11 m  8
n  11 m  9
n  11 m  10
n  12 m  0
n  12 m  1
n  12 m  2
n  12 m  3
n  12 m  4
n  12 m  5
n  12 m  6
n  12 m  7
n  12 m  8
n  12 m  9
n  12 m  10
n  12 m  11
n  13 m  0
n  13 m  1
n  13 m  2
n  13 m  3
n  13 m  4
n  13 m  5
n  13 m  6
n  13 m  7
n  13 m  8
n  13 m  9
n  13 m  10
n  13 m  11
n  13 m  12
n  14 m  0
n  14 m  1
n  14 m  2
n  14

In [7]:
# Just another DTW script; this one works too.
import pandas as pd
import numpy as np
import math

def DTWDistance(s1, s2):
    """Return the DTW distance between two numeric sequences.

    The per-step cost is the squared difference of the paired elements; the
    result is the square root of the minimal accumulated cost over all
    alignments. Two empty sequences give 0.0; exactly one empty sequence
    gives infinity.
    """
    rows, cols = len(s1), len(s2)
    unreachable = float('inf')

    # table[i + 1][j + 1] is the accumulated cost of aligning s1[:i + 1] with
    # s2[:j + 1]. Row 0 and column 0 are the border: 0 at the origin and
    # infinity elsewhere.
    table = [[unreachable] * (cols + 1) for _ in range(rows + 1)]
    table[0][0] = 0

    for i in range(rows):
        current, previous = table[i + 1], table[i]
        for j in range(cols):
            step_cost = (s1[i] - s2[j]) ** 2
            current[j + 1] = step_cost + min(previous[j + 1], current[j], previous[j])

    return math.sqrt(table[rows][cols])

if __name__ == '__main__':
    # Self-contained demo of DTWDistance on two hard-coded series.
    # Rows 0 and 1 of liquid_rates.xls used to be loaded into s1/s2 here, but
    # both were overwritten by the lists below before being used, so the
    # spreadsheet read was dead code and has been dropped.
    s1 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 90]
    s2 = [0, 1, 2, 3, 4, 5, 6, 7, 8]

    # The series agree on 0..8; the extra 90 is matched with 8, so the
    # distance is sqrt((90 - 8) ** 2) = 82.0.
    q = DTWDistance(s1, s2)
    print(q)

82.0
