In [1]:
# "A segment-based fitness measure for capturing repetitive structures of music recordings"
# by Meinard Müller, Peter Grosche, Nanzhu Jiang

import numpy as np
import librosa
import matplotlib.pyplot as plt
from ssm import ssm

class audio_thumb_muller:
    """Audio thumbnailing based on a fitness measure over a self-similarity matrix.

    For every window of ``alpha`` consecutive SSM columns an accumulated-score
    matrix is filled by dynamic programming, a path family is backtracked from
    it, and coverage/score/fitness values are derived. The window with the
    highest fitness is reported as the thumbnail.

    Attributes:
        ssm: object returned by ``ssm(audio_path, k, t)``. This class reads
            ``ssm.s`` (2-D similarity matrix) and ``ssm.duration``.
    """

    def __init__(self, audio_path, t = 'chroma', k = 10):
        """Compute the self-similarity matrix for ``audio_path``.

        ``t`` and ``k`` are forwarded unchanged to ``ssm``; their meaning is
        defined by that module (presumably feature type and a smoothing or
        neighbourhood size -- TODO confirm).
        """
        self.ssm = ssm(audio_path, k, t)

    def calculate_path(self, pos, D):
        """Backtrack a path family through the accumulated-score matrix ``D``.

        ``D`` is indexed bottom-up: row ``i`` corresponds to SSM row
        ``N - 1 - i``; column 0 is the auxiliary "skip" column and column
        ``j >= 1`` corresponds to window column ``j - 1``.

        Args:
            pos: ``(i, j)`` start cell in ``D``.
            D: 2-D array of accumulated scores.

        Returns:
            List of ``(ssm_row, window_col)`` tuples for every kept path.
            Cells visited in the auxiliary column come out with
            ``window_col == -1``.
        """
        N, M = D.shape
        path = [pos]
        path_alpha = []
        i, j = pos
        while True:
            if j == 0 and i < N - 1:
                if D[i + 1, 0] > D[i + 1, M - 1]:
                    # Stay in the auxiliary column.
                    i += 1
                    path.append((i, 0))
                else:
                    # Jump to the last column: the current path is closed
                    # (including the cell jumped to) and a new one starts.
                    i, j = i + 1, M - 1
                    path.append((i, j))
                    path_alpha.append(path)
                    path = []
            elif j == 1 and i < N - 1:
                # NOTE(review): (i, 1) was already appended when it was
                # reached, so it is recorded twice -- confirm this is intended.
                path.append((i, j))
                j = 0
            else:
                tmp = []
                if i + 1 < N and j - 1 >= 0: tmp.append((D[i + 1, j - 1], (i + 1, j - 1)))
                if i + 1 < N and j - 2 >= 0: tmp.append((D[i + 1, j - 2], (i + 1, j - 2)))
                if i + 2 < N and j - 1 >= 0: tmp.append((D[i + 2, j - 1], (i + 2, j - 1)))

                if tmp:
                    _, (i, j) = max(tmp)
                    path.append((i, j))
                else:
                    # No admissible predecessor left. `path` is empty when the
                    # walk stops right after a reset; max()/min() would raise
                    # on it, so there is simply nothing more to keep.
                    if path:
                        xmax = max(path, key = lambda item:item[1])
                        xmin = min(path, key = lambda item:item[1])
                        # NOTE(review): a path that still holds the start cell
                        # in column M - 1 and ends in column 1 sums to M and is
                        # therefore dropped by this test -- confirm intended.
                        if xmax[1] + xmin[1] == M - 1 or xmax[1] + xmin[1] == M - 2:
                            path_alpha.append(path)
                    break

            if i == 0 and j == 0:
                break

        # Convert from D coordinates to (SSM row, window column).
        return [(N - 1 - x[0], x[1] - 1) for kept in path_alpha for x in kept]

    def calculate_coverage(self, path_family, alpha, N):
        """Return the normalised coverage ``(gamma - alpha) / N``.

        NOTE(review): the original author flagged this formula as needing
        verification; it is kept unchanged. Returns 0.0 instead of dividing
        by zero when ``alpha`` or ``N`` is 0.
        """
        if alpha == 0 or N == 0:
            return 0.0
        gamma = len(path_family) - len(path_family)/alpha
        return (gamma - alpha)/N

    def calculate_score(self, path_family, score_opt, alpha):
        """Return ``(score_opt - alpha) / len(path_family)``.

        An empty path family yields 0.0 (previously a ZeroDivisionError).
        """
        if not path_family:
            return 0.0
        return (score_opt - alpha)/len(path_family)

    def calculate_fitness(self, gamma, mi):
        """Return ``2 * gamma * mi / (gamma + mi)``; 0.0 when the sum is zero."""
        denom = gamma + mi
        if denom == 0:
            return 0.0
        return 2*(gamma * mi/denom)

    def print_status(self, M, low, alpha):
        """Print the progress percentage for window start ``low``.

        When ``M == alpha`` there is a single window, so 100.00 is printed
        instead of dividing by zero.
        """
        span = M - alpha
        pct = 100*low/span if span else 100.0
        print("{0:.2f}".format(pct), end = ' ')

    def visualize(self, S):
        """Plot matrix ``S`` with a colour bar."""
        # A bare `import librosa` does not guarantee that the display
        # submodule is loaded, so import it explicitly before use.
        import librosa.display
        plt.figure(figsize=(12, 8))
        librosa.display.specshow(S, x_axis='frames', y_axis='frames', n_xticks=12)
        plt.title('SSM')
        plt.set_cmap('hot_r')
        plt.colorbar()
        plt.show()

    def display_path(self, path_family, low):
        """Plot a copy of the SSM with the cells of ``path_family`` marked.

        ``path_family`` holds window-relative columns; ``low`` is the window
        start and is added to obtain SSM columns.
        """
        S = self.ssm.s.copy()
        for row, col in path_family:
            if col < 0:
                # Auxiliary-column cell from calculate_path: it has no SSM
                # counterpart and would otherwise paint column low - 1
                # (wrapping to the last column when low == 0).
                continue
            S[row, col + low] = 5  # marker value for path cells
        self.visualize(S)

    def max_path_family(self, S, alpha):
        """Compute a fitness value for every window of ``alpha`` columns of ``S``.

        Args:
            S: 2-D similarity matrix.
            alpha: window (segment) length in frames; must be >= 1.

        Returns:
            List of ``(fitness, low)`` tuples, one per window start ``low``.
            Empty when ``alpha`` exceeds the number of columns of ``S``.

        Raises:
            ValueError: if ``alpha < 1``.
        """
        N, M = S.shape
        if alpha < 1:
            raise ValueError("alpha must be at least 1 frame, got {}".format(alpha))
        D = np.zeros((N, alpha + 1))
        fitness_list = []
        for low in range(0, M - alpha + 1):
            Sa = S[:, low:low + alpha]
            D[N - 1, 2:alpha + 1] = -np.inf
            for i in range(N - 1, -1, -1):
                for j in range(alpha + 1):
                    if j == 0 and i + 1 < N:
                        D[i, j] = max(D[i + 1, 0], D[i + 1, alpha])
                    elif j == 1:
                        D[i, j] = D[i, 0] + Sa[N - 1 - i, 0]
                    elif i != N - 1 and j != 0:
                        # Here j >= 2 and i + 1 < N always hold, so only the
                        # (j - 2) and (i + 2) predecessors need a bounds test;
                        # an unavailable predecessor contributes 0.
                        D[i, j] = Sa[N - 1 - i, j - 1] + max(
                            D[i + 1, j - 1],
                            D[i + 1, j - 2] if j - 2 > 0 else 0,
                            D[i + 2, j - 1] if i + 2 < N else 0)
            possible_max = [D[0, alpha], D[0, 0]]
            score_opt = max(possible_max)
            arg = np.argmax(possible_max)
            path_family = self.calculate_path((0, 0) if arg else (0, alpha), D)
            gamma = self.calculate_coverage(path_family, alpha, N)
            mi = self.calculate_score(path_family, score_opt, alpha)
            fitness = self.calculate_fitness(gamma, mi)
            fitness_list.append((fitness, low))
            self.print_status(M, low, alpha)
        return fitness_list

    def thumb_alpha(self, alpha):
        """Print the start time and fitness of the best window of ``alpha`` frames.

        Raises:
            ValueError: if no window of that length fits into the SSM.
        """
        fitness_list = self.max_path_family(self.ssm.s, alpha)
        if not fitness_list:
            raise ValueError("segment length alpha={} exceeds the SSM width {}".format(
                alpha, self.ssm.s.shape[1]))
        (max_fit, max_low) = max(fitness_list, key = lambda item:item[0])
        print("Thumbnail init: " + str(self.frame_to_time(max_low)) + " with: " + str(max_fit) + " of fitness value.")

    def thumb_time(self, time):
        """Same as ``thumb_alpha`` with the length given in ``ssm.duration`` units."""
        self.thumb_alpha(self.time_to_frame(time))

    def frame_to_time(self, f):
        """Convert a frame index to time, using ``ssm.duration`` over the SSM row count."""
        dt = self.ssm.duration/self.ssm.s.shape[0]
        return dt*f

    def time_to_frame(self, time):
        """Convert a time to a frame count (truncated to ``int``)."""
        df = self.ssm.s.shape[0]/self.ssm.duration
        return int(df*time)


In [2]:
# Build the thumbnailer for 'sun.wav'; k is forwarded to ssm() (its meaning is defined in the ssm module -- TODO confirm).
at = audio_thumb_muller('sun.wav', k = 10)

Calculating features...
Features calculated.
Calculating SSM...
SSM calculated.


In [3]:
# Search for the best thumbnail of length 40 (same units as ssm.duration, presumably seconds) and print its start and fitness.
at.thumb_time(40)

0.00 0.16 0.32 0.48 0.64 0.80 0.96 1.12 1.28 1.44 1.60 1.76 1.92 2.08 2.24 2.40 2.56 2.72 2.88 3.04 3.20 3.36 3.52 3.68 3.84 4.00 4.16 4.32 4.48 4.64 4.80 4.96 5.12 5.28 5.44 5.60 5.76 5.92 6.08 6.24 6.40 6.56 6.72 6.88 7.04 7.20 7.36 7.52 7.68 7.84 8.00 8.16 8.32 8.48 8.64 8.80 8.96 9.12 9.28 9.44 9.60 9.76 9.92 10.08 10.24 10.40 10.56 10.72 10.88 11.04 11.20 11.36 11.52 11.68 11.84 12.00 12.16 12.32 12.48 12.64 12.80 12.96 13.12 13.28 13.44 13.60 13.76 13.92 14.08 14.24 14.40 14.56 14.72 14.88 15.04 15.20 15.36 15.52 15.68 15.84 16.00 16.16 16.32 16.48 16.64 16.80 16.96 17.12 17.28 17.44 17.60 17.76 17.92 18.08 18.24 18.40 18.56 18.72 18.88 19.04 19.20 19.36 19.52 19.68 19.84 20.00 20.16 20.32 20.48 20.64 20.80 20.96 21.12 21.28 21.44 21.60 21.76 21.92 22.08 22.24 22.40 22.56 22.72 22.88 23.04 23.20 23.36 23.52 23.68 23.84 24.00 24.16 24.32 24.48 24.64 24.80 24.96 25.12 25.28 25.44 25.60 25.76 25.92 26.08 26.24 26.40 26.56 26.72 26.88 27.04 27.20 27.36 27.52 27.68 27.84 28.00 28.16 2

In [None]:
# Inspect the dimensions of the self-similarity matrix.
at.ssm.s.shape

In [None]:
# Plot the SSM as computed, before the thresholding cell below.
at.ssm.visualize()

In [None]:
# Apply ssm.threshold with 0.7 -- presumably modifies at.ssm.s in place; verify in the ssm module.
at.ssm.threshold(0.7)

In [None]:
# Plot the SSM again to compare against the figure produced before thresholding.
at.ssm.visualize()