In [1]:
import numpy as np
def paa(x, w):
    """Piecewise Aggregate Approximation (PAA) of a 1-D sequence.

    The sequence is cut into ``w`` contiguous segments of (nearly) equal
    length and each segment is replaced by its arithmetic mean.

    Parameters
    ----------
    x : sequence of numbers (list or numpy array)
        The time series to reduce.
    w : int
        Number of segments; must satisfy ``1 <= w <= len(x)``.

    Returns
    -------
    list of float
        ``w`` segment means, in order.

    Raises
    ------
    ValueError
        If ``w`` is smaller than 1 or larger than ``len(x)`` (some segment
        would be empty and its mean undefined).
    """
    n = len(x)
    if w < 1:
        raise ValueError(f"w must be a positive number of segments, got {w}")
    if w > n:
        raise ValueError(
            f"cannot split a series of length {n} into {w} non-empty segments"
        )

    values = np.asarray(x, dtype=float)
    # Integer arithmetic gives floor(i * n / w) exactly; computing the same
    # boundary as int(i * (n / w)) is exposed to floating-point rounding.
    bounds = [(i * n) // w for i in range(w + 1)]
    # Plain Python floats keep the result JSON-serialisable and easy to compare.
    return [float(values[lo:hi].mean()) for lo, hi in zip(bounds, bounds[1:])]
    

In [3]:
import getpass
import os

from elasticsearch import Elasticsearch

# Connection settings are read from the environment so that no secret is
# stored in (or committed with) the notebook. The password falls back to an
# interactive prompt when ES_PASSWORD is not set.
ES_URL = os.environ.get("ES_URL", "https://localhost:9200/")
# Raw string: the backslashes of the Windows path must not be read as escapes.
ES_CA_CERTS = os.environ.get(
    "ES_CA_CERTS", r"C:\Elastic\elasticsearch-8.11.3\config\certs\http_ca.crt"
)
ES_USER = os.environ.get("ES_USER", "elastic")
ES_PASSWORD = os.environ.get("ES_PASSWORD") or getpass.getpass("Elasticsearch password: ")

es = Elasticsearch(
    ES_URL,
    ca_certs=ES_CA_CERTS,
    basic_auth=(ES_USER, ES_PASSWORD),
)
print(es.ping())

# Dense-vector index for the 4-segment PAA representation; l2_norm makes the
# kNN score a function of Euclidean distance between PAA vectors.
mappings = {
    "mappings": {
        "properties": {
            "paa": {
                "type": "dense_vector",
                "dims": 4,
                "similarity": "l2_norm"
            },
            "label": {
                "type": "keyword"
            }
        }
    }
}
# Start from a clean index so re-running this cell never mixes old and new
# documents.
if es.indices.exists(index="paa"):
    es.indices.delete(index="paa")
es.indices.create(index="paa", body=mappings)

True


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'paa'})

In [5]:
import os
from rtree import index

# In-memory R-tree kept for the alternative (currently disabled) indexing path.
ucr_idx = index.Index()

UCR_DIR = "./test_dataset/UCR"

# Index one document per UCR dataset: the 4-segment PAA of the first series
# in <name>/<name>_TEST, labelled with the dataset name.
for filename in os.listdir(UCR_DIR):
    test_path = os.path.join(UCR_DIR, filename, filename + "_TEST")
    if not os.path.isfile(test_path):
        # Stray files or incomplete dataset folders should not abort the run.
        continue
    # The context manager closes the file even if parsing or indexing fails;
    # only the first line is needed, so the rest of the file is never read.
    with open(test_path) as f:
        first_line = f.readline().rstrip("\n")
    # The first comma-separated field is skipped; the remaining fields are
    # parsed as the series values.
    content = list(map(float, first_line.split(",")[1:]))
    paa_4 = paa(content, 4)
    es.index(index="paa", body={"paa": paa_4, "label": filename})

    #ucr_idx.insert(counter, (paa_4[0], paa_4[1], paa_4[2], paa_4[3]),obj=filename)

# Make the new documents visible to searches issued right after this cell.
es.indices.refresh(index="paa")


In [4]:
# Query the "paa" index for the nearest neighbours of one Adiac test series.

# Build the query vector from the fifth line (index 4) of the Adiac test file.
# The context manager guarantees the file is closed even if parsing fails.
with open("./test_dataset/UCR/Adiac/Adiac_TEST") as f:
    content = f.read().split("\n")[4]
# The first comma-separated field is skipped; the rest are the series values.
content = list(map(float, content.split(",")[1:]))
paa_4 = paa(content, 4)

# Approximate kNN search on the dense_vector field: return the 4 best hits
# out of 10 candidates considered per shard.
res = es.search(
    index="paa",
    body={
        "knn": {"field": "paa", "query_vector": paa_4, "k": 4, "num_candidates": 10},
        "fields": ["label"],
    },
)

# One line per hit: dataset label followed by its similarity score.
for hit in res["hits"]["hits"]:
    print(hit["_source"]["label"] + " " + str(hit["_score"]))

DiatomSizeReduction 0.9847764
FacesUCR 0.9797125
FISH 0.9577552
FaceFour 0.9457813


In [None]:
# Index mapping for MIDI segments: a 16-dimensional dense vector compared
# with l2_norm similarity, plus a keyword label.
mappings = {
    "mappings": {
        "properties": {
            "midi_paa": {"type": "dense_vector", "dims": 16, "similarity": "l2_norm"},
            "label": {"type": "keyword"},
        },
    },
}

In [12]:
import os
import pretty_midi

MIDI_DIR = "./test_dataset/dataset_pop/"
# Length, in seconds, of the sliding window cut out of every song.
segment_duration = 15

# For every MIDI file, slide a 15-second window over the first instrument
# track in 1-second steps and print the 16- and 4-segment PAA of the note
# pitches that fall completely inside the window.
for filename in os.listdir(MIDI_DIR):
    try:
        midi_data = pretty_midi.PrettyMIDI(os.path.join(MIDI_DIR, filename))
    except Exception as exc:
        # Unreadable or non-MIDI files are skipped, but visibly; catching
        # Exception (not a bare except) lets Ctrl-C interrupt the loop.
        print(f"skipping {filename}: {exc}")
        continue
    print(filename)
    if not midi_data.instruments:
        # No instrument track, so there are no notes to segment.
        continue
    song_duration = midi_data.get_end_time()
    # NOTE(review): tempo is not used anywhere in this cell.
    tempo = midi_data.estimate_tempo()

    notes = midi_data.instruments[0].notes
    for i in range(0, int(song_duration) - segment_duration):
        window_end = i + segment_duration
        # NOTE(review): the scan starts at the second note (index 1), as the
        # original loop did; this looks like a leftover from the pitch-delta
        # variant that needed notes[j - 1] - confirm this is still intended.
        segment = [
            round(note.pitch, 2)
            for note in notes[1:]
            if note.start > i and note.end < window_end
        ]
        if len(segment) < 16:
            # Too few notes to form 16 non-empty PAA segments.
            continue
        segment = paa(np.array(segment), 16)
        segment_4 = paa(segment, 4)
        print(segment, segment_4)

ABBA_-_Chiquita.mid
[74.0, 76.0, 78.0, 76.0, 74.0, 73.0, 76.0, 73.5, 76.0, 78.0, 76.0, 74.0, 73.0, 71.0, 80.0, 79.0] [76.0, 74.125, 76.0, 75.75]
[78.0, 76.0, 74.0, 73.0, 76.0, 73.0, 74.0, 76.0, 78.0, 76.0, 74.0, 73.0, 71.0, 80.0, 78.0, 80.5] [75.25, 74.75, 75.25, 77.375]
[73.0, 74.0, 76.0, 78.0, 76.0, 74.0, 73.0, 71.0, 80.0, 78.0, 80.0, 81.0, 78.0, 76.0, 71.0, 73.0] [75.25, 73.5, 79.75, 74.5]
[73.0, 74.0, 76.0, 78.0, 76.0, 74.0, 73.0, 71.0, 80.0, 78.0, 80.0, 81.0, 78.0, 76.0, 71.0, 73.5] [75.25, 73.5, 79.75, 74.625]
[73.0, 74.0, 76.0, 77.0, 74.0, 73.0, 71.0, 79.0, 80.0, 81.0, 78.0, 73.5, 73.0, 74.0, 74.0, 73.5] [75.0, 74.25, 78.125, 73.625]
[74.0, 76.0, 78.0, 75.0, 73.0, 71.0, 80.0, 79.0, 81.0, 78.0, 76.0, 72.0, 74.0, 74.0, 74.0, 72.0] [75.75, 75.75, 76.75, 73.5]
[78.0, 76.0, 74.0, 73.0, 71.0, 79.0, 80.0, 81.0, 78.0, 76.0, 72.0, 74.0, 74.0, 74.0, 73.0, 72.0] [75.25, 77.75, 75.0, 73.25]
[74.0, 73.0, 71.0, 80.0, 78.0, 80.0, 81.0, 78.0, 76.0, 71.0, 73.0, 74.0, 74.0, 74.0, 73.0, 72.0] [74.



Air_Supply_-_All_Out_Of_Love.mid
[35.0, 35.0, 35.0, 35.0, 35.0, 37.0, 35.0, 37.0, 35.0, 35.0, 37.0, 35.0, 37.0, 35.0, 35.0, 37.0] [35.0, 36.0, 35.5, 36.0]
[35.0, 35.0, 37.0, 35.0, 37.0, 35.0, 35.0, 37.0, 35.0, 37.0, 35.0, 35.0, 37.0, 35.0, 37.0, 35.0] [35.5, 36.0, 35.5, 36.0]
[35.0, 35.0, 37.0, 35.0, 37.0, 35.0, 35.0, 37.0, 35.0, 37.0, 35.0, 35.0, 37.0, 35.0, 37.0, 35.0] [35.5, 36.0, 35.5, 36.0]
[35.0, 37.0, 35.0, 37.0, 35.0, 35.0, 37.0, 35.0, 37.0, 35.0, 35.0, 37.0, 35.0, 37.0, 35.0, 36.0] [36.0, 35.5, 36.0, 35.75]
[37.0, 35.0, 37.0, 35.0, 35.0, 37.0, 35.0, 37.0, 35.0, 35.0, 37.0, 35.0, 37.0, 35.0, 35.0, 36.0] [36.0, 36.0, 35.5, 35.75]
[35.0, 37.0, 35.0, 35.0, 37.0, 35.0, 37.0, 35.0, 35.0, 37.0, 35.0, 37.0, 35.0, 35.0, 37.0, 36.0] [35.5, 36.0, 36.0, 35.75]
[37.0, 35.0, 35.0, 37.0, 35.0, 37.0, 35.0, 36.0, 35.0, 37.0, 35.0, 35.0, 37.0, 35.0, 37.0, 35.0] [36.0, 35.75, 35.5, 36.0]
[35.0, 35.0, 37.0, 35.0, 37.0, 35.0, 35.0, 36.0, 37.0, 35.0, 35.0, 37.0, 35.0, 37.0, 35.0, 36.0] [35.5, 35.75