In [None]:
!pip install -Uq tensorflow_hub pydub

In [None]:
!pip install -Uq tensorflow-io

In [None]:
import tensorflow as tf
import tensorflow_hub as hub

# Load the YAMNet model from TensorFlow Hub (downloaded on first use).
model = hub.load('https://tfhub.dev/google/yamnet/1')

# Read and decode one WAV file into a float tensor plus its sample rate.
# NOTE(review): sample_rate is not checked here; the YAMNet model card asks
# for 16 kHz mono input - confirm the files under audio/ satisfy that.
audio, sample_rate = tf.audio.decode_wav(tf.io.read_file('audio/h1.wav'))
# Drop the trailing channel axis; this only succeeds for single-channel audio.
audio = tf.squeeze(audio, axis=-1)

# Run the model: it returns class scores, embeddings and the log-mel
# spectrogram for the waveform.
scores, embeddings, log_mel_spectrogram = model(audio)
print(f"Audio embedding shape: {embeddings.shape}")

In [None]:
# Decode a second WAV file and run it through the already-loaded `model`.
audio, sample_rate = tf.audio.decode_wav(tf.io.read_file('audio/a1.wav'))
# Drop the trailing channel axis; this only succeeds for single-channel audio.
audio = tf.squeeze(audio, axis=-1)

# The model returns class scores, embeddings and the log-mel spectrogram.
scores, embeddings, log_mel_spectrogram = model(audio)
print(f"Audio embedding shape: {embeddings.shape}")

In [None]:
# Display the log-mel spectrogram returned by the most recent `model(audio)` call.
log_mel_spectrogram

In [None]:
import os
import numpy as np

# Directory that is scanned for recordings.
AUDIO_DIR = './audio/'
# Number of leading embedding frames kept per clip. Every clip is flattened to
# NUM_FRAMES * embedding_width values so that all vectors have the same length.
NUM_FRAMES = 5

voices = []  # one flattened embedding vector (1-D NumPy array) per .wav file
labels = []  # file name without its extension, in the same order as `voices`

# NOTE: os.listdir returns entries in arbitrary, filesystem-dependent order.
# Anything that is paired with `labels` by position later on depends on it.
for filename in os.listdir(AUDIO_DIR):
    # Match on the real extension; a substring test would also pick up
    # names such as "clip.wav.bak".
    stem, extension = os.path.splitext(filename)
    if extension != '.wav':
        continue

    waveform, sample_rate = tf.audio.decode_wav(
        tf.io.read_file(os.path.join(AUDIO_DIR, filename))
    )
    # Drop the trailing channel axis; this only succeeds for single-channel audio.
    waveform = tf.squeeze(waveform, axis=-1)

    scores, embeddings, log_mel_spectrogram = model(waveform)

    # Keep only the first NUM_FRAMES frames and flatten them into one vector.
    leading_frames = embeddings[:NUM_FRAMES, :]
    if leading_frames.shape[0] < NUM_FRAMES:
        # A shorter clip would yield a shorter vector than the others; fail
        # here with a clear message instead of later, when vectors are stored.
        raise ValueError(
            f"{filename}: only {leading_frames.shape[0]} embedding frame(s), "
            f"need at least {NUM_FRAMES}"
        )
    vector = np.array(leading_frames).ravel()

    voices.append(vector)
    labels.append(stem)

    print(f"Audio embedding shape: {embeddings.shape} new shape: {leading_frames.shape} type: {vector.shape}")
    print(filename)

In [None]:
# Display the list of flattened embedding vectors collected by the directory loop.
voices

In [None]:
# Import the PyMilvus names used by the cells below.
# A multi-line `from ... import` list must be wrapped in parentheses.
from pymilvus import (
    connections,
    utility,
    FieldSchema,
    CollectionSchema,
    DataType,
    Collection,
)

# Name of the Milvus collection this notebook works with.
collection_name = "audio"

# Open the "default" connection to the Milvus server on localhost:19530.
connections.connect(alias="default", host="localhost", port="19530")

# Start from a clean slate: drop the collection if an earlier run left it behind.
collection_exists = utility.has_collection(collection_name)
if collection_exists:
    Collection(collection_name).drop()


# Define the collection schema: an explicit integer primary key, two short
# text fields and the float vector field that holds the audio embedding.
fields = [
    FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=False),
    FieldSchema(name="words", dtype=DataType.VARCHAR, max_length=50),
    FieldSchema(name="person_name", dtype=DataType.VARCHAR, max_length=50),
    # dim must equal the length of each flattened vector in `voices`.
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=5120),
]
schema = CollectionSchema(fields, description="Simple demo for audio similar search")

# Create the collection under `collection_name`, the same name the
# stale-collection check uses, so both always refer to one collection.
# NOTE: from here on `audio` is the Milvus collection, not a waveform tensor.
audio = Collection(collection_name, schema)

In [None]:
# Build an IVF_FLAT index with L2 distance on the vector field.
index = {
    "index_type": "IVF_FLAT",
    "metric_type": "L2",
    "params": {"nlist": 128},
}
# The field name must match the vector field declared in the schema ("embedding").
audio.create_index("embedding", index)

In [None]:
# Display the file-name stems collected by the directory loop (same order as `voices`).
labels

In [None]:
# Shape of one flattened vector; its length should equal the `dim=5120`
# declared for the collection's vector field.
voices[0].shape

In [None]:
# Build the rows to insert, as one list per schema field in schema order
# (pk, words, person_name, embedding).

# Speaker name for each recording, paired with `labels` / `voices` by position.
person_names = ["Auranzaib", "Auranzaib", "Hasnant", "Qasim", "Hasnant", "Qasim"]

# Every column must have one entry per recording; fail early with a clear
# message if the audio folder and the hard-coded names have drifted apart.
if not (len(labels) == len(voices) == len(person_names)):
    raise ValueError(
        f"column length mismatch: {len(labels)} labels, "
        f"{len(voices)} voices, {len(person_names)} person names"
    )

data = [
    list(range(1, len(voices) + 1)),  # pk field: 1..N, one key per recording
    labels,                           # words field
    person_names,                     # person_name field
    voices,                           # embedding field
]

In [None]:
# Write the column-ordered `data` into the collection, flush it, then load the
# collection. The three calls are order-dependent and run before the search cell.
# NOTE(review): flush/load semantics (persisting pending rows, loading the
# collection into memory for search) are as described in the PyMilvus docs.
audio.insert(data)
audio.flush()
audio.load()

In [None]:
# Search-time parameters: use L2 distance, the same metric the index is built with.
search_params = dict(metric_type="L2")

In [None]:
# Query the collection with the first stored vector and fetch its 4 nearest
# neighbours, returning the two text fields of every hit.
result = audio.search(
    data=[voices[0]],                        # a single query vector
    anns_field="embedding",                  # vector field name as declared in the schema
    param=search_params,
    limit=4,
    expr=None,                               # no scalar filter
    output_fields=['words', 'person_name'],
    consistency_level="Strong",
)

In [None]:
# Print the person name of every hit returned for the single query vector.
# `result` is the name the search cell assigns; index 0 selects the hits of
# the first (and only) query.
for hit in result[0]:
    print(hit.entity.get('person_name'))