In [None]:
# Mount Google Drive inside the Colab runtime so the notebook can read and
# write files under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import random
import sys

import cv2
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
import tensorflow as tf
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from tqdm import tqdm




In [None]:
# Feature extractor: an ImageNet-pretrained ResNet50 with its classification
# head removed (include_top=False) and average pooling on the output
# (pooling='avg'), wrapped as the single layer of a Sequential model.
MyModel = tf.keras.models.Sequential([
    tf.keras.applications.ResNet50(
        weights='imagenet',
        include_top=False,
        pooling='avg',
    ),
])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Freeze the pretrained backbone. MyModel.layers[0] is the ResNet50 model added
# above (the only entry in the Sequential), so this marks the whole network as
# non-trainable, not just its first convolution.
MyModel.layers[0].trainable = False

In [None]:
def spec_augment(spec: np.ndarray, num_mask=2,
                 freq_masking_max_percentage=0.15, time_masking_max_percentage=0.3):
    """Return a copy of a 2-D spectrogram with random bands zeroed out.

    The input is unpacked as ``(frames, freqs)``: axis 0 is treated as time
    and axis 1 as frequency. Each of the ``num_mask`` rounds zeroes one
    contiguous frequency band and one contiguous time band.

    Args:
        spec: 2-D array to augment; it is copied, never modified in place.
        num_mask: number of (frequency band, time band) pairs to apply.
        freq_masking_max_percentage: upper bound on the fraction of the
            frequency axis covered by a single frequency band.
        time_masking_max_percentage: upper bound on the fraction of the
            time axis covered by a single time band.

    Returns:
        The masked copy, same shape as ``spec``.

    Note:
        Band widths are drawn with ``random.uniform`` and band positions with
        ``np.random.uniform``, so both generators must be seeded to get
        reproducible output.
    """
    masked = spec.copy()

    for _ in range(num_mask):
        n_frames, n_freqs = masked.shape

        # Frequency band: pick a width first, then a start that keeps it in range.
        freq_width = int(random.uniform(0.0, freq_masking_max_percentage) * n_freqs)
        freq_start = int(np.random.uniform(low=0.0, high=n_freqs - freq_width))
        masked[:, freq_start:freq_start + freq_width] = 0

        # Time band: same procedure along the frame axis.
        time_width = int(random.uniform(0.0, time_masking_max_percentage) * n_frames)
        time_start = int(np.random.uniform(low=0.0, high=n_frames - time_width))
        masked[time_start:time_start + time_width, :] = 0

    return masked

In [None]:
### With the model ready, loop over every spectrogram image and extract its features
def main(path):
    """Extract one feature vector per image found directly in ``path``.

    Each file is read with OpenCV, converted to RGB, preprocessed for
    ResNet50 and passed through ``MyModel``; the flattened prediction is
    kept as that image's feature vector.

    Args:
        path: directory containing the spectrogram images.

    Returns:
        A ``(Pokemons, Flatten_spectros)`` tuple of equal-length lists:
        the file names with ".png" removed, and the matching 1-D feature
        arrays. Files OpenCV cannot decode are skipped in both lists.
    """
    Pokemons = []
    Flatten_spectros = []

    # os.listdir returns entries in arbitrary order; sort so that the row
    # order of the results is the same on every run.
    list_spectros = sorted(os.listdir(path))
    for spec in tqdm(list_spectros):
        imagePath = os.path.join(path, spec)

        # cv2.imread signals failure by returning None instead of raising,
        # which would otherwise surface later as an obscure shape error.
        img = cv2.imread(imagePath)
        if img is None:
            tqdm.write(f"Skipping unreadable image: {imagePath}")
            continue

        # OpenCV decodes to BGR, while resnet50.preprocess_input expects RGB
        # input (it performs the RGB->BGR swap itself), so convert first.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Get features from model: add the batch axis, then preprocess.
        batch = np.expand_dims(img, 0)
        batch = tf.keras.applications.resnet50.preprocess_input(batch)
        # verbose=0 keeps Keras from printing a progress bar per image.
        extractedFeatures = np.asarray(MyModel.predict(batch, verbose=0))

        # Append name and features together so the two lists stay aligned.
        Pokemons.append(spec.replace(".png", ""))
        Flatten_spectros.append(extractedFeatures.flatten())
    return Pokemons, Flatten_spectros

In [None]:
### Point at the Drive folder holding the spectrogram images and run the
### feature extraction (the recorded run took about 44 minutes for 5267 files).
path = "/content/drive/MyDrive/SPECTROS"
Pokemons, Flatten_spectros= main(path)

100%|██████████| 5267/5267 [44:18<00:00,  1.98it/s]


In [None]:
# One row per image: its name and its extracted feature vector.
dic = dict(Pokemons=Pokemons, Features=Flatten_spectros)
df = pd.DataFrame(data=dic)
df.head()

Unnamed: 0,Pokemons,Features
0,snubbull2,"[0.47071075, 0.1474814, 0.034933314, 0.0070982..."
1,snorunt0,"[0.42210752, 0.09190294, 0.0145626245, 0.01777..."
2,solgaleo0,"[0.30447784, 0.019931337, 0.19393057, 0.008185..."
3,sobble4,"[0.51474607, 0.056961667, 0.016566401, 0.0, 0...."
4,sobble1,"[0.2293201, 0.08619146, 0.0058760643, 0.041448..."


In [None]:
# 2-D array of the features extracted from the CNN, one row per image.
# A plain ndarray replaces np.matrix: NumPy no longer recommends np.matrix and
# recent scikit-learn estimators refuse it as input.
Training_Feature_vector = np.asarray(Flatten_spectros)

In [None]:
######################################################
#        CAH Clustering                 #
######################################################
# Work on a plain ndarray view of the features so both SciPy and scikit-learn
# accept the input regardless of how Training_Feature_vector was built.
cah_features = np.asarray(Training_Feature_vector)

# Ward linkage matrix; this is what the dendrogram is drawn from.
CAH = linkage(cah_features, 'ward')

# A linkage matrix is a bare ndarray and has no `labels_` attribute, so the
# flat cluster assignment comes from AgglomerativeClustering using the same
# Ward criterion and 15 clusters, like the other clusterings in this notebook.
cah_model = AgglomerativeClustering(n_clusters=15, linkage='ward').fit(cah_features)
df["Cluster_cah"] = list(cah_model.labels_)

#Visualisation
# Labels are passed as a plain list rather than a pandas Series.
dendo = dendrogram(CAH, labels=df["Pokemons"].tolist())
plt.title('CAH Clustering')
# Save before show: once plt.show() has rendered the figure, a later savefig
# writes an empty canvas.
plt.savefig("/content/drive/MyDrive/dendo.png")
plt.show()
plt.close("all")



KeyError: ignored

In [None]:
# Display the linkage matrix computed in the CAH clustering cell.
CAH

array([[2.44300000e+03, 3.67600000e+03, 0.00000000e+00, 2.00000000e+00],
       [1.68000000e+03, 3.32200000e+03, 4.94894298e+00, 2.00000000e+00],
       [4.58000000e+02, 4.60000000e+02, 5.65169944e+00, 2.00000000e+00],
       ...,
       [1.50000000e+02, 1.05290000e+04, 1.85900420e+01, 5.26500000e+03],
       [2.57300000e+03, 1.05300000e+04, 1.95288243e+01, 5.26600000e+03],
       [1.56000000e+02, 1.05310000e+04, 2.05503915e+01, 5.26700000e+03]])

In [None]:
#Visualisation of the CAH clusters on the first two principal components
reduced_data = PCA(n_components=2).fit_transform(np.asarray(Training_Feature_vector))
results = pd.DataFrame(reduced_data,columns=['pca1','pca2'])
# Colour by the CAH labels: this plot is titled "CAH" but previously used
# df['Cluster_kmeans'], which belongs to a different clustering and is only
# created by a later cell.
sns.scatterplot(x="pca1", y="pca2", hue=df['Cluster_cah'], data=results).set_title("CAH with 15 clusters")
# Save before show: once plt.show() has rendered the figure, a later savefig
# writes an empty canvas.
plt.savefig("/content/drive/MyDrive/cah.png")
plt.show()
plt.close("all")

In [None]:
######################################################
#        Kmeans Clustering                 #
######################################################
# Plain ndarray view of the features (recent scikit-learn refuses np.matrix).
kmeans_features = np.asarray(Training_Feature_vector)

# K-means starts from random centroids; a fixed random_state makes the cluster
# assignment identical on every run.
kmeans = KMeans(n_clusters=15, random_state=42).fit(kmeans_features)
df["Cluster_kmeans"]=list(kmeans.labels_)

#Visualisation on the first two principal components
reduced_data = PCA(n_components=2).fit_transform(kmeans_features)
results = pd.DataFrame(reduced_data,columns=['pca1','pca2'])

sns.scatterplot(x="pca1", y="pca2", hue=df['Cluster_kmeans'], data=results).set_title("K-means Clustering with 15 clusters")
# Save before show: once plt.show() has rendered the figure, a later savefig
# writes an empty canvas.
plt.savefig("/content/drive/MyDrive/kmeans.png")
plt.show()
plt.close("all")


In [None]:
######################################################
#        PAM Clustering                 #
######################################################
# NOTE(review): KMedoids is not imported in any cell visible here, so this cell
# raises NameError as written. It is presumably scikit-learn-extra's
# `sklearn_extra.cluster.KMedoids` -- confirm, then install and import it in
# the imports cell.
pam_features = np.asarray(Training_Feature_vector)
kmedoids = KMedoids(n_clusters=15).fit(pam_features)
df["Cluster_PAM"]=list(kmedoids.labels_)

#Visualisation on the first two principal components
# n_components is fixed at 2 to match the two column names below; the
# previously referenced `pca_num_components` was never defined.
reduced_data = PCA(n_components=2).fit_transform(pam_features)
results = pd.DataFrame(reduced_data,columns=['pca1','pca2'])

sns.scatterplot(x="pca1", y="pca2", hue=df['Cluster_PAM'], data=results)
plt.title('PAM Clustering with 2 dimensions')
# Save before show: once plt.show() has rendered the figure, a later savefig
# writes an empty canvas.
plt.savefig("/content/drive/MyDrive/pam.png")
plt.show()
plt.close("all")


NameError: ignored

In [None]:
# Preview the frame again, now including the cluster-label columns added by
# the clustering cells.
df.head()

Unnamed: 0,Pokemons,Features,Cluster_kmeans,Cluster_cah
0,snubbull2,"[0.47071075, 0.1474814, 0.034933314, 0.0070982...",3,12
1,snorunt0,"[0.42210752, 0.09190294, 0.0145626245, 0.01777...",3,2
2,solgaleo0,"[0.30447784, 0.019931337, 0.19393057, 0.008185...",3,2
3,sobble4,"[0.51474607, 0.056961667, 0.016566401, 0.0, 0....",14,13
4,sobble1,"[0.2293201, 0.08619146, 0.0058760643, 0.041448...",12,5


In [None]:
# Write the names, features and cluster labels to Drive as CSV.
# NOTE(review): the Features column holds NumPy arrays; to_csv writes their
# string form, which NumPy may abbreviate with "..." for long arrays -- confirm
# the CSV is not expected to round-trip the feature values.
df.to_csv("/content/drive/MyDrive/clusterings.csv")