In [None]:
import os
import librosa
import librosa.display
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN, MeanShift
from sklearn.metrics import accuracy_score, log_loss, confusion_matrix, silhouette_score
import IPython.display as ipd
import plotly.express as px
import plotly.graph_objs as go
from sklearn.impute import KNNImputer
from tqdm import tqdm
import warnings
import scipy.cluster.hierarchy as shc
from plotly.offline import init_notebook_mode

In [None]:
# Notebook-wide configuration.
tqdm.pandas()                          # adds Series.progress_apply, used when extracting features
warnings.filterwarnings("ignore")      # NOTE(review): silences *all* warnings for the whole session
sns.set_style("darkgrid")
plt.rcParams.update({
    'figure.figsize': (20, 8),
    'font.size': 16,
})
init_notebook_mode(connected=True)

In [None]:
# Folder that is joined onto every `voice_id` value to build a path librosa can load.
directory = 'sounds/crying'

In [None]:
# Load the metadata table; the cells that follow read its `voice_id` and `reason` columns.
data = pd.read_csv("model2.csv")
data

In [None]:
#data['file_id']+='.mp3'
# Turn each `voice_id` into a path under `directory`.
# The startswith guard keeps this cell idempotent: re-running it must not
# prepend `directory` a second time (which would break every later librosa.load).
data['voice_id'] = data['voice_id'].apply(
    lambda name: name if name.startswith(directory) else os.path.join(directory, name)
)
data

In [None]:
# Class balance of the `reason` label.
# The column is passed by keyword: recent seaborn versions treat the first
# positional argument as `data`, not `x`, so a positional Series no longer
# produces one bar per category.
fig, ax = plt.subplots()
sns.countplot(x='reason', data=data, ax=ax)
ax.set_title('Number of samples per reason')
plt.show()
plt.close(fig)

In [None]:
def zcr_plot(path, target):
    """Print the zero-crossing count of an audio file and plot its zero-crossing rate.

    Parameters
    ----------
    path : str
        Audio file to load with ``librosa.load``.
    target : object
        Label shown in the plot title. It is formatted into the title, so
        non-string labels (e.g. encoded integers) are accepted as well.

    Returns
    -------
    None
        The function only prints and displays a figure.
    """
    signal, _sr = librosa.load(path)  # the sampling rate is not needed here
    crossings = librosa.zero_crossings(signal)
    print("Sum of zero crossing ", crossings.sum())

    rates = librosa.feature.zero_crossing_rate(signal)
    _fig, ax = plt.subplots()
    ax.set_title(f'Zero Crossing Rate of reason: {target}')
    ax.plot(rates[0])  # first (only plotted) row of the rate matrix
    ax.set_xlabel('Frame')
    ax.set_ylabel('Zero crossing rate')
    plt.show()

In [None]:
def sample_voice_id(reason, random_state=42):
    """Return the `voice_id` of one recording labelled `reason`.

    Parameters
    ----------
    reason : str
        Value to match in ``data['reason']``.
    random_state : int or None
        Seed for the draw so a re-run picks the same file; pass None for a fresh draw.

    Raises
    ------
    ValueError
        If no row of `data` carries this reason (instead of pandas' less
        specific "cannot take a larger sample" error).
    """
    candidates = data.loc[data['reason'] == reason, 'voice_id']
    if candidates.empty:
        raise ValueError(f"no rows with reason == {reason!r} in data")
    return candidates.sample(1, random_state=random_state).iloc[0]


# One example recording per reason; the spellings are the label values used in the data.
hungery = sample_voice_id('hungery')
lonely = sample_voice_id('lonely')
discomfortable = sample_voice_id('discomfortable')
tired = sample_voice_id('tired')
scared = sample_voice_id('scared')
colic = sample_voice_id('colic')
sleepy = sample_voice_id('sleepy')

In [None]:
# Inline audio player for the sampled 'hungery' recording.
ipd.Audio(hungery)

In [None]:
# Zero-crossing-rate plot for the sampled recording of every reason.
# Each title uses the same label the sample was drawn with, so the figure
# cannot be mislabelled (previously 'pained', 'discompfortable' and 'bored'
# were shown for the colic, discomfortable and lonely samples).
for sample_path, reason in [
    (hungery, 'hungery'),
    (colic, 'colic'),
    (discomfortable, 'discomfortable'),
    (lonely, 'lonely'),
    (tired, 'tired'),
    (sleepy, 'sleepy'),
    (scared, 'scared'),
]:
    zcr_plot(sample_path, reason)
    plt.close()

In [None]:
def get_zcrs(path):
    """Return the zero-crossing-rate sequence of the audio file at `path`.

    Parameters
    ----------
    path : str
        Audio file to load with ``librosa.load``.

    Returns
    -------
    The first row of ``librosa.feature.zero_crossing_rate``'s result, or an
    empty list when the file cannot be loaded or processed, so that mapping
    this function over many files keeps going.
    """
    try:
        signal, _sr = librosa.load(path)  # , res_type='kaiser_fast'
        return librosa.feature.zero_crossing_rate(signal)[0]
    except Exception as exc:
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # through, so a long extraction run can still be interrupted; the
        # message records which file was skipped instead of failing silently.
        print(f"get_zcrs: skipping {path!r}: {exc!r}")
        return []

In [None]:
# Spot check: compute the zero-crossing rate of the file at index label 1 directly,
# i.e. without the exception handling that get_zcrs wraps around the same two calls.
x , sr = librosa.load(data['voice_id'][1])
zcrs = librosa.feature.zero_crossing_rate(x)[0]
zcrs

In [None]:
# Same file through the helper, for comparison with the direct computation.
get_zcrs(data['voice_id'][1])

In [None]:
# Feature matrix: one row per file, built from each file's zero-crossing-rate sequence.
zcr_per_file = data['voice_id'].progress_apply(get_zcrs)
X = pd.DataFrame(list(zcr_per_file))

In [None]:
# Inspect the feature matrix (one row per file, one column per position in the ZCR sequence).
X

In [None]:
# Independent copies of X, one per imputation strategy, so X itself stays untouched.
X_knn_ip, X_zero_ip = X.copy(), X.copy()

In [None]:
# Number of missing values in each column of X.
X.isnull().sum()

In [None]:
# Still an unmodified copy of X at this point; it is only filled in the imputation cell.
X_zero_ip

In [None]:
# Two ways of filling the gaps in X (presumably left where a file's ZCR
# sequence is shorter than the longest one; verify with X.isnull()):
#   * X_zero_ip: every missing value becomes 0
#   * X_knn_ip:  missing values estimated from the 5 nearest rows
X_zero_ip = X_zero_ip.fillna(value=0)

imputer = KNNImputer(n_neighbors=5)
X_knn_ip = pd.DataFrame(imputer.fit_transform(X_knn_ip))

# Labels aligned with the rows of `data`.
y = data['reason']

In [None]:
# Column-to-column correlation of the feature matrix under each imputation.
for imputation, frame in (("Zero", X_zero_ip), ("KNN", X_knn_ip)):
    sns.heatmap(frame.corr())
    plt.title(f"Correlation Heatmap for {imputation} Imputed Data")
    plt.show()
    plt.close()

In [None]:
def PrincipalComponentsAnalysis(X):
    """Fit a default PCA on `X`, plot cumulative explained variance, return the projection.

    Parameters
    ----------
    X : array-like accepted by ``PCA.fit_transform``.

    Returns
    -------
    The result of ``PCA().fit_transform(X)``.

    Side effects
    ------------
    Shows a plotly line chart of the cumulative explained-variance ratio
    against the 1-based component number.
    """
    model = PCA()
    projected = model.fit_transform(X)

    cumulative = np.cumsum(model.explained_variance_ratio_)
    component_numbers = range(1, len(cumulative) + 1)

    px.line(
        x=component_numbers,
        y=cumulative,
        title='Principal Components Cumulative Explained Variance',
        height=600,
        labels={"x": "Principal Components", "y": "Explained Variance"},
    ).show()

    return projected


def kmeans_wcss(X,clusters):
    """Plot the KMeans elbow curve for 1..`clusters` clusters.

    Parameters
    ----------
    X : array-like accepted by ``KMeans.fit``.
    clusters : int
        Largest number of clusters to try (inclusive).

    Returns
    -------
    None
        Shows a plotly line chart of each model's ``inertia_`` (WCSS)
        against its number of clusters.
    """
    cluster_counts = range(1, clusters + 1)
    wcss = [
        KMeans(n_clusters=k, init='k-means++', random_state=42).fit(X).inertia_
        for k in cluster_counts
    ]

    elbow = px.line(
        x=cluster_counts,
        y=wcss,
        title='KMeans Elbow Method',
        height=600,
        labels={"x": "Number of Clusters", "y": "WCSS"},
    )
    elbow.show()
    


In [None]:
# PCA on the zero-imputed features: shows the cumulative-variance chart and keeps the projected data.
X_pcs_1 = PrincipalComponentsAnalysis(X_zero_ip)

In [None]:
# NOTE(review): the PCA chart above is drawn with plotly (px.line), not matplotlib,
# so this close presumably has no figure to act on; confirm it is still needed.
plt.close()

In [None]:
# Elbow curve for 1..10 clusters, using only the first three principal components.
kmeans_wcss(X_pcs_1[:,:3],10)

In [None]:
# Cluster the first three principal components into 7 groups
# (7 is also the number of `reason` values sampled earlier in the notebook).
pcs_3d = X_pcs_1[:, :3]
kmeans = KMeans(n_clusters=7, init='k-means++', random_state=42).fit(pcs_3d)
y_label = kmeans.predict(pcs_3d)

# NOTE(review): Cluster3D is not defined in the cells visible here; confirm it is
# defined before this cell runs on a fresh kernel.
Cluster3D(X_pcs_1, y_label)
print("Silhouette Score:\t", silhouette_score(pcs_3d, y_label))

In [None]:
# Re-display the zero-imputed feature matrix.
X_zero_ip

In [None]:
# The same matrix without its last column (display only; the result is not stored).
X_zero_ip.iloc[:,:-1]