In [None]:
from scipy.io.arff import loadarff
import pandas as pd
import matplotlib.pyplot as plt
from numpy.fft import rfft

# ML
## preprocessing
from sklearn.model_selection import train_test_split

## Models
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

## evaluation
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay


In [None]:
# Load the InsectSound data set. loadarff returns a pair; only its first
# element (the records) is used to build the frame.
data = loadarff('1_DatasetCharacteristics/data/InsectSound.arff')
df = pd.DataFrame(data[0])

# Nominal attributes come back from loadarff as byte strings, which would show
# up as b'...' in every legend and plot title further down. Decode them once
# here; values that are not bytes are left untouched, so this is a no-op if the
# column already holds plain strings.
df["target"] = df["target"].map(
    lambda value: value.decode("utf-8") if isinstance(value, bytes) else value
)

In [None]:
# Look at 10 randomly drawn instances of the data set. The seed is fixed so
# that the same rows are shown again after a kernel restart.
# Note: despite its name, df_test is only this preview, not the test split
# used for the classifiers (that one is X_test / y_test).
df_test = df.sample(10, random_state=0)
df_test

In [None]:
# Draw ten instances with a fixed seed and overlay their raw signals, labelling
# each curve with the value stored in the row's last column.
sample = df.sample(n=10, random_state=1)
n_points = df.shape[1] - 1  # every column except the trailing label column
for i in range(sample.shape[0]):
    row = sample.iloc[i]
    plt.plot(range(n_points), row.iloc[:-1], label=row.iloc[-1])
plt.legend(bbox_to_anchor=(1, 0.5), loc='center left')

# FFT

In [None]:
# Magnitude of the real FFT for each of the instances held in `sample`.
for i in range(len(sample)):
    # The row is sliced from a frame that also carries the label column, so it
    # may arrive as object dtype; cast so rfft always receives plain floats.
    signal = sample.iloc[i, :-1].to_numpy(dtype=float)
    spectrum = abs(rfft(signal))
    # Take the x-axis from the spectrum itself: rfft yields n//2 + 1 bins, so a
    # hard-coded 301 is only correct for recordings of exactly 600 points.
    plt.plot(range(len(spectrum)), spectrum, label=sample.iloc[i, -1])
plt.xlabel("frequency bin")
plt.ylabel("magnitude")
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5));

In [None]:
# Plot the FFT magnitude of the first 5 instances of each species,
# one figure per species.
for species in df.target.unique():
    species_head = df[df.target == species].head(5)
    for j in range(len(species_head)):
        # Cast explicitly: a row taken from the mixed-type frame may be object
        # dtype, and rfft should be handed plain floats.
        signal = species_head.iloc[j, :-1].to_numpy(dtype=float)
        spectrum = abs(rfft(signal))
        # x-axis follows the actual number of bins (n//2 + 1) instead of a
        # hard-coded 301, so other recording lengths plot without error.
        plt.plot(range(len(spectrum)), spectrum)
    plt.title(species)
    plt.xlabel("frequency bin")
    plt.ylabel("magnitude")
    plt.show()
    

In [None]:
# Keep the label column aside, then turn every row of the remaining columns
# into the magnitude of its real FFT (rfft works along the last axis, i.e. per
# row of the frame).
target = df["target"]
spectra = rfft(df.drop(columns="target"))
df_fft = pd.DataFrame(abs(spectra))

In [None]:
# Display the FFT-magnitude feature frame built in the previous cell.
df_fft

In [None]:
# LabelEncoder (sklearn.preprocessing) turns the target values into integer
# class codes that the classifiers below are trained on.
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()

# Fit the encoder on the target column and encode it in the same call.
label = le.fit_transform(df.target)

# Show the encoded labels as the cell output.
label

# ML

#### Gaussian Naive Bayes

In [None]:
# Features are the FFT magnitudes, targets the integer-encoded labels.
X = df_fft
y = label

# 50/50 split with a fixed seed; the resulting split is reused by every
# classifier cell that follows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=0
)

# Fit Gaussian Naive Bayes on the training half and predict the test half.
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

# Fraction of test instances classified correctly (cell output).
accuracy_score(y_test, y_pred)


In [None]:
# Confusion matrix of the Gaussian Naive Bayes predictions on the test half.
gnb_cm = confusion_matrix(y_test, y_pred)
ConfusionMatrixDisplay(gnb_cm).plot()

#### Support Vector Classifier

In [None]:
# Train a support vector classifier (fixed seed) on the training half.
clf = SVC(random_state=0).fit(X_train, y_train)

# Predict the test half and show the confusion matrix, with rows and columns
# ordered by the classes the model was fitted on.
predictions = clf.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=predictions, labels=clf.classes_)
disp = ConfusionMatrixDisplay(cm, display_labels=clf.classes_)
disp.plot();

In [None]:
# Accuracy of the most recent `predictions` against the test labels.
accuracy_score(y_true=y_test, y_pred=predictions)

#### Random Forest Classifier

In [None]:
# Train a random forest (fixed seed) on the training half.
clf = RandomForestClassifier(random_state=0).fit(X_train, y_train)

# Predict the test half and show the confusion matrix, with rows and columns
# ordered by the classes the model was fitted on.
predictions = clf.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=predictions, labels=clf.classes_)
disp = ConfusionMatrixDisplay(cm, display_labels=clf.classes_)
disp.plot();


In [None]:
# Accuracy of the most recent `predictions` against the test labels.
accuracy_score(y_true=y_test, y_pred=predictions)

#### kNN-Classifier

In [None]:

# Train a k-nearest-neighbours classifier that votes over 50 neighbours.
clf = KNeighborsClassifier(n_neighbors=50).fit(X_train, y_train)

# Predict the test half and show the confusion matrix, with rows and columns
# ordered by the classes the model was fitted on.
predictions = clf.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=predictions, labels=clf.classes_)
disp = ConfusionMatrixDisplay(cm, display_labels=clf.classes_)
disp.plot();

# Accuracy on the test half (cell output).
accuracy_score(y_true=y_test, y_pred=predictions)