In [None]:
import pandas as pd
import tensorflow as tf
from sklearn import svm
from sklearn.model_selection import train_test_split

In [None]:
# Load the training CSV (no header row). The slicing below treats the last
# column as the label and every other column as a feature.
dataframe = pd.read_csv('../dataset/signal_10k.csv', header=None)
raw_data = dataframe.values

labels = raw_data[:, -1]
data = raw_data[:, 0:-1]

# Seeded 80/20 hold-out split so the cell is reproducible.
train_data, test_data, train_labels, test_labels = train_test_split(
    data, labels, test_size=0.2, random_state=50
)

# Min-max statistics are taken from the training FEATURES only. Computing
# them on raw_data would mix the label column into the range and leak
# information from the held-out rows into the preprocessing.
min_val = tf.reduce_min(train_data)
max_val = tf.reduce_max(train_data)

# Apply the same transform to both splits; a model fitted on scaled training
# data must also be evaluated on data scaled the same way.
train_data = (train_data - min_val) / (max_val - min_val)
test_data = (test_data - min_val) / (max_val - min_val)

train_data = tf.cast(train_data, tf.float32)
test_data = tf.cast(test_data, tf.float32)

# Labels are used as booleans from here on.
train_labels = train_labels.astype(bool)
test_labels = test_labels.astype(bool)

In [None]:
# Display names for the kernels; the position in this list is the `ktype`
# index accepted by getClassifier.
kernels = ['Polynomial', 'RBF', 'Sigmoid','Linear']


def getClassifier(ktype):
    """Build an unfitted ``svm.SVC`` for the kernel selected by ``ktype``.

    Parameters
    ----------
    ktype : int
        Index into ``kernels``: 0 = polynomial (degree 8), 1 = RBF,
        2 = sigmoid, 3 = linear.

    Returns
    -------
    svm.SVC
        A new classifier configured with ``gamma="auto"``.

    Raises
    ------
    ValueError
        If ``ktype`` is not one of the supported indices. Previously the
        function fell through and returned ``None``, which only surfaced
        later as an ``AttributeError`` on ``.fit``.
    """
    svc_kwargs_by_type = {
        0: {'kernel': 'poly', 'degree': 8, 'gamma': "auto"},
        1: {'kernel': 'rbf', 'gamma': "auto"},
        2: {'kernel': 'sigmoid', 'gamma': "auto"},
        3: {'kernel': 'linear', 'gamma': "auto"},
    }
    if ktype not in svc_kwargs_by_type:
        raise ValueError(
            f"Unknown kernel type {ktype!r}; expected one of {sorted(svc_kwargs_by_type)}"
        )
    return svm.SVC(**svc_kwargs_by_type[ktype])

In [None]:
from sklearn.metrics import classification_report

# Split once, with a fixed seed, BEFORE the loop. Re-splitting inside the loop
# without a seed scored every kernel on a different random partition, and the
# X_train/X_test/y_train/y_test reused by later cells changed on every run.
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.20, random_state=50
)

# Fit and evaluate one SVC per kernel on the shared split.
for kernel_index, kernel_name in enumerate(kernels):
    svclassifier = getClassifier(kernel_index)
    svclassifier.fit(X_train, y_train)
    y_pred = svclassifier.predict(X_test)
    print("Evaluation:", kernel_name, "kernel")
    print(classification_report(y_test, y_pred))

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Search space handed to GridSearchCV: every combination of C, gamma and kernel.
param_grid = dict(
    C=[0.1, 1, 10, 100],
    gamma=[1, 0.1, 0.01, 0.001],
    kernel=['rbf', 'poly', 'sigmoid'],
)

In [None]:
# Grid search over param_grid with a default SVC as the base estimator.
# X_train / y_train are the variables left behind by the kernel-comparison cell.
grid = GridSearchCV(
    estimator=svm.SVC(),
    param_grid=param_grid,
    refit=True,
    verbose=2,
)
grid.fit(X_train, y_train)

In [None]:
# Show the best estimator found by the grid search.
print(grid.best_estimator_)

In [None]:
# Predict on the hold-out features (X_test) with the fitted search object.
grid_predictions = grid.predict(X_test)

In [None]:
# Print the predicted labels from the grid search for inspection.
print(grid_predictions)

In [None]:
# Classification report for the grid-search predictions against y_test.
print(classification_report(y_test,grid_predictions))

In [None]:
# Fit an SVC with fixed gamma=1 and C=0.1 on train_data / train_labels.
# NOTE(review): the hyper-parameters are hard-coded here rather than read from
# grid.best_params_ — confirm they still match the search result.
clf = svm.SVC(gamma=1, C=0.1)
clf.fit(train_data, train_labels)

In [None]:
import numpy as np

# Load the separate validation CSV (no header row). As with the training file,
# the last column is taken as the label and the rest as features.
test_dataframe = pd.read_csv('../dataset/signal_10k_val.csv', header=None)
test_raw_data = test_dataframe.values

# Last column: labels
test_labels = test_raw_data[:, -1]

# Remaining columns: features
test_data = test_raw_data[:, 0:-1]

# Scale with the SAME min/max that was used for the training data. Using this
# file's own min/max would map the validation features onto a different scale
# than the one clf was fitted on. float() accepts both a NumPy scalar and a
# scalar TensorFlow tensor, and keeps the result a plain NumPy array.
scale_min = float(min_val)
scale_max = float(max_val)
test_data = (test_data - scale_min) / (scale_max - scale_min)

In [None]:
# Predict the validation features with clf.
# NOTE: the name grid_predictions is reused here; from this point on it holds
# clf's predictions, not the GridSearchCV predictions assigned to it earlier.
grid_predictions = clf.predict(test_data)

In [None]:
# Classification report for clf's predictions against the validation labels.
print(classification_report(test_labels,grid_predictions))

In [None]:
# Fit on the min-max-scaled training split. This model is later applied to
# test_data, which is min-max scaled as well, so fitting on the raw X_train
# would train and predict on features of different scales.
model = svm.SVC(gamma=1, C=0.1)
model.fit(train_data, train_labels)

In [None]:
# Predict labels for the validation features with `model`.
result = model.predict(test_data)

In [None]:
# Classification report for `model`'s predictions against the validation labels.
print(classification_report(test_labels, result))

In [None]:
# Count of predictions that match the validation labels; starts at zero.
correct_samples = 0

In [None]:
# Count the matches from scratch inside this cell. Incrementing a counter that
# was initialised in a different cell meant that re-running this cell kept
# adding to the previous total and could print an "accuracy" above 1.
correct_samples = sum(
    1 for predicted, actual in zip(result, test_labels) if predicted == actual
)

# Fraction of validation samples predicted correctly.
print(correct_samples/len(result))

## Data Visualisation

In [None]:
import matplotlib.pyplot as plt

# x-axis: number of training samples used in each experiment
x = [10000, 25000, 50000, 100000]

# y-axis: model accuracy recorded for each training-set size
y1 = [0.89, 0.89, 0.88, 0.87]

# Create the plot
plt.figure(figsize=(10,6))

# Accuracy as a function of training-set size
plt.plot(x, y1, 'o-', label='Walidacja na zbiorze  statycznym')

for xi, y1i in zip(x, y1):
    # Accuracy value just above each data point
    plt.annotate(f"{y1i}", (xi, y1i), textcoords="offset points", xytext=(0,5), ha='center')

    # Sample count just above the x-axis, under each data point
    plt.annotate(f"{xi}", (xi, 0), textcoords="offset points", xytext=(0,10), ha='center', fontsize=9, color='gray')

# Set y-limits and x-limits
plt.ylim(0, 1)
plt.xlim(5000, 110000)

# Label the axes. The two labels were swapped before: x holds the number of
# training samples and y holds the model accuracy.
plt.xlabel("Liczba probek treningowych")
plt.ylabel("Dokładność modelu")

# Legend in the lower left, lifted off the bottom edge so it clears the
# x-value annotations
plt.legend(loc='lower left', bbox_to_anchor=(0, 0.1))

# Display the plot
plt.grid(True)
plt.tight_layout()
plt.show()