In [None]:
from sklearn.ensemble import RandomForestClassifier
from micromlgen import port
import numpy as np
from glob import glob
from os.path import basename
from matplotlib import pyplot as plt

import warnings
# NOTE: with no category/module filter this silences *every* warning for the
# rest of the session, including convergence and deprecation warnings.
warnings.filterwarnings("ignore")

In [None]:
def load_features(folder):
    """Load every ``*.csv`` file in ``folder`` into one labelled sample matrix.

    Each file holds the samples of one class: one feature vector per line,
    comma separated. Files are visited in sorted filename order so the class
    index assigned to each file is the same on every run and platform.

    Args:
        folder: Path of the directory containing the per-class CSV files.

    Returns:
        A ``(dataset, classmap)`` tuple. ``dataset`` is a 2-D float array whose
        last column is the class index of each row, or ``None`` when no CSV
        file was found. ``classmap`` maps each class index to the file name
        with its last four characters (the ``.csv`` extension) removed.
    """
    per_class = []
    classmap = {}
    # glob() yields files in arbitrary, OS-dependent order; sort for stable labels.
    for class_idx, filename in enumerate(sorted(glob('%s/*.csv' % folder))):
        classmap[class_idx] = basename(filename)[:-4]
        # ndmin=2 keeps a file with a single sample as one row instead of
        # collapsing it to a flat vector (which would break the hstack below).
        samples = np.loadtxt(filename, dtype=float, delimiter=',', ndmin=2)
        labels = np.full((len(samples), 1), class_idx, dtype=float)
        per_class.append(np.hstack((samples, labels)))

    # Stack once at the end rather than re-copying the growing array per file.
    dataset = np.vstack(per_class) if per_class else None
    return dataset, classmap


In [None]:
# put your samples in the dataset folder
# one class per file
# one feature vector per line, in CSV format
features, classmap = load_features('dataset/')
# The last column holds the class index; everything before it is the feature vector.
X, y = features[:, :-1], features[:, -1]
# Fixed seed so the trained forest, and therefore the exported code, is reproducible.
classifier = RandomForestClassifier(n_estimators=30, max_depth=10, random_state=1).fit(X, y)
c_code = port(classifier, classmap=classmap)
print(c_code)

# The context manager guarantees model.h is flushed and closed even if the write fails.
with open("model.h", "w") as f:
    f.write(c_code)


In [None]:
# Hold out 30% of the samples as a test set for the models trained below.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

In [None]:
# Stochastic Gradient Descent classifier with scikit-learn
from sklearn import linear_model

# Create the SGD classifier. random_state fixes the per-epoch data shuffling so
# the fitted model (and the metrics reported later) are the same on every run,
# matching the seeded train/test split.
SGDClf = linear_model.SGDClassifier(
    max_iter=1000,
    tol=1e-3,
    penalty="elasticnet",
    random_state=1,
)

# Train on the training split
SGDClf.fit(X_train, y_train)

# Predict labels for the held-out test split
y_pred_sgd = SGDClf.predict(X_test)

In [None]:
# Support Vector Classifier (scikit-learn) with a linear kernel
from sklearn.svm import SVC

# Build the classifier and fit it on the training split; fit() returns the estimator itself
SVMClf = SVC(kernel='linear').fit(X_train, y_train)

# Predict labels for the held-out test split
y_pred_svm = SVMClf.predict(X_test)

In [None]:
# Evaluating the models on the held-out test split
from sklearn import metrics

# Accuracy
print(f'The accuracy for the SGD is: {metrics.accuracy_score(y_test, y_pred_sgd)}')
print(f'The accuracy for the SVM is: {metrics.accuracy_score(y_test, y_pred_svm)}')

# Precision (macro: unweighted mean over classes)
sgdprecision = metrics.precision_score(y_test, y_pred_sgd, average='macro')
svcprecision = metrics.precision_score(y_test, y_pred_svm, average='macro')

print(f'The precision score for the SGD is: {sgdprecision}')
print(f'The precision score for the SVM is: {svcprecision}')

# Recall
# Macro-averaged like the precision above so the two scores are comparable.
# The 'weighted' average of recall is identical to accuracy, so it would only
# repeat the first two lines of output.
sgdrecall = metrics.recall_score(y_test, y_pred_sgd, average='macro')
svcrecall = metrics.recall_score(y_test, y_pred_svm, average='macro')
print(f'The recall score for the SGD is: {sgdrecall}')
print(f'The recall score for the SVM is: {svcrecall}')

In [None]:
# Plot the confusion matrices of both models side by side and save them
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Pass labels explicitly so the matrix rows/columns always line up with
# display_labels, even if a class happens to be absent from the test split.
sgd_cm = confusion_matrix(y_test, y_pred_sgd, labels=SGDClf.classes_)
svc_cm = confusion_matrix(y_test, y_pred_svm, labels=SVMClf.classes_)

disp_sgd = ConfusionMatrixDisplay(confusion_matrix=sgd_cm, display_labels=SGDClf.classes_)
disp_svc = ConfusionMatrixDisplay(confusion_matrix=svc_cm, display_labels=SVMClf.classes_)

# A bare .plot() opens a new figure each time, so savefig() would only capture
# the last matrix. Draw both on the axes of one shared figure instead.
fig, (ax_sgd, ax_svc) = plt.subplots(1, 2, figsize=(12, 5))
disp_sgd.plot(ax=ax_sgd)
ax_sgd.set_title('SGD')
disp_svc.plot(ax=ax_svc)
ax_svc.set_title('SVM')
fig.tight_layout()

fig.savefig('confusionmatrix.png')
plt.show()

In [None]:
import pickle

# Persist each trained model. The context manager closes (and flushes) every
# file deterministically instead of leaving the handle to the garbage collector.
for model, path in (
    (SGDClf, "SGDClf.pkl"),
    (SVMClf, "SVMClf.pkl"),
    (classifier, "classifier.pkl"),
):
    with open(path, "wb") as model_file:
        pickle.dump(model, model_file)