# Preparation

In [6]:
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

# Read the CSV into a NumPy array. The first line of the file is skipped and
# no header row is inferred, so columns are addressed purely by position.
data = pd.read_csv(
    'virusshare.csv',
    sep=',',
    skiprows=1,
    header=None,
).to_numpy()

# Column 0 is used as the target passed to fit(); all remaining columns are
# the feature matrix.
Y, X = data[:, 0], data[:, 1:]

def testModel(model, modelName, n_splits=5):
    """Cross-validate ``model`` on the module-level ``X``/``Y`` and print its scores.

    The rows of ``X`` are split into ``n_splits`` folds with ``KFold``
    (default settings, i.e. no shuffling is requested here). For every fold
    the model is fitted on the training part, evaluated on the held-out part
    with ``accuracy_score`` and ``f1_score`` (default arguments), and the
    per-fold scores plus their averages are printed.

    Parameters
    ----------
    model : object with ``fit(X, y)`` and ``predict(X)``
        The same object is passed to ``fit`` once per fold.
    modelName : str
        Label printed on the first line of the report.
    n_splits : int, default 5
        Number of cross-validation folds.

    Returns
    -------
    None
        The results are only printed.
    """
    kf = KFold(n_splits=n_splits)
    acc_scores = []
    f1_scores = []

    for train_index, test_index in kf.split(X):
        train_X, test_X = X[train_index], X[test_index]
        train_Y, test_Y = Y[train_index], Y[test_index]
        model.fit(train_X, train_Y)
        pred_values = model.predict(test_X)
        # scikit-learn metrics take (y_true, y_pred) in that order.
        acc_scores.append(accuracy_score(test_Y, pred_values))
        f1_scores.append(f1_score(test_Y, pred_values))

    # Average over the folds actually evaluated rather than a hard-coded
    # count, so the result stays correct when n_splits is changed.
    avg_acc_score = sum(acc_scores) / len(acc_scores)
    avg_f1_score = sum(f1_scores) / len(f1_scores)

    print('Method: '+modelName)
    print('accuracy of each fold - {}'.format(acc_scores))
    print('Avg accuracy : {}'.format(avg_acc_score))
    print('f1 of each fold - {}'.format(f1_scores))
    print('Avg f1 : {}'.format(avg_f1_score))


# Simple Logistic Regression

In [7]:
from sklearn.linear_model import LogisticRegression
# Evaluate a logistic regression classifier using the liblinear solver.
testModel(
    LogisticRegression(solver="liblinear"),
    "logistic regression",
)

Method: logistic regression
accuracy of each fold - [0.73825, 0.73475, 0.7515, 0.74925, 0.7655]
Avg accuracy : 0.7478499999999999
f1 of each fold - [0.7389678384442783, 0.7562600505398576, 0.7607125662012517, 0.7631641086186541, 0.7759197324414715]
Avg f1 : 0.7590048592491028


# Multilayer Perceptron

In [8]:
from sklearn.neural_network import MLPClassifier
# Evaluate a small MLP (two hidden layers of 10 and 2 units) trained with SGD
# and an adaptive learning rate; random_state is fixed for repeatability.
testModel(
    MLPClassifier(
        solver='sgd',
        learning_rate="adaptive",
        learning_rate_init=0.1,
        alpha=1e-5,
        hidden_layer_sizes=(10, 2),
        random_state=1,
        max_iter=500,
    ),
    "multilayer perceptron",
)

Method: multilayer perceptron
accuracy of each fold - [0.8145, 0.811, 0.8125, 0.81, 0.82425]
Avg accuracy : 0.8144500000000001
f1 of each fold - [0.8070722828913156, 0.8001057641459545, 0.7988197424892705, 0.8028022833419823, 0.814070351758794]
Avg f1 : 0.8045740849254633


# SVM

In [None]:
from sklearn.svm import LinearSVC
# Evaluate a linear SVM with an L1 penalty and squared hinge loss, solved in
# the primal (dual=False) with a raised iteration cap.
testModel(
    LinearSVC(
        penalty="l1",
        loss="squared_hinge",
        dual=False,
        max_iter=10000,
    ),
    "SVM",
)