In [1]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OrdinalEncoder
from sklvq import GMLVQ
from timeit import timeit
from sklearn.model_selection import KFold

from sklearn.model_selection import (
    cross_val_score,
    RepeatedKFold,
)


# Set the default tick-label size to "small" on both axes via matplotlib's
# global rc settings.
matplotlib.rc("xtick", labelsize="small")
matplotlib.rc("ytick", labelsize="small")

In [2]:
def getdata():
    """Load the feature/label CSVs and extract the three selected label groups.

    Reads ``feature_vectors.csv``, ``diagnosis_label.csv`` and
    ``center_label.csv`` from the current working directory. The diagnosis
    and center labels are concatenated element-wise into one combined label
    per row, and the rows whose combined label is ``'HCUMCG'``,
    ``'HCUGOSM'`` or ``'HCCUN'`` are collected.
    (presumably "HC" + center name — confirm against the label files)

    Returns
    -------
    tuple
        ``(data, labels, centerdata, centerlabels)`` where

        * ``data`` is the full feature array,
        * ``labels`` is the full (squeezed) diagnosis-label array,
        * ``centerdata`` holds the feature rows of the selected groups,
        * ``centerlabels`` holds the matching combined labels.

        The selected rows are ordered group by group (``'HCUMCG'`` first,
        then ``'HCUGOSM'``, then ``'HCCUN'``), not in original file order.
    """
    features = pd.read_csv("feature_vectors.csv").to_numpy()
    diagnosis = pd.read_csv("diagnosis_label.csv").to_numpy().squeeze()
    center = pd.read_csv("center_label.csv").to_numpy().squeeze()

    # Element-wise concatenation: one combined "<diagnosis><center>" value per row.
    combined = diagnosis + center

    picked_rows = []
    picked_labels = []
    for wanted in ("HCUMCG", "HCUGOSM", "HCCUN"):
        hits = np.where(combined == wanted)
        picked_labels.append(combined[hits])
        picked_rows.append(features[hits])

    subset_labels = np.concatenate(picked_labels)
    subset_rows = np.concatenate(picked_rows)

    return features, diagnosis, subset_rows, subset_labels

In [3]:
def ztransform(data_ztransform):
    """Return a z-transformed (standardized) copy of ``data_ztransform``.

    A new scikit-learn ``StandardScaler`` is fitted on the given data and
    immediately applied to it. The fitted scaler is discarded, so the
    transform cannot be inverted or re-applied to other data afterwards.

    Parameters
    ----------
    data_ztransform : array-like
        Data to standardize; passed straight to ``fit_transform``.

    Returns
    -------
    The transformed data as returned by ``StandardScaler.fit_transform``.
    """
    return StandardScaler().fit_transform(data_ztransform)

In [4]:
def model_definition():
    """Create a new, unfitted GMLVQ classifier with this notebook's settings.

    Every call returns a separate instance built with the same fixed
    hyper-parameters and the same ``random_state`` (1428).

    Returns
    -------
    GMLVQ
        The configured, not-yet-fitted model.
    """
    sigmoid_settings = {"beta": 2}
    sgd_settings = {
        "max_runs": 20,
        "batch_size": 1,
        # Two step sizes are supplied — presumably one for the prototypes
        # and one for the relevance matrix; confirm against the sklvq docs.
        "step_size": np.array([0.1, 0.05]),
    }

    return GMLVQ(
        distance_type="adaptive-squared-euclidean",
        activation_type="sigmoid",
        activation_params=sigmoid_settings,
        solver_type="sgd",
        solver_params=sgd_settings,
        random_state=1428,
    )

In [5]:
def train_modelkfold(data, label, folds=10):
    """Train and evaluate one fresh model per fold of a shuffled K-fold split.

    The split is a ``KFold`` with ``shuffle=True`` and ``random_state=3``.
    For every fold a new model from ``model_definition()`` is fitted on the
    training part and scored on the held-out part. The score is the fraction
    of held-out samples whose predicted label equals the true label, and it
    is printed as soon as the fold finishes.

    Parameters
    ----------
    data : numpy.ndarray
        Feature rows; indexed with the integer index arrays from the split.
    label : numpy.ndarray
        Labels aligned with ``data``; indexed the same way.
    folds : int, default 10
        Number of folds passed to ``KFold``.

    Returns
    -------
    tuple of (list, list)
        ``(modellist, accuracies)``: the fitted model of each fold and the
        matching held-out accuracy (a plain ``float``), both in fold order.
    """
    splitter = KFold(folds, shuffle=True, random_state=3)
    fitted_models = []
    fold_scores = []

    for train_idx, test_idx in splitter.split(data):
        classifier = model_definition()
        classifier.fit(data[train_idx], label[train_idx])

        held_out_X = data[test_idx]
        held_out_y = label[test_idx]
        guesses = classifier.predict(held_out_X)

        # Count matches one element at a time so the comparison semantics
        # are plain Python equality on each predicted/true pair.
        hits = sum(
            1 for pos, guess in enumerate(guesses) if guess == held_out_y[pos]
        )
        fold_accuracy = hits / len(held_out_y)
        print('%.3f accuracy is ' % (hits / len(held_out_y)))

        fold_scores.append(fold_accuracy)
        fitted_models.append(classifier)

    return fitted_models, fold_scores

In [6]:
# Load the full dataset together with the subset restricted to the three
# selected combined diagnosis+center label values (see getdata).
data,labels,centerdata,centerlabels = getdata()
# Fit one GMLVQ model per fold (default: 10 folds) on that subset; each fold's
# held-out accuracy is printed as it completes.
# NOTE(review): ztransform is defined in this notebook but is not applied to
# centerdata before training here — confirm whether that is intentional.
modellist,accuracies = train_modelkfold(centerdata,centerlabels)

0.889 accuracy is 
0.889 accuracy is 
1.000 accuracy is 
1.000 accuracy is 
1.000 accuracy is 
0.875 accuracy is 
0.750 accuracy is 
1.000 accuracy is 
0.875 accuracy is 
0.875 accuracy is 


In [7]:
# Show the type of the `omega_` attribute of the model fitted on the second
# fold (index 1 of modellist).
type(modellist[1].omega_)

numpy.ndarray

0.8888888888888888