# Analyze SaD (Stasis and Decline)

## Imports

In [1]:
#Shawn Schwartz
#Alfaro Lab UCLA, 2019
#shawnschwartz@ucla.edu
import os
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.svm import SVC

## Declare Functions

In [2]:
def read_data(eigenvals, labels):
    """Load the eigenvalue table and its matching label vector from disk.

    Parameters
    ----------
    eigenvals : str or path-like
        Path to a headerless .csv file; every row is one sample and every
        column is one element of that sample's eigenvalue vector.
    labels : str or path-like
        Path to a .dat text file holding one numeric label per row, in the
        same row order as the eigenvalue file.

    Returns
    -------
    tuple
        ``(eigenvals_out, labels_out)``: the eigenvalues as a pandas
        DataFrame and the labels as a numpy array.
    """
    # The tuple is evaluated left to right, so the .csv is read before the
    # label file.
    return pd.read_csv(eigenvals, header=None), np.loadtxt(labels)

def prepare_data(eigenvals, labels):
    """Convert eigenvalues and labels to numpy arrays and check they line up.

    Parameters
    ----------
    eigenvals : pandas.DataFrame or array-like
        Eigenvalues, one sample per row.
    labels : array-like
        Labels, one per eigenvalue row.

    Returns
    -------
    tuple
        ``(eigenvals_out, labels_out)``: numpy array of eigenvalues and
        numpy array of labels.

    Raises
    ------
    ValueError
        If the number of eigenvalue rows differs from the number of labels.
    """
    eigenvals_out = np.array(eigenvals)
    # ndmin=1 promotes a scalar (0-d) label, e.g. the result of loading a
    # one-line label file, to a 1-element array so it has a first dimension.
    labels_out = np.array(labels, ndmin=1)

    # Only the leading dimension (sample count) has to agree.
    n_samples = eigenvals_out.shape[0]
    n_labels = labels_out.shape[0]
    if n_samples != n_labels:
        # Raise instead of calling exit(): exit() is an interactive helper
        # that can shut down a notebook kernel rather than report the problem.
        raise ValueError(
            "Failure: Eigenvalues DON'T Match Label Shape! "
            "(%d eigenvalue rows vs %d labels)" % (n_samples, n_labels)
        )

    print("Success: Eigenvalues Match Label Shape!")
    return eigenvals_out, labels_out

def train_svm_data(eigenvals, labels, gamma):
    """Fit a support-vector classifier and score it on its own training data.

    Parameters
    ----------
    eigenvals : numpy array
        Eigenvalues, one sample per row.
    labels : numpy array
        Labels, one per eigenvalue row.
    gamma : str or float
        Kernel coefficient forwarded to ``svm.SVC``; every other SVC
        hyperparameter is left at the library default.

    Returns
    -------
    tuple
        ``(clf, clf_score)``: the fitted classifier and the value returned by
        ``clf.score`` on the same data it was trained on.
    """
    clf = svm.SVC(gamma=gamma)
    clf.fit(eigenvals, labels)

    # NOTE: this is a training-set score, so it says nothing about how the
    # model generalises; use held-out data for that.
    clf_score = clf.score(eigenvals, labels)

    return clf, clf_score

def predict_stasis_decline(eigenvals, model):
    """Predict labels for the eigenvalues stored in a .csv file.

    Parameters
    ----------
    eigenvals : str or path-like
        Path to a headerless .csv file; every row is one sample and every
        column is one element of that sample's eigenvalue vector.
    model : object
        Fitted classifier exposing ``predict``.

    Returns
    -------
    The value returned by ``model.predict`` for the rows of the file.
    """
    feature_matrix = np.array(pd.read_csv(eigenvals, header=None))
    predicted = model.predict(feature_matrix)

    # Only a header is printed here; the labels themselves are returned to
    # the caller rather than written to stdout.
    print("\nPredicted labels for new eigenvalues:\n(0=Stasis; 1=Decline)")
    return predicted

def score_predictions(prediction, truths):
    """Compute the fraction of predicted labels that match the true labels.

    Parameters
    ----------
    prediction : array-like
        Predicted labels for the new input data.
    truths : str or path-like
        Path to a .dat text file holding one numeric label per row, in the
        same row order as the data the predictions were made from.

    Returns
    -------
    float
        Proportion correctly classified (between 0 and 1).

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of true labels.
    ZeroDivisionError
        If there are no predictions to score.
    """
    predicted = np.array(prediction, ndmin=1)
    # ndmin=1 keeps a one-line truth file as a 1-element array instead of a
    # 0-d scalar that cannot be indexed or compared row by row.
    actual = np.loadtxt(truths, ndmin=1)

    # A silent partial comparison would report a misleading accuracy, so a
    # size mismatch is treated as an error.
    if predicted.shape != actual.shape:
        raise ValueError(
            "Predictions and truths differ in shape: %s vs %s"
            % (predicted.shape, actual.shape)
        )

    total = predicted.size
    correct = int((predicted == actual).sum())

    # Plain Python ints, so the result is a plain Python float.
    prediction_score = correct/total

    print("\nPrediction accuracy score:")
    return prediction_score

## Main Program Run

In [3]:
#input file locations: every data file lives under DATAFILES_DIR
DATAFILES_DIR = 'input_data'
EIGENVALS_TRAIN, LABELS_TRAIN, EIGENVALS_TEST, LABELS_TEST = (
    os.path.join(DATAFILES_DIR, filename)
    for filename in (
        'eigenvalues_train.csv',
        'labels_train.dat',
        'eigenvalues_test.csv',
        'labels_test.dat',
    )
)

#load the training set from disk and convert it to numpy arrays
X, y = prepare_data(*read_data(EIGENVALS_TRAIN, LABELS_TRAIN))

#fit the classifier; the reported score is measured on the training data
clf_model, clf_train_score = train_svm_data(X, y, 'scale')
print("Clf score: ", clf_train_score)

#classify the held-out eigenvalues
predicted_labels = predict_stasis_decline(EIGENVALS_TEST, clf_model)

#compare the predictions with the held-out labels and report as a percentage
prediction_accuracy_score = score_predictions(predicted_labels, LABELS_TEST)
print(prediction_accuracy_score * 100, "%")

Success: Eigenvalues Match Label Shape!
Clf score:  1.0

Predicted labels for new eigenvalues:
(0=Stasis; 1=Decline)

Prediction accuracy score:
100.0 %
