In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import sklearn
import os 
%matplotlib inline 

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import Imputer
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels

In [3]:
from sklearn.svm import SVC 
from sklearn.utils import shuffle

In [4]:
import warnings
# Suppress every warning category for the rest of the session. This keeps cell
# output short, but it also hides warnings raised by the libraries used below
# (deprecations, metric edge cases, ...), so disable it while debugging.
warnings.filterwarnings("ignore")

In [5]:
# Module-level StandardScaler shared by the evaluation and confusion-matrix
# helpers defined below. Each helper re-fits it on its own training split, so
# its fitted state always reflects whichever helper ran last.
scaler = StandardScaler()

In [6]:
def load_dataset(filename, directory=None):
    """Load a CSV file into a DataFrame.

    Parameters
    ----------
    filename : str
        Name of the CSV file, relative to the data directory.
    directory : str, optional
        Directory that contains ``filename``. When omitted, the notebook-level
        global ``path`` is used, which must already be defined (this is the
        original behaviour and raises ``NameError`` otherwise).

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_csv`` parses from the file.
    """
    # Fall back to the global only when the caller did not pass a directory,
    # so the function no longer depends on hidden notebook state.
    base_dir = path if directory is None else directory
    csv_path = os.path.join(base_dir, filename)
    return pd.read_csv(csv_path)

In [7]:
def evaluate_features(list_features, labels, testsize, k_values=(3, 4, 7, 8)):
    """Score k-nearest-neighbour classifiers on several candidate feature sets.

    For every feature set the data is split once (fixed ``random_state=42``),
    standardised with the notebook-level ``scaler`` (fitted on the training
    part only), and one ``KNeighborsClassifier`` is trained per value in
    ``k_values``.

    Parameters
    ----------
    list_features : iterable
        Feature matrices to compare; each one is paired with ``labels``.
    labels : array-like
        Target labels. The precision/recall/F1 scorers are called with their
        default settings -- presumably binary labels; confirm against callers.
    testsize : float or int
        Forwarded to ``train_test_split`` as ``test_size``.
    k_values : sequence of int, optional
        Neighbour counts to evaluate. Defaults to the original ``(3, 4, 7, 8)``.

    Returns
    -------
    tuple of four lists ``(acc, prec, rec, f1)``
        Each list has one entry per feature set; every entry is itself a list
        holding the metric for each k, in ``k_values`` order.
    """
    acc, prec, rec, f1 = [], [], [], []
    for feature in list_features:
        # Neither the split (fixed seed) nor the scaling depends on k, so do
        # both once per feature set instead of once per classifier.
        features_train, features_test, labels_train, labels_test = train_test_split(
            feature, labels, test_size=testsize, random_state=42)
        features_train = scaler.fit_transform(features_train)
        features_test = scaler.transform(features_test)

        acc1, prec1, rec1, f1_1 = [], [], [], []
        for k in k_values:
            classifier = KNeighborsClassifier(n_neighbors=k)
            classifier.fit(features_train, labels_train)
            pred = classifier.predict(features_test)

            acc1.append(accuracy_score(labels_test, pred))
            prec1.append(precision_score(labels_test, pred))
            rec1.append(recall_score(labels_test, pred))
            # scikit-learn scorers take (y_true, y_pred); keep that order here
            # like the other three metrics.
            f1_1.append(f1_score(labels_test, pred))

        acc.append(acc1)
        prec.append(prec1)
        rec.append(rec1)
        f1.append(f1_1)
    return acc, prec, rec, f1

In [3]:
def plot_metrics(list_feature, labels, testsize, y_lim):
    """Plot k-NN accuracy, precision, recall and F1 against ``n_neighbors``.

    Calls ``evaluate_features`` and draws one panel per metric, with one line
    (with point markers) per feature set.

    Parameters
    ----------
    list_feature : iterable
        Feature matrices forwarded to ``evaluate_features``.
    labels : array-like
        Target labels forwarded to ``evaluate_features``.
    testsize : float or int
        Test split size forwarded to ``evaluate_features``.
    y_lim : sequence of two numbers
        Lower and upper y-axis limits applied to every panel; ticks are placed
        every 0.05 between them.
    """
    acc, prec, rec, f1 = evaluate_features(list_feature, labels, testsize)
    fig, axes = plt.subplots(1, 4, figsize=(20, 5))
    # x positions: the neighbour counts evaluate_features scores by default.
    k_values = [3, 4, 7, 8]
    panels = [('accuracy', acc), ('precision', prec), ('recall', rec), ('f1', f1)]

    for axis, (metric_name, scores) in zip(axes, panels):
        # Iterate over the feature sets themselves. The previous loop ran over
        # the number of k values, which failed with fewer than four feature
        # sets and silently ignored any beyond the fourth.
        for index, series in enumerate(scores):
            series_label = 'features' if index == 0 else 'features{}'.format(index + 1)
            # One labelled artist per series keeps the legend unambiguous;
            # positional legend labels were shared between the scatter and
            # line artists and could end up on the wrong series.
            axis.plot(k_values, series, marker='o', label=series_label)
        axis.set_ylim(y_lim[0], y_lim[1])
        axis.set_yticks(np.arange(y_lim[0], y_lim[1], 0.05))
        axis.set_xlabel('n_neighbors')
        axis.set_ylabel(metric_name)
        axis.legend()

In [9]:
def get_svm(list_features, labels, testsize, kernels=('linear', 'rbf', 'poly')):
    """Score SVM classifiers with several kernels on several feature sets.

    Each feature set is split once (fixed ``random_state=42``) and
    standardised once with the notebook-level ``scaler`` (fitted on the
    training part only); an ``SVC(C=1)`` is then trained per kernel.

    Parameters
    ----------
    list_features : iterable
        Feature matrices to compare; each one is paired with ``labels``.
    labels : array-like
        Target labels. The precision/recall/F1 scorers are called with their
        default settings -- presumably binary labels; confirm against callers.
    testsize : float or int
        Forwarded to ``train_test_split`` as ``test_size``.
    kernels : sequence of str, optional
        Kernel names passed to ``SVC``. Defaults to the original
        ``('linear', 'rbf', 'poly')``.

    Returns
    -------
    tuple of four lists ``(acc, prec, rec, f1s)``
        Each list has one entry per feature set; every entry is a list with
        the metric for each kernel, in ``kernels`` order.
    """
    acc, prec, rec, f1s = [], [], [], []
    for features in list_features:
        features_train, features_test, labels_train, labels_test = train_test_split(
            features, labels, test_size=testsize, random_state=42)
        # Scale exactly once per feature set. Scaling inside the kernel loop
        # re-fitted the scaler on data that had already been standardised.
        features_train = scaler.fit_transform(features_train)
        features_test = scaler.transform(features_test)

        acc1, prec1, rec1, f1s1 = [], [], [], []
        for kname in kernels:
            classifier = SVC(C=1, kernel=kname)
            classifier.fit(features_train, labels_train)
            pred = classifier.predict(features_test)

            acc1.append(accuracy_score(labels_test, pred))
            prec1.append(precision_score(labels_test, pred))
            rec1.append(recall_score(labels_test, pred))
            # scikit-learn scorers take (y_true, y_pred).
            f1s1.append(f1_score(labels_test, pred))

        acc.append(acc1)
        prec.append(prec1)
        rec.append(rec1)
        f1s.append(f1s1)
    return acc, prec, rec, f1s

In [25]:
def confusion_knn(list_features, labels, testsize, normalize=True):
    """Draw k-NN confusion matrices: one row per feature set, one column per k.

    Each feature set is split once (fixed ``random_state=42``), standardised
    once with the notebook-level ``scaler``, and classified with
    ``KNeighborsClassifier`` for k in (3, 4, 7).

    Parameters
    ----------
    list_features : sequence
        Feature matrices to compare; must be non-empty because its length
        sets the number of subplot rows.
    labels : array-like
        Target labels paired with every feature matrix.
    testsize : float or int
        Forwarded to ``train_test_split`` as ``test_size``.
    normalize : bool, optional
        When true (the default, matching the original behaviour) every row of
        the matrix is divided by its total so cells show per-true-class
        fractions; otherwise raw counts are shown.
    """
    k_values = (3, 4, 7)
    n_cols = len(k_values)
    fig, ax = plt.subplots(len(list_features), n_cols, figsize=(15, 10))
    ax = ax.ravel()
    if normalize:
        print('Confusion matrix, with normalization')
    else:
        print('Confusion matrix, without normalization')

    for set_index, features in enumerate(list_features):
        features_train, features_test, labels_train, labels_test = train_test_split(
            features, labels, test_size=testsize, random_state=42)
        # Scale once per feature set; doing it inside the k loop re-scaled
        # data that had already been standardised.
        features_train = scaler.fit_transform(features_train)
        features_test = scaler.transform(features_test)

        for col, k in enumerate(k_values):
            classifier = KNeighborsClassifier(n_neighbors=k)
            classifier.fit(features_train, labels_train)
            pred = classifier.predict(features_test)

            axis = ax[set_index * n_cols + col]
            title = str(k) + ' features ' + str(set_index + 1)

            cm = confusion_matrix(labels_test, pred)
            # Only the labels that appear in the true or predicted values.
            classes = unique_labels(labels_test, pred)
            if normalize:
                row_totals = cm.sum(axis=1, keepdims=True)
                # A class that is predicted but never present in the test
                # labels has an all-zero row; leave it at 0 instead of
                # dividing by zero and filling the panel with NaN.
                cm = np.divide(cm, row_totals,
                               out=np.zeros(cm.shape, dtype=float),
                               where=row_totals != 0)

            im = axis.imshow(cm, interpolation='nearest', cmap='Blues')
            fig.colorbar(im, ax=axis)

            # Show every tick and label it with the matching class.
            axis.set(xticks=np.arange(cm.shape[1]),
                     yticks=np.arange(cm.shape[0]),
                     xticklabels=classes, yticklabels=classes,
                     title=title,
                     ylabel='True label',
                     xlabel='Predicted label')

            # Rotate the x tick labels and anchor them at their right end.
            plt.setp(axis.get_xticklabels(), rotation=45, ha="right",
                     rotation_mode="anchor")

            # Annotate each cell; switch to white text on dark cells.
            fmt = '.2f' if normalize else 'd'
            thresh = cm.max() / 2.
            for row in range(cm.shape[0]):
                for column in range(cm.shape[1]):
                    axis.text(column, row, format(cm[row, column], fmt),
                              ha="center", va="center",
                              color="white" if cm[row, column] > thresh else "black")

    # Lay the grid out once, after every panel and colorbar exists.
    fig.tight_layout()

In [1]:
def confusion_svm(list_features, labels, testsize, normalize=True):
    """Draw SVM confusion matrices: one row per feature set, one column per kernel.

    Each feature set is split once (fixed ``random_state=42``), standardised
    once with the notebook-level ``scaler``, and classified with ``SVC(C=1)``
    using the 'linear', 'rbf' and 'poly' kernels.

    Parameters
    ----------
    list_features : sequence
        Feature matrices to compare; must be non-empty because its length
        sets the number of subplot rows.
    labels : array-like
        Target labels paired with every feature matrix.
    testsize : float or int
        Forwarded to ``train_test_split`` as ``test_size``.
    normalize : bool, optional
        When true (the default, matching the original behaviour) every row of
        the matrix is divided by its total so cells show per-true-class
        fractions; otherwise raw counts are shown.
    """
    kernels = ['linear', 'rbf', 'poly']
    n_cols = len(kernels)
    fig, ax = plt.subplots(len(list_features), n_cols, figsize=(15, 10))
    ax = ax.ravel()
    if normalize:
        print('Confusion matrix, with normalization')
    else:
        print('Confusion matrix, without normalization')

    for set_index, features in enumerate(list_features):
        features_train, features_test, labels_train, labels_test = train_test_split(
            features, labels, test_size=testsize, random_state=42)
        # Scale once per feature set; doing it inside the kernel loop
        # re-scaled data that had already been standardised.
        features_train = scaler.fit_transform(features_train)
        features_test = scaler.transform(features_test)

        for col, kname in enumerate(kernels):
            classifier = SVC(C=1, kernel=kname)
            classifier.fit(features_train, labels_train)
            pred = classifier.predict(features_test)

            axis = ax[set_index * n_cols + col]
            title = kname + ' features ' + str(set_index + 1)

            cm = confusion_matrix(labels_test, pred)
            # Only the labels that appear in the true or predicted values.
            classes = unique_labels(labels_test, pred)
            if normalize:
                row_totals = cm.sum(axis=1, keepdims=True)
                # A class that is predicted but never present in the test
                # labels has an all-zero row; leave it at 0 instead of
                # dividing by zero and filling the panel with NaN.
                cm = np.divide(cm, row_totals,
                               out=np.zeros(cm.shape, dtype=float),
                               where=row_totals != 0)

            im = axis.imshow(cm, interpolation='nearest', cmap='Blues')
            fig.colorbar(im, ax=axis)

            # Show every tick and label it with the matching class.
            axis.set(xticks=np.arange(cm.shape[1]),
                     yticks=np.arange(cm.shape[0]),
                     xticklabels=classes, yticklabels=classes,
                     title=title,
                     ylabel='True label',
                     xlabel='Predicted label')

            # Rotate the x tick labels and anchor them at their right end.
            plt.setp(axis.get_xticklabels(), rotation=45, ha="right",
                     rotation_mode="anchor")

            # Annotate each cell; switch to white text on dark cells.
            fmt = '.2f' if normalize else 'd'
            thresh = cm.max() / 2.
            for row in range(cm.shape[0]):
                for column in range(cm.shape[1]):
                    axis.text(column, row, format(cm[row, column], fmt),
                              ha="center", va="center",
                              color="white" if cm[row, column] > thresh else "black")

    # Lay the grid out once, after every panel and colorbar exists.
    fig.tight_layout()
            

In [10]:
def visualize_svm(list_feature, labels, testsize, y_lim):
    """Plot SVM accuracy, precision, recall and F1 for each kernel.

    Calls ``get_svm`` and draws one panel per metric, with one line (with
    point markers) per feature set and the kernel names on the x axis.

    Parameters
    ----------
    list_feature : iterable
        Feature matrices forwarded to ``get_svm``.
    labels : array-like
        Target labels forwarded to ``get_svm``.
    testsize : float or int
        Test split size forwarded to ``get_svm``.
    y_lim : sequence of two numbers
        Lower and upper y-axis limits applied to every panel; ticks are placed
        every 0.05 between them.
    """
    acc, prec, rec, f1 = get_svm(list_feature, labels, testsize)
    fig, axes = plt.subplots(1, 4, figsize=(20, 5))
    # x positions: the kernels get_svm scores by default, in the same order.
    kernel_names = ['linear', 'rbf', 'poly']
    panels = [('accuracy', acc), ('precision', prec), ('recall', rec), ('f1', f1)]

    for axis, (metric_name, scores) in zip(axes, panels):
        for index, series in enumerate(scores):
            series_label = 'features' if index == 0 else 'features{}'.format(index + 1)
            # One labelled artist per series keeps the legend unambiguous;
            # positional legend labels were shared between the line and
            # scatter artists and could end up on the wrong series.
            axis.plot(kernel_names, series, marker='o', label=series_label)
        axis.set_ylim(y_lim[0], y_lim[1])
        axis.set_yticks(np.arange(y_lim[0], y_lim[1], 0.05))
        axis.set_xlabel('kernel')
        axis.set_ylabel(metric_name)
        axis.legend()

In [11]:
def correl_draw(dataset):
    """Display the pairwise correlation matrix of ``dataset`` as a heat map.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Data whose numeric (and boolean) columns are correlated with
        ``DataFrame.corr``; other columns are left out of the plot.
    """
    # Select the columns explicitly: recent pandas releases no longer drop
    # non-numeric columns inside corr() and raise on them instead.
    numeric_columns = dataset.select_dtypes(include=['number', 'bool'])
    corr = numeric_columns.corr()

    fig, ax = plt.subplots(figsize=(20, 20))
    # Colour each cell by its correlation value.
    d = ax.matshow(corr, cmap='jet')

    tick_positions = range(len(corr.columns))
    # Column names on both axes; x labels are rotated so long names do not overlap.
    plt.xticks(tick_positions, corr.columns, rotation=90)
    plt.yticks(tick_positions, corr.columns)

    fig.colorbar(d)
    plt.show()