In [58]:
import numpy as np
from sklearn.externals import joblib
import copy
import random
import matplotlib as mp
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.base import clone
from sklearn import metrics
from __future__ import unicode_literals

In [59]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [60]:
%matplotlib notebook

In [61]:
# Load the measurement dictionary that every later cell reads from.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only load a .pkl that comes from a trusted source.
datadict = joblib.load("Two_Port_Standard_Dictionary_Cleaned_Interpolated.pkl")

In [62]:
# Number of synthetic curves generated by the data-generation loop.
num_fake_points = 15000
# Standard deviations of the zero-mean Gaussian noise added to the magS11,
# magS21 and magS22 traces when the synthetic curves are generated.
magS11_deviation = 0.003
magS21_deviation = 0.0001
magS22_deviation = 0.003
# 50 evenly spaced values from 2 to 18 inclusive (presumably the frequency axis
# of each trace -- TODO confirm units). Not referenced by the other visible cells.
frequency_range = np.linspace(2,18,50)

In [63]:
# Build the synthetic data set: every fake curve is a randomly chosen dataset
# from datadict['CTN210']['System 2,7'] with independent zero-mean Gaussian
# noise added to each point of its magS11 / magS21 / magS22 traces.
system_datasets = datadict['CTN210']['System 2,7']
# list(...) so random.choice can index the keys on Python 3 as well as Python 2;
# hoisted out of the loop because the keys do not change between iterations.
dataset_keys = list(system_datasets.keys())
magS11_points = []
magS21_points = []
magS22_points = []
for i in range(num_fake_points):
    # Deep copy so the in-place noise below never alters the loaded data.
    new_dataset = copy.deepcopy(system_datasets[random.choice(dataset_keys)])
    # Noise is sized from each trace itself instead of assuming 50 points.
    new_dataset["magS11"] += np.random.normal(0,magS11_deviation,np.shape(new_dataset["magS11"]))
    new_dataset["magS21"] += np.random.normal(0,magS21_deviation,np.shape(new_dataset["magS21"]))
    new_dataset["magS22"] += np.random.normal(0,magS22_deviation,np.shape(new_dataset["magS22"]))
    magS11_points.append(new_dataset["magS11"].tolist())
    magS21_points.append(new_dataset["magS21"].tolist())
    magS22_points.append(new_dataset["magS22"].tolist())
# One row per synthetic curve, one column per point of the trace.
magS11_arr = np.array(magS11_points)
magS21_arr = np.array(magS21_points)
magS22_arr = np.array(magS22_points)

In [64]:
# Candidate models as (display name, unfitted estimator) pairs; the two parallel
# lists `names` and `classifiers` are derived from this single table.
_named_classifiers = [
    ("Gaussian Naive Bayes", GaussianNB()),
    ("Decision Tree", DecisionTreeClassifier()),
    ("Random Forest", RandomForestClassifier()),
    ("Quadratic Discriminant Analysis", QuadraticDiscriminantAnalysis()),
    # Ensemble built from its own fresh instances of the four models above.
    ("Voting", VotingClassifier(
        estimators=[
            ('gnb', GaussianNB()),
            ('rf', RandomForestClassifier()),
            ('dt', DecisionTreeClassifier()),
            ('qda', QuadraticDiscriminantAnalysis()),
        ],
        voting="soft")),
]
# Unzip the pairs back into two lists, keeping the original order.
names, classifiers = map(list, zip(*_named_classifiers))

In [65]:
def get_trained_classifier(randomness_amplitude,untrained_classifier):
    """Fit a fresh copy of a classifier to separate noisy magS21 curves from clean ones.

    The training set is the module-level ``magS21_arr`` (labelled "Good") plus a
    copy of it with extra zero-mean Gaussian noise added (labelled "Bad").

    Parameters
    ----------
    randomness_amplitude : float
        Standard deviation of the extra noise added to build the "Bad" curves.
        With an amplitude of 0 the "Bad" rows are identical to the "Good" rows.
    untrained_classifier : estimator
        Estimator to clone; the object passed in is not fitted itself.

    Returns
    -------
    The fitted clone.
    """
    # Noise is drawn for the whole array at once and sized from the data, so the
    # function no longer assumes every curve has exactly 50 points.
    noise = np.random.normal(0, randomness_amplitude, magS21_arr.shape)
    incorrect_magS21_data = magS21_arr + noise
    # "Bad" rows first, then "Good" rows, matching the label order below.
    S21_training_data_arr = np.concatenate([incorrect_magS21_data, magS21_arr])
    targets = ["Bad","Good"]
    S21_target_arr = np.repeat(targets,len(magS21_arr))
    # Fixed random_state keeps the row order reproducible between calls.
    S21_training_data_arr,S21_target_arr = shuffle(S21_training_data_arr,S21_target_arr, random_state=0)
    # Cloning and training the classifier
    clf = clone(untrained_classifier)
    clf.fit(S21_training_data_arr,S21_target_arr)
    return clf

In [66]:
def get_test_data_predict_correct_percent(randomness_amplitude,trained_classifier):
    """Return the percentage of noisy test curves the classifier labels "Good".

    Despite the name, no ground truth is involved: the result is simply the
    share of "Good" predictions.

    Parameters
    ----------
    randomness_amplitude : float
        Standard deviation of the zero-mean Gaussian noise added to the
        module-level ``magS21_arr`` to build the test curves.
    trained_classifier : fitted estimator
        Object whose ``predict`` returns one label per test curve.

    Returns
    -------
    float
        Percentage (0-100) of test curves predicted as "Good".
    """
    # Noise is sized from the data rather than a hard-coded 50 points per curve;
    # magS21_arr itself is left untouched.
    test_data = magS21_arr + np.random.normal(0, randomness_amplitude, magS21_arr.shape)
    predicted = np.asarray(trained_classifier.predict(test_data))
    num_good_points = int(np.count_nonzero(predicted == "Good"))
    return (num_good_points/float(len(predicted)))*100.0

In [67]:
def get_test_data_accuracy(randomness_amplitude,trained_classifier):
    """Return the percentage of noisy test curves the classifier labels correctly.

    Test curves are the module-level ``magS21_arr`` plus zero-mean Gaussian
    noise. A curve's expected label is "Bad" when at least one of its points
    ends up more than ``magS21_deviation`` away from the matching point of
    ``magS21_arr``; otherwise it is "Good".

    Parameters
    ----------
    randomness_amplitude : float
        Standard deviation of the noise added to build the test curves.
    trained_classifier : fitted estimator
        Object whose ``predict`` returns one label per test curve.

    Returns
    -------
    float
        Percentage (0-100) of predictions equal to the expected label.
    """
    # Noise is sized from the data rather than a hard-coded 50 points per curve.
    test_data = magS21_arr + np.random.normal(0, randomness_amplitude, magS21_arr.shape)
    # Same element-wise comparisons as the original nested loops, done array-wide.
    above_band = test_data > magS21_arr + magS21_deviation
    below_band = test_data < magS21_arr - magS21_deviation
    over_good_data_amplitude = (above_band | below_band).any(axis=1)
    expected = np.where(over_good_data_amplitude, "Bad", "Good")
    predicted = np.asarray(trained_classifier.predict(test_data))
    num_correct_points = int(np.count_nonzero(predicted == expected))
    return (num_correct_points/float(len(expected)))*100.0

In [68]:
# Grid resolution of the sweep: how many noise amplitudes are tried on the
# test side and on the training side.
num_test_amplitudes = 10
num_train_amplitudes = 10
# Lower and upper bounds of the noise amplitudes (standard deviations) swept.
# A minimum of 0 means the first grid point adds no extra noise at all.
test_amplitude_min = 0
train_amplitude_min = 0
test_amplitude_max = 0.0015
train_amplitude_max = 0.002

In [69]:
# Sweep every (training noise, test noise) amplitude pair: train one classifier
# per training amplitude, then score it against every test amplitude.
test_amplitudes = np.linspace(test_amplitude_min,test_amplitude_max,num_test_amplitudes)
training_amplitudes = np.linspace(train_amplitude_min,train_amplitude_max,num_train_amplitudes)
# Result grids, indexed [training amplitude index][test amplitude index].
z_vals_accuracy = []
z_vals_c_percent = []
# Template estimator; get_trained_classifier clones it, so it is never fitted itself.
classifier = RandomForestClassifier()
for training_amp in training_amplitudes:
    trained_classifier = get_trained_classifier(training_amp,classifier)
    training_amp_c_percent_lst = []
    training_amp_accuracy_lst = []
    for amp in test_amplitudes:
        test_data_predict_correct_percent = get_test_data_predict_correct_percent(amp,trained_classifier)
        test_data_accuracy = get_test_data_accuracy(amp,trained_classifier)
        training_amp_c_percent_lst.append(test_data_predict_correct_percent)
        training_amp_accuracy_lst.append(test_data_accuracy)
    z_vals_accuracy.append(training_amp_accuracy_lst)
    z_vals_c_percent.append(training_amp_c_percent_lst)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Finished Training Amplitude!")
print("Done!")

KeyboardInterrupt: 

In [None]:
# Coordinate grids for the result surfaces. The z grids are indexed
# [training amplitude][test amplitude], so matrix ('ij') indexing is required:
# x_vals[i][j] == training_amplitudes[i] and y_vals[i][j] == test_amplitudes[j].
# The default 'xy' indexing returns the transposed layout, which would pair each
# z value with the wrong amplitudes.
x_vals,y_vals = np.meshgrid(training_amplitudes,test_amplitudes,indexing='ij')

In [None]:
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D

In [None]:
# Wireframe of the percentage of test curves predicted "Good" over the
# (training amplitude, test amplitude) grid.
plt.ion()
fig = plt.figure()
# add_subplot(..., projection='3d') replaces fig.gca(projection='3d'), whose
# keyword arguments are no longer accepted by recent Matplotlib releases.
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_wireframe(x_vals,y_vals,np.asarray(z_vals_c_percent))
# Label the axes so the figure can be read without the surrounding code.
ax.set_xlabel("Training noise amplitude")
ax.set_ylabel("Test noise amplitude")
ax.set_zlabel("Predicted Good (%)")
ax.set_title("Share of test curves classified as Good")
plt.show()