In [1]:
import numpy as np 
import matplotlib as mpl
import matplotlib.pyplot as plt 
from sklearn.datasets import make_classification
import glob
import pandas as pd
from sklearn.neural_network import MLPClassifier as NN
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.gaussian_process import GaussianProcessClassifier as GP
from sklearn.gaussian_process.kernels import RBF
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, Lasso
from sklearn.svm import SVC

In [2]:
def getData(fileText):
    #Get the data text sorted out
    column1 = []
    column2 = []
    for row in fileText.splitlines():
        if(row[7]!=" "):
            column1.append(row[7:15])
            column2.append(row[20:38])
    column1 = column1[10:310]
    column2 = column2[10:310]
    #Convert to numerical data     
    column1_num = pd.to_numeric(column1)
    column2_num = pd.to_numeric(column2)
    return column2_num

In [3]:
class Classifier:
    def __init__(self, X, y):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=42)
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test
    def NNClassify(self):
        clf = NN(solver='adam', activation='relu', alpha=0.0001, learning_rate_init=0.001, max_iter=500, random_state=42)
        clf.fit(self.X_train, self.y_train)
        print("Accuracy:", clf.score(self.X_test, self.y_test))
        return clf.predict(self.X_test)
    def RFClassify(self):
        clf = RF(n_estimators=10, criterion='entropy', random_state=42)
        clf.fit(self.X_train, self.y_train)
        print("Accuracy:", clf.score(self.X_test, self.y_test))
        return clf.predict(self.X_test)
    def GPClassify(self):
        rbfkernel = 1.0 * RBF(1.0)
        clf = GP(rbfkernel)
        clf.fit(self.X_train, self.y_train)
        print("Accuracy:", clf.score(self.X_test, self.y_test))
        return clf.predict(self.X_test)
    def LRClassify(self):
        clf = LogisticRegression(random_state=42)
        clf.fit(self.X_train, self.y_train)
        print("Accuracy:", clf.score(self.X_test, self.y_test))
        return clf.predict(self.X_test)
    def SVClassify(self):
        clf = SVC(kernel='rbf', degree=3, max_iter=-1)
        clf.fit(self.X_train, self.y_train)
        print("Accuracy:", clf.score(self.X_test, self.y_test))
        return clf.predict(self.X_test)

In [57]:
#Dummy data test
X, y = make_classification(random_state=42)
c = Classifier(X, y)
c.NNClassify()
c.RFClassify()
c.GPClassify()
c.LRClassify()
c.SVClassify()

Accuracy: 0.8
Accuracy: 1.0
Accuracy: 0.8
Accuracy: 0.8
Accuracy: 1.0


array([1, 1, 1, 0, 0, 0, 0, 0, 0, 0])

In [4]:
amorph = "/Users/johnrussell/Desktop/Titanium-Classification-Data/Amorphous Type 1 Titanium Edge/*.nor"
rutile = "/Users/johnrussell/Desktop/Titanium-Classification-Data/Rutile Type 2 Titanium/*.nor"
xdata = []
labels = []
num = 0
#Get data from all the files
for file in glob.glob(amorph):
    #print(num, file[file.index('XAFS'):]+"\n")
    num = num + 1
    fileCSV = pd.read_csv(file, skiprows=37)
    fileString = fileCSV.to_string()
    xdata.append(getData(fileString)[0])
    labels.append(0)

for file in glob.glob(rutile):
    #print(num, file[file.index('XAFS'):]+"\n")
    num = num + 1
    fileCSV = pd.read_csv(file, skiprows=37)
    #print(fileCSV)
    fileString = fileCSV.to_string()
    xdata.append(getData(fileString)[0])
    labels.append(1)
    

In [5]:
xdata = np.array(xdata, dtype=float).reshape(-1, 1)
cl = Classifier(xdata, labels)
print(cl.NNClassify())
print(cl.RFClassify())
print(cl.GPClassify())
print(cl.LRClassify())
print(cl.SVClassify())

Accuracy: 0.5789473684210527
[0 0 1 0 0 1 0 0 0 0 1 1 0 0 0 1 1 0 0]
Accuracy: 0.7368421052631579
[1 0 1 0 1 0 0 0 0 0 1 1 0 1 0 0 1 0 0]
Accuracy: 0.5263157894736842
[0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 1 1 0 0]
Accuracy: 0.5789473684210527
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Accuracy: 0.5263157894736842
[0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 1 1 0 0]
