In [46]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib widget
from sklearn import ensemble, feature_selection, preprocessing, model_selection, neighbors, svm

## Load the dataset

In [47]:
input_file = "data.csv"
df = pd.read_csv(input_file, header = 0)

In [48]:
dataset = df.values
X = dataset[:,1:7]
y = dataset[:,7]
y = y.astype('int')

## Apply Tree based feature selection

In [49]:
col = df.columns.tolist()
col = col[1:7]

In [50]:
imp = np.zeros((6,))
for j in range(1000):
    clf = ensemble.ExtraTreesClassifier(n_estimators=50, random_state=j )
    clf = clf.fit(X, y)
    imp += clf.feature_importances_
imp = imp / 1000
imp

array([0.15940957, 0.13788721, 0.17424087, 0.16455568, 0.17854944,
       0.18535723])

In [51]:
for i in range(len(col)):
    print(f"{col[i]}: {imp[i]}")

# Liquid Flow Rate (mL/min) : 0.15940956903706738
 Atomization O2 Flow Rate (L/min) : 0.1378872132834783
 Sheath O2 Flow Rate (L/min) : 0.17424087331724217
 ER1 (EtOH/AtomO2) : 0.16455567640291435
 ER2 (EtOH/AtomO2+SheathO2): 0.17854944013682392
Burner Cap Temperature: 0.18535722782247394


In [52]:
model = feature_selection.SelectFromModel(clf, prefit=True)
X_new = model.transform(X)

In [53]:
sc = preprocessing.StandardScaler()
X_new = sc.fit_transform(X_new)

In [54]:
success = np.array([0.0,0.0,0.0])
failure = np.array([0.0,0.0,0.0])
for i in range(53):
    if y[i] == 0:
        failure = np.append(failure, X_new[i])
    else:
        success = np.append(success, X_new[i])
failure = failure.reshape((33,3))
success = success.reshape((22,3))

ValueError: cannot reshape array of size 131 into shape (33,3)

Stable: Orange, Unstable: Blue

In [None]:
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(failure[1:, 0], failure[1:, 1], failure[1:, 2], alpha=0.8, marker="x")
ax.scatter(success[1:, 0], success[1:, 1], success[1:, 2], alpha=0.5, marker="^")

## Experimenting with models

In [None]:
X_train, X_cross, y_train, y_cross = model_selection.train_test_split(X_new, y, train_size=.8, random_state=1)
neigh = neighbors.KNeighborsClassifier()
neigh.fit(X_train, y_train)
neigh.score(X_train, y_train)

In [None]:
neigh.score(X_cross, y_cross)

In [None]:
X_train, X_cross, y_train, y_cross = model_selection.train_test_split(X_new, y, train_size=.8, random_state=1)
clf3 = svm.SVC(random_state=1)
clf3.fit(X_train, y_train)
clf3.score(X_train, y_train)

In [None]:
clf3.score(X_cross, y_cross)
