In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score

In [None]:
# Load the Machine1 (Oct 2020) and Machine2 (Aug 2020) datasets from CSV.
# NOTE(review): the Machine1 frame is read from a file whose name begins with
# "MC2" -- confirm this is really the Machine1 export and not a filename typo.
MC1csv, MC2csv = (
    pd.read_csv(csv_path)
    for csv_path in ("MC2Oct2020_csv.csv", "MC2Aug2020_csv.csv")
)

In [None]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

# Columns removed from both raw frames before modelling.
excluded_columns = ['Part', 'IC 2D', 'Lot', 'Date', 'Time', 'Machine', 'Server Result']

MC1 = MC1csv.drop(columns=excluded_columns)
MC2 = MC2csv.drop(columns=excluded_columns)

# Replace the 'Machine Result' labels with integer codes.
# The encoder is refit for each frame, so it ends up fitted on MC2.
for frame in (MC1, MC2):
    frame['Machine Result'] = le.fit_transform(frame['Machine Result'])

In [None]:
# Outlier handling for MC1 using the 1.5 * IQR rule, followed by imputation.
# NOTE: MC1 is modified in place below, so re-running this cell recomputes the
# quartiles on already-masked data.

# Per-column quartiles and inter-quartile range.
Q1MC1 = MC1.quantile(0.25)
Q3MC1 = MC1.quantile(0.75)
IQR1 = Q3MC1 - Q1MC1

# Fill values are computed before masking, i.e. they still include outliers.
MC1mean = MC1.mean()
MC1median = MC1.median()

# Flag every cell that lies outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] of its column.
outliers = (MC1 < (Q1MC1 - 1.5 * IQR1)) | (MC1 > (Q3MC1 + 1.5 * IQR1))

# The encoded target must never be screened as if it were a measurement:
# when one class holds more than 75% of the rows, Q1 == Q3 and IQR == 0, so
# every minority-class label would be flagged, blanked and then overwritten
# by the mean/median fill, corrupting the labels used for training.
outliers['Machine Result'] = False

MC1[outliers] = np.nan

# Three imputed variants of the masked frame.
MC1Mean = MC1.fillna(MC1mean)      # outliers -> column mean
MC1Median = MC1.fillna(MC1median)  # outliers -> column median
MC1Abs = MC1Mean.abs()             # mean-filled, then absolute values


## Normalizing MC1Mean

In [None]:
from sklearn.preprocessing import LabelEncoder
# Split the mean-imputed frame into target vector and feature matrix.
# NOTE(review): 'Machine Result' was already label-encoded when MC1 was built;
# this second encoding only matters if the outlier/fill step altered the label
# values -- confirm.
le = LabelEncoder()
target_column = MC1Mean['Machine Result']
y = le.fit_transform(target_column)
X = MC1Mean.drop(columns=['Machine Result'])

In [None]:
from sklearn.model_selection import train_test_split
# Hold out a test partition (default split size, fixed seed).
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=1)

from sklearn.preprocessing import MinMaxScaler
# Learn the per-feature min/max from the training rows only, then apply the
# same scaling to both partitions.
scaler = MinMaxScaler().fit(Xtrain)
Xtrain = scaler.transform(Xtrain)
Xtest = scaler.transform(Xtest)

## KNN MC1Mean

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, classification_report
# Baseline KNN with default hyper-parameters, scored on the held-out rows.
knn = KNeighborsClassifier().fit(Xtrain, ytrain)
ypred = knn.predict(Xtest)

print("Before tuning KNN")
for caption, scorer in (
    ("Mean squared error:", mean_squared_error),
    ("Accuracy score:", accuracy_score),
    ("Precision score: ", precision_score),
    ("Recall score: ", recall_score),
    ("F1 score: ", f1_score),  # balance between precision and recall
):
    print(caption, scorer(ytest, ypred))

print(classification_report(ytest, ypred))

In [None]:
# Search space: both neighbour-weighting schemes, n_neighbors = 1..99.
neighbor_counts = list(np.arange(1, 100))
parameters = [{'weights': ['uniform', 'distance'], 'n_neighbors': neighbor_counts}]

grid = GridSearchCV(estimator=knn, param_grid=parameters)
grid.fit(Xtrain, ytrain)

# Show the best configuration found by the search.
print(grid.best_estimator_)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, classification_report
# Refit KNN with the hyper-parameters selected by the grid search in the
# previous cell (`grid.best_params_`), so the scores below describe the tuned
# model rather than a second default-configured classifier.
knn = KNeighborsClassifier(**grid.best_params_)
knn.fit(Xtrain, ytrain)

# Score the tuned model on the held-out test partition.
ypred = knn.predict(Xtest)
print("After tuning KNN")
print("Mean squared error:", mean_squared_error(ytest, ypred))
print("Accuracy score:", accuracy_score(ytest, ypred))
print("Precision score: ", precision_score(ytest, ypred))
print("Recall score: ", recall_score(ytest, ypred))
print("F1 score: ", f1_score(ytest, ypred))  # balance between precision and recall

print(classification_report(ytest, ypred))

## GaussianNB MC1Mean

In [None]:
from sklearn.naive_bayes import GaussianNB 
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, classification_report

# Baseline Gaussian naive Bayes with default settings, scored on the held-out rows.
gnb = GaussianNB().fit(Xtrain, ytrain)
ypred = gnb.predict(Xtest)

print("Before tuning GaussianNB")
for caption, scorer in (
    ("Mean squared error:", mean_squared_error),
    ("Accuracy score:", accuracy_score),
    ("Precision score: ", precision_score),
    ("Recall score: ", recall_score),
    ("F1 score: ", f1_score),  # balance between precision and recall
):
    print(caption, scorer(ytest, ypred))

print(classification_report(ytest, ypred))

In [None]:
# Search space: 100 log-spaced var_smoothing values from 1 down to 1e-9.
smoothing_values = np.logspace(0, -9, num=100)
parameters = [{'var_smoothing': smoothing_values}]

grid = GridSearchCV(estimator=gnb, param_grid=parameters)
grid.fit(Xtrain, ytrain)

# Show the best configuration found by the search.
print(grid.best_estimator_)

In [None]:
from sklearn.naive_bayes import GaussianNB 
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, classification_report

# Refit GaussianNB with the var_smoothing value selected by the grid search in
# the previous cell (`grid.best_params_`), so the scores below describe the
# tuned model rather than a second default-configured classifier.
gnb = GaussianNB(**grid.best_params_)
gnb.fit(Xtrain, ytrain)

# Score the tuned model on the held-out test partition.
ypred = gnb.predict(Xtest)
print("After tuning GaussianNB")
print("Mean squared error:", mean_squared_error(ytest, ypred))
print("Accuracy score:", accuracy_score(ytest, ypred))
print("Precision score: ", precision_score(ytest, ypred))
print("Recall score: ", recall_score(ytest, ypred))
print("F1 score: ", f1_score(ytest, ypred))  # balance between precision and recall

print(classification_report(ytest, ypred))

## RandomForestClassifier MC1Mean

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, classification_report

# Baseline random forest with default hyper-parameters.
# random_state is pinned so the "before tuning" scores are reproducible when
# the notebook is re-run (an unseeded forest gives different scores each run).
rfc = RandomForestClassifier(random_state=1)
rfc.fit(Xtrain, ytrain)

# Score the fitted forest on the held-out test partition.
ypred = rfc.predict(Xtest)
print("Before tuning RandomForestClassifier")
print("Mean squared error:", mean_squared_error(ytest, ypred))
print("Accuracy score:", accuracy_score(ytest, ypred))
print("Precision score: ", precision_score(ytest, ypred))
print("Recall score: ", recall_score(ytest, ypred))
print("F1 score: ", f1_score(ytest, ypred))  # balance between precision and recall

print(classification_report(ytest, ypred))

In [None]:
# Search space: forests of 1..99 trees.
tree_counts = list(np.arange(1, 100))
parameters = [{'n_estimators': tree_counts}]

grid = GridSearchCV(estimator=rfc, param_grid=parameters)
grid.fit(Xtrain, ytrain)

# Show the best configuration found by the search.
print(grid.best_estimator_)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, classification_report

# Refit the random forest with the n_estimators value selected by the grid
# search in the previous cell (`grid.best_params_`). A default forest uses
# n_estimators=100, which is not even part of the searched 1..99 range, so it
# cannot represent the tuned model.
# random_state is pinned so the reported scores are reproducible.
rfc = RandomForestClassifier(random_state=1, **grid.best_params_)
rfc.fit(Xtrain, ytrain)

# Score the tuned forest on the held-out test partition.
ypred = rfc.predict(Xtest)
print("After tuning RandomForestClassifier")
print("Mean squared error:", mean_squared_error(ytest, ypred))
print("Accuracy score:", accuracy_score(ytest, ypred))
print("Precision score: ", precision_score(ytest, ypred))
print("Recall score: ", recall_score(ytest, ypred))
print("F1 score: ", f1_score(ytest, ypred))  # balance between precision and recall

print(classification_report(ytest, ypred))

## Normalizing MC1Median

In [None]:
from sklearn.preprocessing import LabelEncoder
# Split the median-imputed frame into target vector and feature matrix.
le = LabelEncoder()
target_column = MC1Median['Machine Result']
y = le.fit_transform(target_column)
X = MC1Median.drop(columns=['Machine Result'])

In [None]:
from sklearn.model_selection import train_test_split
# Hold out a test partition (default split size, fixed seed).
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=1)

from sklearn.preprocessing import MinMaxScaler
# Learn the per-feature min/max from the training rows only, then apply the
# same scaling to both partitions.
scaler = MinMaxScaler().fit(Xtrain)
Xtrain = scaler.transform(Xtrain)
Xtest = scaler.transform(Xtest)

## KNN MC1Median

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, classification_report
# Baseline KNN with default hyper-parameters, scored on the held-out rows.
knn = KNeighborsClassifier().fit(Xtrain, ytrain)
ypred = knn.predict(Xtest)

print("Before tuning KNN")
for caption, scorer in (
    ("Mean squared error:", mean_squared_error),
    ("Accuracy score:", accuracy_score),
    ("Precision score: ", precision_score),
    ("Recall score: ", recall_score),
    ("F1 score: ", f1_score),  # balance between precision and recall
):
    print(caption, scorer(ytest, ypred))

print(classification_report(ytest, ypred))

In [None]:
# Search space: both neighbour-weighting schemes, n_neighbors = 1..99.
neighbor_counts = list(np.arange(1, 100))
parameters = [{'weights': ['uniform', 'distance'], 'n_neighbors': neighbor_counts}]

grid = GridSearchCV(estimator=knn, param_grid=parameters)
grid.fit(Xtrain, ytrain)

# Show the best configuration found by the search.
print(grid.best_estimator_)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, classification_report
# Refit KNN with the hyper-parameters selected by the grid search in the
# previous cell (`grid.best_params_`), so the scores below describe the tuned
# model rather than a second default-configured classifier.
knn = KNeighborsClassifier(**grid.best_params_)
knn.fit(Xtrain, ytrain)

# Score the tuned model on the held-out test partition.
ypred = knn.predict(Xtest)
print("After tuning KNN")
print("Mean squared error:", mean_squared_error(ytest, ypred))
print("Accuracy score:", accuracy_score(ytest, ypred))
print("Precision score: ", precision_score(ytest, ypred))
print("Recall score: ", recall_score(ytest, ypred))
print("F1 score: ", f1_score(ytest, ypred))  # balance between precision and recall

print(classification_report(ytest, ypred))

## GaussianNB MC1Median

In [None]:
from sklearn.naive_bayes import GaussianNB 
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, classification_report

# Baseline Gaussian naive Bayes with default settings, scored on the held-out rows.
gnb = GaussianNB().fit(Xtrain, ytrain)
ypred = gnb.predict(Xtest)

print("Before tuning GaussianNB")
for caption, scorer in (
    ("Mean squared error:", mean_squared_error),
    ("Accuracy score:", accuracy_score),
    ("Precision score: ", precision_score),
    ("Recall score: ", recall_score),
    ("F1 score: ", f1_score),  # balance between precision and recall
):
    print(caption, scorer(ytest, ypred))

print(classification_report(ytest, ypred))

In [None]:
# Search space: 100 log-spaced var_smoothing values from 1 down to 1e-9.
smoothing_values = np.logspace(0, -9, num=100)
parameters = [{'var_smoothing': smoothing_values}]

grid = GridSearchCV(estimator=gnb, param_grid=parameters)
grid.fit(Xtrain, ytrain)

# Show the best configuration found by the search.
print(grid.best_estimator_)

In [None]:
from sklearn.naive_bayes import GaussianNB 
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, classification_report

# Refit GaussianNB with the var_smoothing value selected by the grid search in
# the previous cell (`grid.best_params_`), so the scores below describe the
# tuned model rather than a second default-configured classifier.
gnb = GaussianNB(**grid.best_params_)
gnb.fit(Xtrain, ytrain)

# Score the tuned model on the held-out test partition.
ypred = gnb.predict(Xtest)
print("After tuning GaussianNB")
print("Mean squared error:", mean_squared_error(ytest, ypred))
print("Accuracy score:", accuracy_score(ytest, ypred))
print("Precision score: ", precision_score(ytest, ypred))
print("Recall score: ", recall_score(ytest, ypred))
print("F1 score: ", f1_score(ytest, ypred))  # balance between precision and recall

print(classification_report(ytest, ypred))

## RandomForestClassifier MC1Median

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, classification_report

# Baseline random forest with default hyper-parameters.
# random_state is pinned so the "before tuning" scores are reproducible when
# the notebook is re-run (an unseeded forest gives different scores each run).
rfc = RandomForestClassifier(random_state=1)
rfc.fit(Xtrain, ytrain)

# Score the fitted forest on the held-out test partition.
ypred = rfc.predict(Xtest)
print("Before tuning RandomForestClassifier")
print("Mean squared error:", mean_squared_error(ytest, ypred))
print("Accuracy score:", accuracy_score(ytest, ypred))
print("Precision score: ", precision_score(ytest, ypred))
print("Recall score: ", recall_score(ytest, ypred))
print("F1 score: ", f1_score(ytest, ypred))  # balance between precision and recall

print(classification_report(ytest, ypred))

In [None]:
# Search space: forests of 1..99 trees.
tree_counts = list(np.arange(1, 100))
parameters = [{'n_estimators': tree_counts}]

grid = GridSearchCV(estimator=rfc, param_grid=parameters)
grid.fit(Xtrain, ytrain)

# Show the best configuration found by the search.
print(grid.best_estimator_)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, classification_report

# Refit the random forest with the n_estimators value selected by the grid
# search in the previous cell (`grid.best_params_`). A default forest uses
# n_estimators=100, which is not even part of the searched 1..99 range, so it
# cannot represent the tuned model.
# random_state is pinned so the reported scores are reproducible.
rfc = RandomForestClassifier(random_state=1, **grid.best_params_)
rfc.fit(Xtrain, ytrain)

# Score the tuned forest on the held-out test partition.
ypred = rfc.predict(Xtest)
print("After tuning RandomForestClassifier")
print("Mean squared error:", mean_squared_error(ytest, ypred))
print("Accuracy score:", accuracy_score(ytest, ypred))
print("Precision score: ", precision_score(ytest, ypred))
print("Recall score: ", recall_score(ytest, ypred))
print("F1 score: ", f1_score(ytest, ypred))  # balance between precision and recall

print(classification_report(ytest, ypred))

## Normalizing MC1Abs

In [None]:
from sklearn.preprocessing import LabelEncoder
# Split the absolute-valued, mean-imputed frame into target vector and feature matrix.
le = LabelEncoder()
target_column = MC1Abs['Machine Result']
y = le.fit_transform(target_column)
X = MC1Abs.drop(columns=['Machine Result'])

In [None]:
from sklearn.model_selection import train_test_split
# Hold out a test partition (default split size, fixed seed).
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=1)

from sklearn.preprocessing import MinMaxScaler
# Learn the per-feature min/max from the training rows only, then apply the
# same scaling to both partitions.
scaler = MinMaxScaler().fit(Xtrain)
Xtrain = scaler.transform(Xtrain)
Xtest = scaler.transform(Xtest)

## KNN MC1Abs

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, classification_report
# Baseline KNN with default hyper-parameters, scored on the held-out rows.
knn = KNeighborsClassifier().fit(Xtrain, ytrain)
ypred = knn.predict(Xtest)

print("Before tuning KNN")
for caption, scorer in (
    ("Mean squared error:", mean_squared_error),
    ("Accuracy score:", accuracy_score),
    ("Precision score: ", precision_score),
    ("Recall score: ", recall_score),
    ("F1 score: ", f1_score),  # balance between precision and recall
):
    print(caption, scorer(ytest, ypred))

print(classification_report(ytest, ypred))

In [None]:
# Search space: both neighbour-weighting schemes, n_neighbors = 1..99.
neighbor_counts = list(np.arange(1, 100))
parameters = [{'weights': ['uniform', 'distance'], 'n_neighbors': neighbor_counts}]

grid = GridSearchCV(estimator=knn, param_grid=parameters)
grid.fit(Xtrain, ytrain)

# Show the best configuration found by the search.
print(grid.best_estimator_)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, classification_report
# Refit KNN with the hyper-parameters selected by the grid search in the
# previous cell. Reading them from `grid.best_params_` (instead of hard-coding
# n_neighbors / weights copied from an earlier run) keeps this cell in sync
# with the search whenever the data or preprocessing changes.
knn = KNeighborsClassifier(**grid.best_params_)
knn.fit(Xtrain, ytrain)

# Score the tuned model on the held-out test partition.
ypred = knn.predict(Xtest)
print("After tuning KNN")
print("Mean squared error:", mean_squared_error(ytest, ypred))
print("Accuracy score:", accuracy_score(ytest, ypred))
print("Precision score: ", precision_score(ytest, ypred))
print("Recall score: ", recall_score(ytest, ypred))
print("F1 score: ", f1_score(ytest, ypred))  # balance between precision and recall

print(classification_report(ytest, ypred))

## GaussianNB MC1Abs

In [None]:
from sklearn.naive_bayes import GaussianNB 
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, classification_report

# Baseline Gaussian naive Bayes with default settings, scored on the held-out rows.
gnb = GaussianNB().fit(Xtrain, ytrain)
ypred = gnb.predict(Xtest)

print("Before tuning GaussianNB")
for caption, scorer in (
    ("Mean squared error:", mean_squared_error),
    ("Accuracy score:", accuracy_score),
    ("Precision score: ", precision_score),
    ("Recall score: ", recall_score),
    ("F1 score: ", f1_score),  # balance between precision and recall
):
    print(caption, scorer(ytest, ypred))

print(classification_report(ytest, ypred))

In [None]:
# Search space: 100 log-spaced var_smoothing values from 1 down to 1e-9.
smoothing_values = np.logspace(0, -9, num=100)
parameters = [{'var_smoothing': smoothing_values}]

grid = GridSearchCV(estimator=gnb, param_grid=parameters)
grid.fit(Xtrain, ytrain)

# Show the best configuration found by the search.
print(grid.best_estimator_)

In [None]:
from sklearn.naive_bayes import GaussianNB 
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, classification_report

# Refit GaussianNB with the var_smoothing value selected by the grid search in
# the previous cell (`grid.best_params_`), so the scores below describe the
# tuned model rather than a second default-configured classifier.
gnb = GaussianNB(**grid.best_params_)
gnb.fit(Xtrain, ytrain)

# Score the tuned model on the held-out test partition.
ypred = gnb.predict(Xtest)
print("After tuning GaussianNB")
print("Mean squared error:", mean_squared_error(ytest, ypred))
print("Accuracy score:", accuracy_score(ytest, ypred))
print("Precision score: ", precision_score(ytest, ypred))
print("Recall score: ", recall_score(ytest, ypred))
print("F1 score: ", f1_score(ytest, ypred))  # balance between precision and recall

print(classification_report(ytest, ypred))

## RandomForestClassifier MC1Abs

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, classification_report

# Baseline random forest with default hyper-parameters.
# random_state is pinned so the "before tuning" scores are reproducible when
# the notebook is re-run (an unseeded forest gives different scores each run).
rfc = RandomForestClassifier(random_state=1)
rfc.fit(Xtrain, ytrain)

# Score the fitted forest on the held-out test partition.
ypred = rfc.predict(Xtest)
print("Before tuning RandomForestClassifier")
print("Mean squared error:", mean_squared_error(ytest, ypred))
print("Accuracy score:", accuracy_score(ytest, ypred))
print("Precision score: ", precision_score(ytest, ypred))
print("Recall score: ", recall_score(ytest, ypred))
print("F1 score: ", f1_score(ytest, ypred))  # balance between precision and recall

print(classification_report(ytest, ypred))

In [None]:
# Search space: forests of 1..99 trees.
tree_counts = list(np.arange(1, 100))
parameters = [{'n_estimators': tree_counts}]

grid = GridSearchCV(estimator=rfc, param_grid=parameters)
grid.fit(Xtrain, ytrain)

# Show the best configuration found by the search.
print(grid.best_estimator_)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, classification_report

# Refit the random forest with the n_estimators value selected by the grid
# search in the previous cell (`grid.best_params_`). A default forest uses
# n_estimators=100, which is not even part of the searched 1..99 range, so it
# cannot represent the tuned model.
# random_state is pinned so the reported scores are reproducible.
rfc = RandomForestClassifier(random_state=1, **grid.best_params_)
rfc.fit(Xtrain, ytrain)

# Score the tuned forest on the held-out test partition.
ypred = rfc.predict(Xtest)
print("After tuning RandomForestClassifier")
print("Mean squared error:", mean_squared_error(ytest, ypred))
print("Accuracy score:", accuracy_score(ytest, ypred))
print("Precision score: ", precision_score(ytest, ypred))
print("Recall score: ", recall_score(ytest, ypred))
print("F1 score: ", f1_score(ytest, ypred))  # balance between precision and recall

print(classification_report(ytest, ypred))