# In [None]:
# Import packages
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import KFold, cross_val_score
from sklearn.metrics import confusion_matrix,precision_recall_curve,accuracy_score
from sklearn.metrics import auc,f1_score,roc_auc_score,roc_curve,recall_score
from sklearn.metrics import precision_score,recall_score,classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
from keras.models import Model, load_model
from keras.layers import Input, Dense
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras import regularizers
import warnings
warnings.filterwarnings("ignore")

"""Code for Meta Ensemble model-1 and the Credit Card Fraud data"""

# Load data
# Read the three CSV splits; in each one the final column is taken as the
# label and every preceding column as a feature.
train = pd.read_csv('train.csv')
weight = pd.read_csv('weight.csv')
test = pd.read_csv('test.csv')
X, Y = train.iloc[:, :-1], train.iloc[:, -1]
weightX, weightY = weight.iloc[:, :-1], weight.iloc[:, -1]
testX, testY = test.iloc[:, :-1], test.iloc[:, -1]

# Load autoencoder model
# Every feature matrix is passed through the loaded model and the output is
# wrapped in a DataFrame. weightX is rebound to its transformed version,
# while X and testX keep the raw features.
autoencoder = load_model('model.h5')
weightX = pd.DataFrame(autoencoder.predict(weightX))
train_A = pd.DataFrame(autoencoder.predict(X))
test_A = pd.DataFrame(autoencoder.predict(testX))

# Fine Tuning
# Grid-search the random forest hyper-parameters. A stratified 80/20 split of
# the autoencoder outputs is made; only the 80% portion (X_train, Y_train) is
# handed to the search below.
X_train, X_cross, Y_train, Y_cross = train_test_split(train_A, Y,
                                                      test_size=0.20,
                                                      random_state=21,
                                                      stratify=Y)
rf = RandomForestClassifier(random_state=1, class_weight='balanced')
# 99 values of n_estimators x 20 values of min_samples_split, each candidate
# scored by F1 under 5-fold cross-validation.
param_grid = {'n_estimators': range(1, 100),
              'min_samples_split': range(2, 22)}
CV_lr = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5,
                     scoring='f1', n_jobs=-1)
CV_lr.fit(X=X_train, y=Y_train)
best_param = CV_lr.best_params_
# The tuned estimator is a RandomForestClassifier, so label the output as such
# (the message previously claimed logistic regression).
print("Best Parameters for Random Forest: ", best_param)

# Training Results
# Fit the forest on the full set of autoencoder outputs and score it on the
# very rows it was trained on.
clf = RandomForestClassifier(
    n_estimators=7,
    min_samples_split=14,
    random_state=1,
    class_weight='balanced',
    n_jobs=-1,
)
clf.fit(train_A, Y)
y_pred = clf.predict(train_A)

# Scalar metrics first, then the per-class report and the confusion matrix.
accuracy = accuracy_score(Y, y_pred)
recall = recall_score(Y, y_pred)
precision = precision_score(Y, y_pred)

print("Accuracy: ", accuracy)
print("Recall", recall)
print("Precision", precision)

print("Classification report : ")
print(classification_report(Y, y_pred))

print("Confusion Matrix : ")
print(confusion_matrix(Y, y_pred))

# Test Predictions
# Build and fit a fresh forest on the training outputs, then evaluate it on
# the test outputs and save the predicted labels.
clf = RandomForestClassifier(
    n_estimators=7,
    min_samples_split=14,
    random_state=1,
    class_weight='balanced',
    n_jobs=-1,
)
clf.fit(train_A, Y)
y_pred = clf.predict(test_A)

accuracy = accuracy_score(testY, y_pred)
recall = recall_score(testY, y_pred)
precision = precision_score(testY, y_pred)

print("Accuracy: ", accuracy)
print("Recall", recall)
print("Precision", precision)

print("Classification report : ")
print(classification_report(testY, y_pred))

print("Confusion Matrix : ")
print(confusion_matrix(testY, y_pred))

# Persist the test predictions as a single 'Class' column, without the index.
pd.DataFrame({'Class': y_pred}).to_csv('AErf.csv', index=False)

# For weight data
# Evaluate the already-fitted forest on the weight split and save the
# predicted labels.
y_pred = clf.predict(weightX)

accuracy = accuracy_score(weightY, y_pred)
recall = recall_score(weightY, y_pred)
precision = precision_score(weightY, y_pred)

print("Accuracy: ", accuracy)
print("Recall", recall)
print("Precision", precision)

print("Classification report : ")
print(classification_report(weightY, y_pred))

print("Confusion Matrix : ")
print(confusion_matrix(weightY, y_pred))

# Persist the weight-split predictions as a single 'Class' column.
pd.DataFrame({'Class': y_pred}).to_csv('WAErf.csv', index=False)

"""Code for Meta Ensemble model-2 and the Credit Card Fraud data"""

# Load data
# Read the second pair of CSV splits; the last column is taken as the label
# and the remaining columns as features.
train = pd.read_csv('train2.csv')
test = pd.read_csv('test2.csv')
X, Y = train.iloc[:, :-1], train.iloc[:, -1]
testX, testY = test.iloc[:, :-1], test.iloc[:, -1]

# Reload the saved model and transform both feature matrices with it,
# wrapping each output in a DataFrame.
autoencoder = load_model('model.h5')
train_A = pd.DataFrame(autoencoder.predict(X))
test_A = pd.DataFrame(autoencoder.predict(testX))

# Fine Tuning
# Grid-search the random forest hyper-parameters. A stratified 80/20 split of
# the autoencoder outputs is made; only the 80% portion (X_train, Y_train) is
# handed to the search below.
X_train, X_cross, Y_train, Y_cross = train_test_split(train_A, Y,
                                                      test_size=0.20,
                                                      random_state=21,
                                                      stratify=Y)
rf = RandomForestClassifier(random_state=1, class_weight='balanced')
# 99 values of n_estimators x 20 values of min_samples_split, each candidate
# scored by F1 under 5-fold cross-validation.
param_grid = {'n_estimators': range(1, 100),
              'min_samples_split': range(2, 22)}
CV_lr = GridSearchCV(estimator=rf, param_grid=param_grid,
                     cv=5, scoring='f1', n_jobs=-1)
CV_lr.fit(X=X_train, y=Y_train)
best_param = CV_lr.best_params_
# The tuned estimator is a RandomForestClassifier, so label the output as such
# (the message previously claimed logistic regression).
print("Best Parameters for Random Forest: ", best_param)

# Training Predictions
# Fit a forest on the model-2 training outputs before predicting. Without
# this, `clf` would still be whichever classifier an earlier section left
# behind, and the metrics and the saved file below would not describe a model
# trained on this data. The hyper-parameters are the same ones the test
# predictions for this model use.
clf = RandomForestClassifier(n_estimators=7, min_samples_split=14,
                             random_state=1, class_weight='balanced',
                             n_jobs=-1)
clf.fit(train_A, Y)
y_pred = clf.predict(train_A)
precision = precision_score(Y, y_pred)
recall = recall_score(Y, y_pred)
accuracy = accuracy_score(Y, y_pred)
print("Accuracy: ", accuracy)
print("Recall", recall)
print("Precision", precision)
print("Classification report : ")
print(classification_report(Y, y_pred))
print("Confusion Matrix : ")
print(confusion_matrix(Y, y_pred))
# Save the training-set predictions as a single 'Class' column, no index.
pd.DataFrame(y_pred, columns=['Class']).to_csv('tAErf2.csv', index=False)

# Test Predictions
# Build and fit a forest on the model-2 training outputs, evaluate it on the
# test outputs, and save the predicted labels.
clf = RandomForestClassifier(
    n_estimators=7,
    min_samples_split=14,
    random_state=1,
    class_weight='balanced',
    n_jobs=-1,
)
clf.fit(train_A, Y)
y_pred = clf.predict(test_A)

accuracy = accuracy_score(testY, y_pred)
recall = recall_score(testY, y_pred)
precision = precision_score(testY, y_pred)

print("Accuracy: ", accuracy)
print("Recall", recall)
print("Precision", precision)

print("Classification report : ")
print(classification_report(testY, y_pred))

print("Confusion Matrix : ")
print(confusion_matrix(testY, y_pred))

# Persist the test predictions as a single 'Class' column, without the index.
pd.DataFrame({'Class': y_pred}).to_csv('AErf2.csv', index=False)

"""Code for Meta Ensemble model-1 and the Abalone data"""

# Load data
# Read the train, test and weight splits; in each one the last column is
# taken as the label and the remaining columns as features.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
weight1 = pd.read_csv("weight1.csv")
X = train.iloc[:, :-1]
Y = train.iloc[:, -1]
X_test = test.iloc[:, :-1]
Y_test = test.iloc[:, -1]
weight1X = weight1.iloc[:, :-1]
weight1Y = weight1.iloc[:, -1]

# Load the saved model once and reuse it for all three splits; a second
# identical load_model call before the test transform was redundant.
# NOTE(review): 'model.h6' is an unusual extension (the credit-card sections
# load 'model.h5') -- confirm the file name is intended.
autoencoder = load_model('model.h6')
# `weight1` is rebound from the raw CSV frame to the transformed features;
# `weight1X` keeps the raw features.
weight1 = autoencoder.predict(weight1X)
weight1 = pd.DataFrame(weight1)
train_A = autoencoder.predict(X)
train_A = pd.DataFrame(train_A)
test_A = autoencoder.predict(X_test)
test_A = pd.DataFrame(test_A)

# Hyper-parameter tuning
# Grid-search n_estimators (1..20) and min_samples_split (2..10), scoring each
# candidate by F1 under 5-fold cross-validation.
rf = RandomForestClassifier(random_state=1, class_weight='balanced')
param_grid = {'n_estimators': range(1, 21), 'min_samples_split': range(2, 11)}
CV_lr = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5,
                     scoring='f1', n_jobs=-1)
# Search on the autoencoder outputs, i.e. the same representation the final
# classifier is fitted on. The raw `train` frame must not be used here: its
# last column is the label Y itself, which would leak the target into the
# features and make the cross-validated scores meaningless.
CV_lr.fit(X=train_A, y=Y)
best_param = CV_lr.best_params_
print("Best Paramters : ", best_param)

# Training and test results
# NOTE(review): the hyper-parameters are hard-coded, and min_samples_split=215
# lies outside the grid searched above (2..10) -- confirm this is intended.
clf = RandomForestClassifier(n_estimators=9, min_samples_split=215,
                             random_state=41, class_weight='balanced',
                             n_jobs=-1)
clf.fit(train_A, Y)
y_pred = clf.predict(test_A)     # predictions on the test outputs
y_pred1 = clf.predict(train_A)   # predictions on the training outputs

# Training-set metrics.
precision = precision_score(Y, y_pred1)
recall = recall_score(Y, y_pred1)
accuracy = accuracy_score(Y, y_pred1)
print("Accuracy: ", accuracy)
print("Classification report : ")
print(classification_report(Y, y_pred1))
print("Confusion Matrix : ")
print(confusion_matrix(Y, y_pred1))

# Test-set metrics. The accuracy is computed from the test labels and test
# predictions; reusing `accuracy` here would repeat the training figure.
print("Test Accuracy: ", accuracy_score(Y_test, y_pred))
print("Classification report : ")
print(classification_report(Y_test, y_pred))
print("Confusion Matrix : ")
print(confusion_matrix(Y_test, y_pred))
# Save the test predictions as a single 'Class' column, without the index.
pd.DataFrame(y_pred, columns=['Class']).to_csv('AErf.csv', index=False)

# Weight Set Results
# Score the weight split on its autoencoder outputs (`weight1`), the same
# representation the classifier was fitted on, rather than on the raw
# `weight1X` features.
y_pred = clf.predict(weight1)
precision = precision_score(weight1Y, y_pred)
recall = recall_score(weight1Y, y_pred)
accuracy = accuracy_score(weight1Y, y_pred)
print("Accuracy: ", accuracy)
print("Recall", recall)
print("Precision", precision)
print("Classification report : ")
print(classification_report(weight1Y, y_pred))
print("Confusion Matrix : ")
print(confusion_matrix(weight1Y, y_pred))
# Save the weight-split predictions as a single 'Class' column, no index.
pd.DataFrame(y_pred, columns=['Class']).to_csv('WAErf.csv', index=False)


"""Code for Meta Ensemble model-2 and the Abalone data"""

# Load data
# Read the second pair of CSV splits; the last column is taken as the label
# and the remaining columns as features.
train = pd.read_csv("train2.csv")
test = pd.read_csv("test2.csv")
X, Y = train.iloc[:, :-1], train.iloc[:, -1]
X_test, Y_test = test.iloc[:, :-1], test.iloc[:, -1]

# Load the saved model and transform both feature matrices with it, wrapping
# each output in a DataFrame.
autoencoder = load_model('model.h6')
train_A = pd.DataFrame(autoencoder.predict(X))
test_A = pd.DataFrame(autoencoder.predict(X_test))

# Hyper-parameter tuning
# Grid-search n_estimators (1..20) and min_samples_split (2..10), scoring each
# candidate by F1 under 5-fold cross-validation.
rf = RandomForestClassifier(random_state=1, class_weight='balanced')
param_grid = {'n_estimators': range(1, 21), 'min_samples_split': range(2, 11)}
CV_lr = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5,
                     scoring='f1', n_jobs=-1)
# Search on the autoencoder outputs, i.e. the same representation the final
# classifier is fitted on. The raw `train` frame must not be used here: its
# last column is the label Y itself, which would leak the target into the
# features and make the cross-validated scores meaningless.
CV_lr.fit(X=train_A, y=Y)
best_param = CV_lr.best_params_
print("Best Paramters : ", best_param)

# Results
# Fit the forest on the model-2 training outputs, then report metrics for the
# training rows followed by the test rows, and save both sets of predictions.
clf = RandomForestClassifier(
    n_estimators=9,
    min_samples_split=215,
    random_state=41,
    class_weight='balanced',
    n_jobs=-1,
)
clf.fit(train_A, Y)
y_pred = clf.predict(test_A)     # test predictions
y_pred1 = clf.predict(train_A)   # training predictions

# Training-set metrics (precision and recall are computed but not printed).
precision = precision_score(Y, y_pred1)
recall = recall_score(Y, y_pred1)
accuracy = accuracy_score(Y, y_pred1)

print("Accuracy: ", accuracy)
print("Classification report : ")
print(classification_report(Y, y_pred1))
print("Confusion Matrix : ")
print(confusion_matrix(Y, y_pred1))

# Test-set metrics.
print("Test Accuracy: ", accuracy_score(Y_test, y_pred))
print("Classification report : ")
print(classification_report(Y_test, y_pred))
print("Confusion Matrix : ")
print(confusion_matrix(Y_test, y_pred))

# Persist test predictions first, then training predictions, each as a single
# 'Class' column without the index.
pd.DataFrame({'Class': y_pred}).to_csv('AErf2.csv', index=False)
pd.DataFrame({'Class': y_pred1}).to_csv('tAErf2.csv', index=False)