In [297]:
# import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression

In [298]:
# combine both DT and NN csv's
df = pd.concat(map(pd.read_csv, ['DT_out_new.csv', 'NN_out_new.csv']), ignore_index=False)
new_output = df.to_csv("new_ouput.csv")
df2 = pd.read_csv("new_ouput.csv")

# drop the second index column
df2 = df2.drop(columns=['Unnamed: 0'])
print(df2)

# splits
X_train, Y_test, y_train, y_test = train_test_split(df2, df2["Liked"], test_size=.20)
# print("y_test")
# print(y_test)

    Liked
0       0
1       0
2       0
3       1
4       0
5       1
6       1
7       1
8       1
9       1
10      0
11      1
12      1
13      0
14      1
15      0
16      1
17      0
18      0
19      1
20      0
21      0
22      1
23      1
24      0
25      0
26      1
27      1
28      0
29      0
30      1
31      1
32      1
33      1
34      0
35      0
36      1
37      0
38      1
39      0
40      0
41      1
42      1
43      1
44      0
45      0
46      0
47      0
48      1
49      0


In [299]:
# Decision Tree Classifier - 5 levels in tree
dt = DecisionTreeClassifier(max_depth=5) 
# Fit DT to the training data
dt.fit(X_train, y_train) 

# Make predictions of X train to test to the accuracy of Y train and test
y_train_pred = dt.predict(X_train)
y_test_pred = dt.predict(X_test)

# Train and Test set - Accuracy
dt_train_accuracy = accuracy_score(y_train, y_train_pred) 
dt_test_accuracy = accuracy_score(y_test, y_test_pred)

print('Training Set Accuracy: %s' % dt_train_accuracy)
print('Test Set Accuracy: %s' % dt_test_accuracy)

Training Set Accuracy: 1.0
Test Set Accuracy: 0.6


In [300]:
# Multilayer Perceptron Classifier - 
mlp = MLPClassifier(alpha=1, max_iter=1000)
mlp.fit(X_train, y_train)

# Predictions
y_train_pred = mlp.predict(X_train)
y_test_pred = mlp.predict(X_test)

# Training and Test set - Accuracy
mlp_train_accuracy = accuracy_score(y_train, y_train_pred) 
mlp_test_accuracy = accuracy_score(y_test, y_test_pred) 

print('Training Set Accuracy: %s' % mlp_train_accuracy)
print('Test Set Accuracy: %s' % mlp_test_accuracy)

Training Set Accuracy: 1.0
Test Set Accuracy: 0.6


In [301]:
model_list = [('dt',dt),('mlp',mlp) ]

# Stack model
stack_model = StackingClassifier(estimators=model_list, final_estimator=LogisticRegression())

# Fit stacked_model to the training data
stack_model.fit(X_train, y_train)

# Predictions
y_train_pred = stack_model.predict(X_train)
y_test_pred = stack_model.predict(X_test)

# Training and Test set - Accuracy
stack_train_accuracy = accuracy_score(y_train, y_train_pred) 
stack_test_accuracy = accuracy_score(y_test, y_test_pred) 

print('Training Set Accuracy: %s' % stack_train_accuracy)
print('Test Set Accuracy: %s' % stack_test_accuracy)

Training Set Accuracy: 1.0
Test Set Accuracy: 0.6


In [302]:
# Accuracy Train List
acc_train_list = {
'dt': dt_train_accuracy,
'mlp': mlp_train_accuracy,
'stack': stack_model_train_accuracy}

# Accuracy Test List
acc_test_list = {
'dt': dt_test_accuracy,
'mlp': mlp_test_accuracy,
'stack': stack_model_test_accuracy}

In [303]:
acc_df = pd.DataFrame.from_dict(acc_test_list, orient='index', columns=['Accuracy'])
acc_df
acc_df.to_csv('Stacked Results.csv')