In [1]:
import numpy as np
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn import tree
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [2]:
# Read the three comma-separated inputs with identical parsing options; the first row
# of every file is skipped.
# NOTE(review): sample_submission.csv is used further down as the ground-truth labels
# for the test set. The classification reports in this notebook show zero support for
# class 1, so these may be placeholder labels rather than real ones — confirm before
# trusting any of the reported metrics.
dataset, dataset_test, true_data = (
    np.loadtxt(csv_path, delimiter=",", skiprows=1)
    for csv_path in ("train.csv", "test.csv", "sample_submission.csv")
)

In [3]:
# Training split: the first column (id) is dropped from the features and the last
# column is used as the target.
X, y = dataset[:, 1:-1], dataset[:, -1]

# Order the test features and the reference labels by their id (column 0) so that
# row i of one corresponds to row i of the other, then discard the id column.
test_X_sorted = dataset_test[dataset_test[:, 0].argsort()][:, 1:]
true_y_sorted = true_data[true_data[:, 0].argsort()][:, 1]

In [4]:
# Logistic regression
# Fit on the full training set with the lbfgs solver (C and max_iter set explicitly),
# then predict labels for the id-ordered test features.

lr_model = LogisticRegression(C=0.1, max_iter=1000, solver='lbfgs').fit(X, y)
lr_predicted = lr_model.predict(test_X_sorted)

In [5]:
# Support Vector Machines
# Fit an SVC (gamma='scale', every other setting left at its default) on the full
# training set, then predict labels for the id-ordered test features.

svc_model = svm.SVC(gamma='scale').fit(X, y)
svc_predicted = svc_model.predict(test_X_sorted)

In [6]:
# Decision Trees
# Fit a decision tree on the full training set and predict labels for the id-ordered
# test features.

# random_state is pinned so that a fresh "Restart & Run All" yields the same tree and
# therefore the same predictions; without it the fitted tree can differ between runs.
dtree_model = tree.DecisionTreeClassifier(random_state=0)
dtree_model.fit(X, y)
dtree_predicted = dtree_model.predict(test_X_sorted)

In [7]:
# Compare the logistic-regression predictions with the reference labels.
labels = [0.0, 1.0]
target_names = ['class 0', 'class 1']

# The same label list is passed to both calls so the confusion matrix is always 2x2
# and its rows/columns line up with the report, even when a class never occurs in
# either the reference labels or the predictions.
print("confusion_matrix:\n", confusion_matrix(true_y_sorted, lr_predicted, labels=labels))
print(classification_report(true_y_sorted, lr_predicted, labels=labels, target_names=target_names))

confusion_matrix:
 [[91 69]
 [ 0  0]]
              precision    recall  f1-score   support

     class 0       1.00      0.57      0.73       160
     class 1       0.00      0.00      0.00         0

    accuracy                           0.57       160
   macro avg       0.50      0.28      0.36       160
weighted avg       1.00      0.57      0.73       160



  'recall', 'true', average, warn_for)


In [8]:
# Compare the SVC predictions with the reference labels.
labels = [0.0, 1.0]
target_names = ['class 0', 'class 1']

# The same label list is passed to both calls so the confusion matrix is always 2x2
# and its rows/columns line up with the report, even when a class never occurs in
# either the reference labels or the predictions.
print("confusion_matrix:\n", confusion_matrix(true_y_sorted, svc_predicted, labels=labels))
print(classification_report(true_y_sorted, svc_predicted, labels=labels, target_names=target_names))

confusion_matrix:
 [[110  50]
 [  0   0]]
              precision    recall  f1-score   support

     class 0       1.00      0.69      0.81       160
     class 1       0.00      0.00      0.00         0

    accuracy                           0.69       160
   macro avg       0.50      0.34      0.41       160
weighted avg       1.00      0.69      0.81       160



In [9]:
# Compare the decision-tree predictions with the reference labels.
labels = [0.0, 1.0]
target_names = ['class 0', 'class 1']

# The same label list is passed to both calls so the confusion matrix is always 2x2
# and its rows/columns line up with the report, even when a class never occurs in
# either the reference labels or the predictions.
print("confusion_matrix:\n", confusion_matrix(true_y_sorted, dtree_predicted, labels=labels))
print(classification_report(true_y_sorted, dtree_predicted, labels=labels, target_names=target_names))

confusion_matrix:
 [[83 77]
 [ 0  0]]
              precision    recall  f1-score   support

     class 0       1.00      0.52      0.68       160
     class 1       0.00      0.00      0.00         0

    accuracy                           0.52       160
   macro avg       0.50      0.26      0.34       160
weighted avg       1.00      0.52      0.68       160

