In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.model_selection import train_test_split

In [8]:
def preprocessing():
    dataset = pd.read_csv("admission_result.csv")
    data = dataset.iloc[:, 0:1]
    label = dataset.result
    train_features, test_features, train_labels, test_labels = train_test_split(data, label, test_size=0.2, random_state=10)
    return train_features, test_features, train_labels, test_labels

    
# Training with Logistic Regression
def train():
    train_features, test_features, train_labels, test_labels = preprocessing()
    log_regressor = LogisticRegression()
    log_regressor.fit(train_features, train_labels)
    return log_regressor, test_features, test_labels


# Training with SGDClassifier, note the this approach and the previous one have the same 
# Approach to solve the problem, but the result may be different due to some optimization
# Concepts
def train_SGD():
    train_features, test_features, train_labels, test_labels = preprocessing()
    sgd_classifier = SGDClassifier(loss="log_loss", random_state=10)
    sgd_classifier.fit(train_features, train_labels)
    return sgd_classifier


def show_result():
    log_regressor, test_features, test_labels = train()
    sgd_classifier = train_SGD()
    predicted_labels_reg = log_regressor.predict(test_features)
    predicted_labels_sgd = sgd_classifier.predict(test_features)
    print(f"LogisticRegression Accuracy: {accuracy_score(test_labels, predicted_labels_reg)}")
    print(f"SGDClassifier Accuracy: {accuracy_score(test_labels, predicted_labels_sgd)}")
    print(f"LogisticRegression Coeficients: {log_regressor.coef_} Intercept: {log_regressor.intercept_}")
    print(f"SGDClassifier Coeficients: {sgd_classifier.coef_} Intercept: {sgd_classifier.intercept_}")
    print(f"Probability of each sample in LogisticRegression: {log_regressor.predict_proba(test_features)}")
    print(f"Probability of each sample in SGDClassifier: {sgd_classifier.predict_proba(test_features)}")    
show_result()

LogisticRegression Accuracy: 1.0
SGDClassifier Accuracy: 0.55
LogisticRegression Coeficients: [[0.85982697]] Intercept: [-49.85703502]
SGDClassifier Coeficients: [[130.58724747]] Intercept: [-1035.19915884]
Probability of each sample in LogisticRegression: [[8.07910862e-08 9.99999919e-01]
 [9.99999871e-01 1.29329177e-07]
 [9.99999673e-01 3.27308002e-07]
 [2.75486522e-09 9.99999997e-01]
 [1.77635684e-15 1.00000000e+00]
 [2.65130140e-11 1.00000000e+00]
 [9.99998006e-01 1.99406873e-06]
 [9.99930125e-01 6.98749772e-05]
 [9.99999256e-01 7.44098634e-07]
 [1.79856130e-13 1.00000000e+00]
 [9.99999995e-01 5.41297891e-09]
 [1.00000000e+00 4.53911322e-11]
 [4.24933422e-11 1.00000000e+00]
 [9.99999998e-01 1.73557514e-09]
 [2.99412495e-10 1.00000000e+00]
 [2.11698080e-07 9.99999788e-01]
 [9.99999996e-01 4.26687153e-09]
 [5.10704501e-09 9.99999995e-01]
 [2.27328806e-08 9.99999977e-01]
 [6.71982828e-08 9.99999933e-01]]
Probability of each sample in SGDClassifier: [[0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 