In [12]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import Binarizer
import pandas as pd
import numpy as np
import titanic

In [2]:
# Read the Titanic training CSV, peel off the 'Survived' label column, and
# pass the remaining columns through the project-local
# titanic.transform_features helper before splitting.
titanic_df = pd.read_csv('train_titanic.csv')
y_titanic_df = titanic_df['Survived']
x_titanic_df = titanic.transform_features(titanic_df.drop(columns='Survived'))
# No random_state is given, so the train/test partition (and every score
# computed from it) differs from run to run.
x_train, x_test, y_train, y_test = train_test_split(x_titanic_df, y_titanic_df)

### 정확도

In [3]:
# Fit the project-local MyDummyClassifier on the Titanic training rows and
# print its accuracy on the held-out rows.
myclf = titanic.MyDummyClassifier()
myclf.fit(x_train, y_train)
pred = myclf.predict(x_test)

print(accuracy_score(y_test, pred))

0.7847533632286996


In [4]:
# Turn the 10-class digits problem into a binary one: label 1 where the
# digit is 7, label 0 everywhere else.
digits = load_digits()
y = np.where(digits.target == 7, 1, 0)
# Overwrites x_train/x_test/y_train/y_test from the Titanic cells; the split
# is unseeded, so it changes between runs.
x_train, x_test, y_train, y_test = train_test_split(digits.data, y)

In [5]:
# Fit the project-local MyFakeClassifier on the "is this digit a 7?" split
# and print its accuracy. `pred` is reused by the confusion-matrix cell that
# follows.
# NOTE(review): the recorded confusion matrix has no predicted positives,
# so the printed accuracy equals the share of non-7 rows (410 / 450).
fakeclf = titanic.MyFakeClassifier()
fakeclf.fit(x_train, y_train)
pred = fakeclf.predict(x_test)

print(accuracy_score(y_test, pred))

0.9111111111111111


### 오차행렬

In [6]:
# Confusion matrix for the MyFakeClassifier predictions from the previous
# cell; scikit-learn puts true labels on rows and predicted labels on columns.
confusion_matrix(y_test, pred)

array([[410,   0],
       [ 40,   0]])

### 정밀도와 재현율

In [7]:
# Rebuild the Titanic split (x_train/x_test/y_train/y_test were overwritten
# by the digits cells above), fit a logistic regression, and report
# accuracy / precision / recall on the held-out rows.
titanic_df = pd.read_csv('train_titanic.csv')
y_titanic_df = titanic_df['Survived']
x_titanic_df = titanic_df.drop('Survived', axis=1)
x_titanic_df = titanic.transform_features(x_titanic_df)
# No random_state is given, so the partition and the metrics printed below
# vary from run to run.
x_train, x_test, y_train, y_test = train_test_split(x_titanic_df, y_titanic_df)

lr = LogisticRegression(solver='liblinear')
lr.fit(x_train, y_train)
pred = lr.predict(x_test)
# Project-local helper; its return value is unpacked as three metrics.
accuracy, precision, recall = titanic.get_clf_eval(y_test, pred)

# Fixed: `accuracy` was missing its braces, so the literal text
# "accuracy:.3f" was printed instead of the formatted value.
print(f'acc:{accuracy:.3f} prec:{precision:.3f} recall:{recall:.3f}')

acc:accuracy:.3f prec:0.652 recall:0.606


In [8]:
# Get both the per-class probability estimates and the hard label
# predictions from the fitted logistic regression for the same test rows.
pred_proba = lr.predict_proba(x_test)
pred = lr.predict(x_test)

In [10]:
# Compare shapes: pred_proba has one column per class, pred is one label per row.
pred_proba.shape, pred.shape

((223, 2), (223,))

In [11]:
# Inspect the first test row: its two class probabilities next to the predicted label.
pred_proba[0], pred[0]

(array([0.80973998, 0.19026002]), 0)

In [13]:
# Small Binarizer demo: entries strictly greater than `threshold` become 1,
# everything else (including entries equal to the threshold) becomes 0.
x = [[1,2,1], [4,0,2], [1,2,3]]
binar = Binarizer(threshold=2)
binar.fit_transform(x)

array([[0, 0, 0],
       [1, 0, 0],
       [0, 0, 1]])

In [14]:
# Keep only the second probability column (index 1) as an (n, 1) column
# array, then preview its first three rows.
pp1 = pred_proba[:, 1:2]
pp1[:3]

array([[0.19026002],
       [0.26586172],
       [0.17904475]])