In [32]:
import tensorflow as tf
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

from tensorflow.python import keras
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras import backend as K


## Tf LR function

In [2]:
def train_lr_model(X_train, y_train, X_test, y_test,
                   iter_num, n_classes=2):
    
    tf.reset_default_graph()
    n_feats = X_train.shape[1]
    
    if n_classes == 2:
        n_classes -= 1
        
    w = tf.Variable(tf.random_normal(shape=[n_feats, n_classes]),
                    name='weights')
    b = tf.Variable(tf.random_normal(shape=[1, n_classes]),
                    name='bias')

    X = tf.placeholder(dtype=tf.float32, shape=[None, n_feats])
    Y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes])

    Y_pred = tf.matmul(X, w) + b

    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=Y_pred, labels=Y),
                          name='loss')

    opt = tf.train.GradientDescentOptimizer(0.005).minimize(loss)
    prediction = tf.round(tf.sigmoid(Y_pred))
    
    if n_classes == 1:        
        correct = tf.cast(tf.equal(prediction, Y), dtype=tf.float32)
    else:
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
        
    accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer()) 
        for epoch in range(iter_num):
            feed_dict = {X: X_train, Y: np.matrix(y_train).T}
            _, temp_loss = sess.run([opt, loss], feed_dict)
            temp_train_acc = sess.run(accuracy, feed_dict=feed_dict)
            b_out, w_out = sess.run([b, w], feed_dict=feed_dict)
            
            if (epoch + 1) % 100 == 0:
                print('epoch: {:4d} loss: {:5f} train accuracy: {:5f}'.format(epoch + 1,
                                                                              temp_loss,
                                                                              temp_train_acc))
        test_acc = sess.run(accuracy, feed_dict={X: X_test, Y: np.matrix(y_test).T})
        print()
        print('test accuracy: {:5f}'.format(test_acc))
    pass           

## Tf Titanic

In [33]:
titanic_df = pd.read_excel('data/titanic3.xls')

In [34]:
y = titanic_df['survived']
X = titanic_df.drop(columns=['survived', 'name',
                             'ticket', 'cabin', 'body',
                             'home.dest'])

In [35]:
X = pd.get_dummies(X, columns=['pclass', 'sex', 
                               'embarked', 'boat'], dummy_na=True)


In [36]:
num_feat = ['age', 'sibsp', 'parch', 'fare']
for feat in num_feat:
    X[feat] = X[feat].fillna(X[feat].median())


In [37]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 
                                                    random_state=42,
                                                    stratify=y)

In [38]:
train_lr_model(X_train, y_train, X_test, y_test, 2000, n_classes=2)

epoch:  100 loss: 0.999018 train accuracy: 0.565425
epoch:  200 loss: 0.945618 train accuracy: 0.577841
epoch:  300 loss: 0.895660 train accuracy: 0.582617
epoch:  400 loss: 0.849084 train accuracy: 0.583572
epoch:  500 loss: 0.805798 train accuracy: 0.593123
epoch:  600 loss: 0.765676 train accuracy: 0.602674
epoch:  700 loss: 0.728569 train accuracy: 0.638968
epoch:  800 loss: 0.694309 train accuracy: 0.666667
epoch:  900 loss: 0.662712 train accuracy: 0.698185
epoch: 1000 loss: 0.649792 train accuracy: 0.708691
epoch: 1100 loss: 0.626216 train accuracy: 0.731614
epoch: 1200 loss: 0.603875 train accuracy: 0.747851
epoch: 1300 loss: 0.582756 train accuracy: 0.755492
epoch: 1400 loss: 0.562818 train accuracy: 0.759312
epoch: 1500 loss: 0.544010 train accuracy: 0.771729
epoch: 1600 loss: 0.526272 train accuracy: 0.779370
epoch: 1700 loss: 0.509544 train accuracy: 0.797517
epoch: 1800 loss: 0.493764 train accuracy: 0.800382
epoch: 1900 loss: 0.478872 train accuracy: 0.805158
epoch: 2000 

In [39]:
y_train, y_test = pd.get_dummies(y_train), pd.get_dummies(y_test)

In [47]:
n_feat = X_train.shape[1]

K.clear_session()
sess = tf.Session()
K.set_session(sess)

model = Sequential()
model.add(Dense(2, activation='softmax', input_shape=(n_feat,)))
model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=20,
          validation_data = (X_test, y_test))

Train on 1047 samples, validate on 262 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f1d9c3016a0>

In [48]:
y_predicted_scores = model.predict(X_test)
y_predicted = y_predicted_scores.argmax(axis=1)

print('Classification report\n')
print(classification_report(np.matrix(y_test).argmax(axis=1), y_predicted))

Classification report

             precision    recall  f1-score   support

          0       0.89      0.99      0.94       162
          1       0.99      0.80      0.88       100

avg / total       0.93      0.92      0.92       262



## Tf Thyroid

In [3]:
thyroid_df = pd.read_csv('data/dataset_57_hypothyroid.csv')

(все действия - как в hw3, где был тот же датасет)

In [4]:
thyroid_df = thyroid_df.drop(['query_on_thyroxine', 
                              'on_antithyroid_medication',
                              'pregnant', 'thyroid_surgery',
                              'I131_treatment', 'lithium',
                              'goitre', 'referral_source',
                              'TBG', 'TSH_measured',
                              'T3_measured', 'TT4_measured',
                              'FTI_measured', 'TBG_measured',
                              'T4U_measured'], axis=1)

In [5]:
num_features = ['age', 'TSH', 'T3', 'TT4', 'T4U', 'FTI']

for feat in num_features:
    thyroid_df[feat] = pd.to_numeric(thyroid_df[feat], errors='coerce')
    feat_median = thyroid_df[feat].median()
    thyroid_df[feat] = thyroid_df[feat].replace(np.nan, feat_median)


In [6]:
thyroid_df['age>45'] = 1*(thyroid_df['age'] > 45)
thyroid_df = thyroid_df.drop('age', axis=1)

In [7]:
thyroid_df['sex'] = thyroid_df['sex'].replace('?', 'F')

In [8]:
y = thyroid_df['Class']
X = thyroid_df.drop('Class', axis=1)

In [9]:
try:
    num_features.remove('age')
except:
    pass
num_df = X[num_features]
cat_df = X.drop(num_features, axis=1)
cat_df = pd.get_dummies(cat_df)
X = pd.concat([cat_df, num_df],axis=1)

In [10]:
y_dum = pd.get_dummies(y)

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y_dum, test_size=0.2,
                                                    stratify=y)

In [12]:
train_lr_model(X_train, y_train.T, X_test, y_test.T, 2000, n_classes=4)

epoch:  100 loss: 2.724524 train accuracy: 0.913822
epoch:  200 loss: 1.725437 train accuracy: 0.920119
epoch:  300 loss: 1.270614 train accuracy: 0.921777
epoch:  400 loss: 0.829953 train accuracy: 0.921445
epoch:  500 loss: 0.574201 train accuracy: 0.925091
epoch:  600 loss: 0.366576 train accuracy: 0.927743
epoch:  700 loss: 0.259197 train accuracy: 0.935698
epoch:  800 loss: 0.247308 train accuracy: 0.938018
epoch:  900 loss: 0.237851 train accuracy: 0.940338
epoch: 1000 loss: 0.229983 train accuracy: 0.942990
epoch: 1100 loss: 0.223224 train accuracy: 0.943321
epoch: 1200 loss: 0.217165 train accuracy: 0.943653
epoch: 1300 loss: 0.211641 train accuracy: 0.943984
epoch: 1400 loss: 0.206590 train accuracy: 0.944316
epoch: 1500 loss: 0.201966 train accuracy: 0.943984
epoch: 1600 loss: 0.197693 train accuracy: 0.944647
epoch: 1700 loss: 0.193761 train accuracy: 0.944978
epoch: 1800 loss: 0.190460 train accuracy: 0.944978
epoch: 1900 loss: 0.187457 train accuracy: 0.944316
epoch: 2000 

In [13]:

#K.clear_session()
#sess = tf.Session()
#K.set_session(sess)

n_feat = X_train.shape[1]

model = Sequential()
model.add(Dense(4, activation='softmax', input_shape=(n_feat,)))
model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=5,
          validation_data = (X_test, y_test))

Train on 3017 samples, validate on 755 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f1dfd57bb38>

In [16]:
y_predicted_scores = model.predict(X_test)
y_predicted = y_predicted_scores.argmax(axis=1)

print('Classification report\n')
print(classification_report(np.matrix(y_test).argmax(axis=1), y_predicted))

Classification report

             precision    recall  f1-score   support

          0       0.00      0.00      0.00        39
          1       0.94      1.00      0.97       697
          2       0.85      0.58      0.69        19

avg / total       0.89      0.94      0.91       755

