In [23]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import pydicom
import random 
from tqdm import tqdm

import os
import keras
import pydicom

from sklearn.preprocessing import LabelEncoder
import pickle

def read_pickle(filename):    
    with open(filename, 'rb') as fp:
        return pickle.load(fp)
    
def save_pickle(data,filename):       
    with open(filename, 'wb') as fp:
        pickle.dump(data, fp)  
    
data_path = '../input/rsna-pneumonia-detection-challenge/'

images_path = data_path + 'stage_2_train_images/'
labels_path = data_path + 'stage_2_train_labels.csv'

detailed_class_info_path = data_path + 'stage_2_detailed_class_info.csv'

class_encoder = LabelEncoder()
   
def merge_dataframes():
    df = pd.read_csv(labels_path)
    details_df = pd.read_csv(detailed_class_info_path)
    df = pd.concat([df,details_df.drop('patientId',1)], 1) 
    print(df.describe())
    print(df.shape[0], 'cases')

    return df


  
def load_ids_and_labels_from_file():
    ids = read_pickle('ids')
    labels = read_pickle('labels')
    return ids,labels
  
def get_ids_and_labels(num_class):
    df = merge_dataframes()
    df['class_id'] = class_encoder.fit_transform(df['class'])

    df.sort_values(by=['patientId', 'class_id'])
    
    ids = df.patientId.tolist()
    
    if(num_class==2):
        labels =  df.Target.tolist() 
    else:
        labels =  df.class_id.tolist() 
    
    save_pickle(ids, 'ids')
    save_pickle(labels, 'labels')

    return ids,labels

      

num_class = 3
ids,labels = get_ids_and_labels(num_class)

                 x            y      ...            height        Target
count  9555.000000  9555.000000      ...       9555.000000  30227.000000
mean    394.047724   366.839560      ...        329.269702      0.316108
std     204.574172   148.940488      ...        157.750755      0.464963
min       2.000000     2.000000      ...         45.000000      0.000000
25%     207.000000   249.000000      ...        203.000000      0.000000
50%     324.000000   365.000000      ...        298.000000      0.000000
75%     594.000000   478.500000      ...        438.000000      1.000000
max     835.000000   881.000000      ...        942.000000      1.000000

[8 rows x 5 columns]
30227 cases


In [24]:
feature_tensors = np.load('../input/dense121-features/feature_vectors.npy')

In [25]:
from sklearn.model_selection import train_test_split

y = labels[:feature_tensors.shape[0]]
# y = keras.utils.to_categorical(y, num_classes=2)

X_train, X_test, y_train, y_test = train_test_split(feature_tensors, y, test_size=0.1, random_state=42)
# X_valid, X_test, y_valid, y_test  = train_test_split(X_valid, y_valid, test_size=0.4, random_state=42)

In [26]:
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix, roc_curve
import matplotlib.pyplot as plt


def precision_recall(name, clf):  
    y_pred = clf.predict(X_test)
    
    if(num_class==2):
        roc_score = roc_auc_score(y_test, y_pred)
        print('roc_auc_score', roc_score)

    
    if(num_class==3):
        report = classification_report(y_test, y_pred, target_names=class_encoder.classes_)
    else:
        report = classification_report(y_test, y_pred)
        
    print('classification report for', name)
    print( report )
    

def evaluate_classifier(clf,name):    
    clf.fit(X_train,y_train)

    save_pickle(clf,name)

    precision_recall(name, clf)
    
    score = clf.score(X_test,y_test)
    
    print('average_score', round(score,3))

In [27]:
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier(random_state=0)
name = 'DecisionTreeClassifier'
evaluate_classifier(clf,name)     

classification report for DecisionTreeClassifier
                              precision    recall  f1-score   support

                Lung Opacity       0.29      0.31      0.30       819
No Lung Opacity / Not Normal       0.40      0.38      0.39      1076
                      Normal       0.28      0.29      0.29       771

                   micro avg       0.33      0.33      0.33      2666
                   macro avg       0.33      0.33      0.33      2666
                weighted avg       0.33      0.33      0.33      2666

average_score 0.332


In [28]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

clf = LinearDiscriminantAnalysis()

name = 'LinearDiscriminantAnalysis'

evaluate_classifier(clf,name) 

classification report for LinearDiscriminantAnalysis
                              precision    recall  f1-score   support

                Lung Opacity       0.32      0.23      0.27       819
No Lung Opacity / Not Normal       0.40      0.61      0.49      1076
                      Normal       0.27      0.16      0.20       771

                   micro avg       0.36      0.36      0.36      2666
                   macro avg       0.33      0.33      0.32      2666
                weighted avg       0.34      0.36      0.34      2666

average_score 0.364


In [29]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

clf = QuadraticDiscriminantAnalysis()

name = 'QuadraticDiscriminantAnalysis'

evaluate_classifier(clf,name) 



classification report for QuadraticDiscriminantAnalysis
                              precision    recall  f1-score   support

                Lung Opacity       0.30      0.10      0.15       819
No Lung Opacity / Not Normal       0.42      0.26      0.32      1076
                      Normal       0.29      0.66      0.41       771

                   micro avg       0.33      0.33      0.33      2666
                   macro avg       0.34      0.34      0.29      2666
                weighted avg       0.35      0.33      0.29      2666

average_score 0.325


In [30]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier()

name = 'RandomForestClassifier'

evaluate_classifier(clf,name) 



classification report for RandomForestClassifier
                              precision    recall  f1-score   support

                Lung Opacity       0.31      0.37      0.34       819
No Lung Opacity / Not Normal       0.39      0.42      0.40      1076
                      Normal       0.26      0.19      0.22       771

                   micro avg       0.34      0.34      0.34      2666
                   macro avg       0.32      0.32      0.32      2666
                weighted avg       0.33      0.34      0.33      2666

average_score 0.336


In [31]:
from sklearn.svm import LinearSVC

clf = LinearSVC()
name = 'LinearSVC'

evaluate_classifier(clf,name) 

classification report for LinearSVC
                              precision    recall  f1-score   support

                Lung Opacity       0.41      0.03      0.05       819
No Lung Opacity / Not Normal       0.30      0.02      0.04      1076
                      Normal       0.29      0.94      0.44       771

                   micro avg       0.29      0.29      0.29      2666
                   macro avg       0.33      0.33      0.18      2666
                weighted avg       0.33      0.29      0.16      2666

average_score 0.29




In [32]:
from sklearn.naive_bayes import GaussianNB

clf = GaussianNB()

name = 'GaussianNB'

evaluate_classifier(clf,name) 

classification report for GaussianNB
                              precision    recall  f1-score   support

                Lung Opacity       0.32      0.06      0.10       819
No Lung Opacity / Not Normal       0.41      0.23      0.29      1076
                      Normal       0.29      0.73      0.42       771

                   micro avg       0.32      0.32      0.32      2666
                   macro avg       0.34      0.34      0.27      2666
                weighted avg       0.35      0.32      0.27      2666

average_score 0.32


In [33]:
from sklearn.dummy import DummyClassifier

clf = DummyClassifier(strategy='stratified', random_state=0)

name = 'Dummy stratified'

evaluate_classifier(clf,name)




clf = DummyClassifier(strategy='uniform', random_state=0)

name = 'Dummy uniform'

evaluate_classifier(clf,name) 

classification report for Dummy stratified
                              precision    recall  f1-score   support

                Lung Opacity       0.29      0.30      0.30       819
No Lung Opacity / Not Normal       0.41      0.39      0.40      1076
                      Normal       0.29      0.31      0.30       771

                   micro avg       0.34      0.34      0.34      2666
                   macro avg       0.33      0.33      0.33      2666
                weighted avg       0.34      0.34      0.34      2666

average_score 0.337
classification report for Dummy uniform
                              precision    recall  f1-score   support

                Lung Opacity       0.34      0.36      0.35       819
No Lung Opacity / Not Normal       0.39      0.33      0.36      1076
                      Normal       0.28      0.32      0.30       771

                   micro avg       0.34      0.34      0.34      2666
                   macro avg       0.34      0.34   

In [None]:
from keras.models import Sequential
from keras.layers import Dropout, Dense, Flatten,BatchNormalization,LeakyReLU
from keras.metrics import categorical_accuracy, binary_accuracy
import keras
from sklearn.model_selection import train_test_split

num_class = 2
ids,labels = get_ids_and_labels(num_class)

if(num_class==2):
    
    y = labels[:feature_tensors.shape[0]]
    y = np.array(y)
    X_train, X_test, y_train, y_test = train_test_split(feature_tensors, y, test_size=0.2, random_state=42)
    
    model = Sequential()
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=[binary_accuracy])

else:
    y = labels[:feature_tensors.shape[0]]
    y = keras.utils.to_categorical(y, num_class)
    X_train, X_test, y_train, y_test = train_test_split(feature_tensors, y, test_size=0.2, random_state=42)
    
    model = Sequential()
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=[categorical_accuracy])



model.fit(X_train,y_train,
          epochs=12,
          batch_size=16,
          validation_data=(X_test, y_test)
         )

model.save('dense121.h5') 