In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.cm as cm

import xgboost as xgb
from xgboost import plot_tree
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.densenet import DenseNet121
from keras.applications.resnet import ResNet50
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model
from keras.models import Sequential
from tensorflow.keras import backend as K
from IPython.display import display
from PIL import Image
from keras.models import load_model
from keras.preprocessing import image

from sklearn.metrics import roc_curve
from sklearn.metrics import RocCurveDisplay
import scikitplot as skplt
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import util


In [None]:
# Read the three CSV tables used below: the training table, the table bound to
# `test_df` (its file is named validate.csv) and the sample submission.
# NOTE(review): the paths are absolute and user-specific; a configurable
# data directory would make the notebook runnable elsewhere.
train_df = pd.read_csv("/media/ayse/ML/ML/train.csv")
test_df = pd.read_csv("~/Desktop/smallData/validate.csv")
sub=pd.read_csv("/media/ayse/ML/ML/sample_submission.csv")
# Preview the first rows of the training table.
train_df.head()

In [None]:
# Replace missing metadata in both tables: 'na' for the two text columns and
# 0 for the approximate age.
for _frame in (train_df, test_df):
    for _column, _filler in (('sex', 'na'),
                             ('age_approx', 0),
                             ('anatom_site_general_challenge', 'na')):
        _frame[_column] = _frame[_column].fillna(_filler)

# Convert the training table's text columns to integer codes. pandas category
# codes start at 0, so the +1 makes them start at 1.
for _column in ('sex', 'anatom_site_general_challenge'):
    train_df[_column] = train_df[_column].astype("category").cat.codes + 1


In [None]:
# Image generator for training: rescales pixel values by 1/255 and applies
# random rotation, width/height shift, shear, zoom and horizontal flip.
# Pixels created by a transform are filled with the nearest existing pixel.
train_datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode = 'nearest')

In [None]:
# Convert the text columns of `test_df` to integer codes starting at 1.
# NOTE(review): these codes are derived from test_df's own values, separately
# from the encoding applied to train_df. If the two tables do not contain the
# same set of values, the same category can get different codes — confirm.
test_df['sex'] = test_df['sex'].astype("category").cat.codes +1
test_df['anatom_site_general_challenge'] = test_df['anatom_site_general_challenge'].astype("category").cat.codes +1
# Preview the encoded table.
test_df.head()


In [None]:
# Feature matrix (three metadata columns) and label vector for training.
x_train = train_df.loc[:, ['sex', 'age_approx', 'anatom_site_general_challenge']]
y_train = train_df.loc[:, 'target']

# Same three feature columns for the table held in test_df.
x_test = test_df.loc[:, ['sex', 'age_approx', 'anatom_site_general_challenge']]

# XGBoost-native containers for the same data.
train_DMatrix = xgb.DMatrix(data=x_train, label=y_train)
test_DMatrix = xgb.DMatrix(data=x_test)

In [None]:
# Gradient-boosted classifier on the three metadata features.
#
# The problem has two classes (the previous configuration passed num_class=2),
# so the binary logistic objective is used. XGBoost applies `scale_pos_weight`
# only to binary objectives; under 'multi:softprob' the class re-weighting had
# no effect. `predict_proba` still returns one column per class.
clf = xgb.XGBClassifier(n_estimators=2600,
                        max_depth=15,
                        objective='binary:logistic',
                        seed=0,
                        nthread=-1,
                        learning_rate=0.15,
                        # NOTE(review): presumably negatives / positives in the
                        # training table — confirm against y_train.
                        scale_pos_weight=(32542/584))

clf.fit(x_train, y_train)

In [None]:
# Draw the tree with index 10 from the fitted classifier and save the current
# matplotlib figure as plot.png in the working directory.
plot_tree(clf,num_trees=10)
plt.savefig("plot.png")

In [None]:
# Stream augmented training images from the directory below, resized to
# 224x224, one image per batch, with binary labels taken from the
# sub-directory each image is in.
train_generator = train_datagen.flow_from_directory(
        '/media/ayse/ML/ML/jpeg/undersampletrain/', 
        target_size=(224, 224), 
        batch_size=1,
        class_mode='binary')  

In [None]:
# Image generator for evaluation: only rescales pixel values by 1/255, with
# no augmentation.
test_datagen = ImageDataGenerator(
        rescale=1./255,
)
        

In [None]:
# Stream the evaluation images (224x224, one image per batch, binary labels).
# shuffle=False is required here: flow_from_directory shuffles by default, and
# predictions made from a shuffled iterator cannot be matched back to
# `test_generator.filenames` / `test_generator.classes` or to any label table.
test_generator = test_datagen.flow_from_directory(
        '/home/ayse/Desktop/smallData/jpeg/validate',
        target_size=(224, 224),
        batch_size=1,
        class_mode='binary',
        shuffle=False)

In [None]:
def binary_focal_loss(gamma=2., alpha=.25):
    """
    Build a binary focal-loss function.

      FL(p_t) = -alpha_t * (1 - p_t)**gamma * log(p_t)

    p_t is the predicted probability where the label equals 1 and
    (1 - predicted probability) elsewhere; alpha_t is `alpha` where the label
    equals 1 and (1 - alpha) elsewhere.

    References:
        https://arxiv.org/pdf/1708.02002.pdf
    Usage:
     model.compile(loss=[binary_focal_loss(alpha=.25, gamma=2)], metrics=["accuracy"], optimizer=adam)

    :param gamma: exponent applied to (1 - p_t)
    :param alpha: weight applied where the label equals 1
    :return: a function taking (y_true, y_pred) and returning the loss
    """

    def binary_focal_loss_fixed(y_true, y_pred):
        """
        :param y_true: A tensor of the same shape as `y_pred`
        :param y_pred: A tensor of probabilities (e.g. sigmoid output)
        :return: Mean over the batch of the per-row summed focal loss.
        """
        labels = tf.cast(y_true, tf.float32)
        is_positive = K.equal(labels, 1)

        # Keep probabilities strictly inside (0, 1) so the log stays finite.
        eps = K.epsilon()
        probs = K.clip(y_pred, eps, 1.0 - eps)

        # Probability assigned to the true class, and its class weight.
        p_t = tf.where(is_positive, probs, 1 - probs)
        alpha_pos = K.ones_like(labels) * alpha
        alpha_t = tf.where(is_positive, alpha_pos, 1 - alpha_pos)

        # Modulated cross entropy for every element.
        per_element = alpha_t * K.pow((1 - p_t), gamma) * (-K.log(p_t))

        # Sum along axis 1, then average over the batch.
        return K.mean(K.sum(per_element, axis=1))

    return binary_focal_loss_fixed


In [None]:
def compute_class_freqs(labels):
    """
    Compute the fraction of positive and negative labels.

    Args:
        labels: array-like of 0/1 labels with examples along axis 0, shape
            (num_examples,) or (num_examples, num_classes).

    Returns:
        (positive_frequencies, negative_frequencies): per-class fraction of
        ones and of zeros among the examples.

    Raises:
        ValueError: if `labels` contains no examples.
    """
    labels = np.asarray(labels)

    # Normalise by the number of labels actually passed in. The previous
    # version divided by len(train_df), a global table that need not have the
    # same number of rows as `labels`.
    N = labels.shape[0]
    if N == 0:
        raise ValueError("labels must contain at least one example")

    positive_frequencies = np.sum(labels, axis=0) / N
    negative_frequencies = 1 - positive_frequencies

    return positive_frequencies, negative_frequencies

In [None]:
# DenseNet121 backbone with ImageNet weights and without its classifier head.
base_model = DenseNet121(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Keep every backbone layer fixed; only the new head below is trainable.
for layers in base_model.layers:
    layers.trainable = False

# New head: global average pooling followed by a single sigmoid unit.
x = GlobalAveragePooling2D()(base_model.output)
predictions = Dense(1, activation="sigmoid", dtype='float32')(x)

model = Model(inputs=base_model.input, outputs=predictions)
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)

model.summary()

In [None]:
# Keep the ground-truth column as its own Series, then remove it from test_df.
test_target = test_df['target']
test_df.drop(columns=['target'], inplace=True)

In [None]:
# Positive / negative label frequencies computed from the labels exposed by
# the training image generator.
freq_pos, freq_neg = compute_class_freqs(train_generator.labels)


In [None]:
# Each class is weighted by the frequency of the other class, so that the
# two weighted contributions below come out equal.
pos_weights, neg_weights = freq_neg, freq_pos
pos_contribution = freq_pos * pos_weights
neg_contribution = freq_neg * neg_weights

In [None]:
def get_weighted_loss(pos_weights, neg_weights, epsilon=1e-7):
    """
    Build a class-weighted binary cross-entropy loss.

    Args:
        pos_weights: weight applied to the positive-label term.
        neg_weights: weight applied to the negative-label term.
        epsilon: small constant added inside the logs to avoid log(0).

    Returns:
        A function `weighted_loss(y_true, y_pred)` returning the scalar loss.
    """

    def weighted_loss(y_true, y_pred):
        """
        Average the weighted positive and negative log-loss terms over every
        element of the batch.

        The previous version indexed `y_true[0]` / `y_pred[0]`, which uses only
        the first sample of each batch and ignores the rest.

        NOTE(review): assumes y_true and y_pred have matching shapes when this
        is called — confirm for the training setup in use.
        """
        loss_pos = -1 * K.mean(pos_weights * y_true * K.log(y_pred + epsilon))
        loss_neg = -1 * K.mean(neg_weights * (1 - y_true) * K.log(1 - y_pred + epsilon))
        return loss_pos + loss_neg

    return weighted_loss

In [None]:
# Train the model on the augmented image stream. `Model.fit` accepts
# generators directly; `fit_generator` is deprecated in tf.keras.
# NOTE(review): steps_per_epoch is hard-coded; with batch_size=1 it presumably
# equals the number of training images — confirm, or use len(train_generator).
history = model.fit(train_generator,
                    steps_per_epoch=1484,
                    shuffle=True,
                    epochs = 15)

# Training loss per epoch.
plt.plot(history.history['loss'])
plt.ylabel("loss")
plt.xlabel("epoch")
plt.title("Training Loss Curve")
plt.show()

# Training binary accuracy per epoch.
plt.plot(history.history['binary_accuracy'])
plt.ylabel("binary accuracy")
plt.xlabel("epoch")
plt.title("Training Accuracy Curve")
plt.show()

In [None]:
# Predicted probabilities for every batch of test_generator. `Model.predict`
# accepts generators directly; `predict_generator` is deprecated in tf.keras.
predicted_vals = model.predict(test_generator, steps=len(test_generator))

# Hard 0/1 labels for the evaluation cells further down, which read
# `class_preds` but never define it.
# NOTE(review): the 0.5 cut-off is an assumption — confirm the intended threshold.
class_preds = (predicted_vals.ravel() >= 0.5).astype(int)

print(predicted_vals)

In [None]:
def load_image(img_path, show=False):
    """Load one image as a batch of size 1 with values scaled by 1/255.

    :param img_path: path of the image file to load (resized to 224x224)
    :param show: when True, also display the image with matplotlib
    :return: the image array with a leading batch axis of length 1
    """
    pil_img = image.load_img(img_path, target_size=(224, 224))

    # Add the batch axis in front, then rescale in place.
    batch = np.expand_dims(image.img_to_array(pil_img), axis=0)
    batch /= 255.

    if not show:
        return batch

    plt.imshow(batch[0])
    plt.axis('off')
    plt.show()
    return batch

In [None]:
# Run the model on a single image file and display the raw prediction.
img_path = '/home/ayse/Desktop/smallData/jpeg/validate/0/ISIC_0148677.jpg'    


# load_image returns a rescaled batch of one 224x224 image.
new_image = load_image(img_path)

pred = model.predict(new_image)
pred

In [None]:
# ROC curve and AUC from the predicted probabilities. Thresholded 0/1 labels
# would collapse the curve to a single operating point, and `class_preds` was
# not defined at this point.
# NOTE(review): this assumes the rows of test_target are in the same order as
# the images yielded by test_generator — confirm.
roc_scores = predicted_vals.ravel()
fpr, tpr, _ = metrics.roc_curve(test_target, roc_scores)
auc = metrics.roc_auc_score(test_target, roc_scores)
plt.plot(fpr,tpr,label="data 1, auc="+str(auc))
plt.xlabel("false positive rate")
plt.ylabel("true positive rate")
plt.legend(loc=4)
plt.show()

In [None]:
# Load the table stored in test.csv and preview its first rows.
big_test_df = pd.read_csv("/media/ayse/ML/ML/test.csv")
big_test_df.head()

In [None]:
# Stream the images under the test directory (224x224, one image per batch).
# Two fixes here: a stray '-' after the directory argument made this cell a
# SyntaxError, and shuffle=False keeps the prediction order aligned with
# `big_test_generator.filenames`.
big_test_generator = test_datagen.flow_from_directory(
        '/media/ayse/ML/ML/jpeg/testparent',
        target_size=(224, 224),
        batch_size=1,
        class_mode='binary',
        shuffle=False)

In [None]:
# Predicted probabilities for every batch of big_test_generator.
# `Model.predict` accepts generators directly; `predict_generator` is
# deprecated in tf.keras.
big_predicted_vals = model.predict(big_test_generator, steps=len(big_test_generator))
print(big_predicted_vals)

In [None]:
# Put the image names and the predicted probabilities side by side.
# The builtin `float` replaces `np.float`, an alias that has been removed
# from NumPy.
# NOTE(review): `big_test_image_names` is not defined in any visible cell —
# confirm where it comes from before running this cell.
submission =  pd.concat([
    big_test_image_names,
    pd.DataFrame(big_predicted_vals, dtype=float)
], axis=1, ignore_index=True)

In [None]:
# Write the submission table to disk with a header row and without the index.
submission.to_csv (r'/home/ayse/Desktop/validateCNN_submission.csv', index = False, header=True)

In [None]:
# Load the validation table and keep its `target` column as the ground truth
# used by the ensemble evaluation cells.
validate_df = pd.read_csv("/home/ayse/Desktop/shuffledvalidate.csv")
validate_target= validate_df['target']
validate_df.head()

In [None]:
# Stream the validation images (224x224, one image per batch, binary labels).
# shuffle=False is required: flow_from_directory shuffles by default, and the
# predictions made from this iterator are later placed next to a table of
# image names, which is only meaningful with a deterministic order.
validate_generator = test_datagen.flow_from_directory(
        '/home/ayse/Desktop/smallData/jpeg/validate',
        target_size=(224, 224),
        batch_size=1,
        class_mode='binary',
        shuffle=False)


In [None]:
# Predicted probabilities for every batch of validate_generator.
# `Model.predict` accepts generators directly; `predict_generator` is
# deprecated in tf.keras.
predicted_validate_vals = model.predict(validate_generator, steps=len(validate_generator))
print(predicted_validate_vals)

In [None]:
# Wrap the validation predictions in a DataFrame and save them without a
# header or index. The builtin `float` replaces `np.float`, an alias that has
# been removed from NumPy.
predvalid_df = pd.DataFrame(predicted_validate_vals, dtype=float)
predvalid_df.to_csv('predvalid.csv',header=False, index=False)

In [None]:
# Place the CNN predictions next to the rows of validatesubmission.csv and
# write the combined table.
# NOTE(review): the prediction column keeps predvalid_df's default integer
# column label. A later cell reads CNN['target'] from the file written here —
# confirm that column holds these predictions and not a column already
# present in validatesubmission.csv.
imgnamesvalid_df = pd.read_csv("/home/ayse/Desktop/validatesubmission.csv")
validate_predictions = pd.concat(
    [imgnamesvalid_df, predvalid_df],
    axis=1,
)
validate_predictions.to_csv(
    "/home/ayse/Desktop/validatesubmissionfinal.csv",
    index=False,
)

In [None]:
# Fill missing metadata: 'na' for the two text columns, 0 for the age.
for _column, _filler in (('sex', 'na'),
                         ('age_approx', 0),
                         ('anatom_site_general_challenge', 'na')):
    validate_df[_column] = validate_df[_column].fillna(_filler)

# Convert the text columns to integer codes starting at 1.
for _column in ('sex', 'anatom_site_general_challenge'):
    validate_df[_column] = validate_df[_column].astype("category").cat.codes + 1

# Feature matrix for the XGBoost model, plus its DMatrix form.
x_validate = validate_df.loc[:, ['sex', 'age_approx', 'anatom_site_general_challenge']]
validate_DMatrix = xgb.DMatrix(x_validate)


In [None]:
# Store the XGBoost positive-class probability (second column of
# predict_proba) in the submission template and save it. The previous version
# also ran predict_proba once more and discarded the result.
sub = pd.read_csv("/home/ayse/Desktop/validatesubmission.csv")

# Item assignment always writes a DataFrame column. Attribute assignment
# (`sub.target = ...`) only does so when the column already exists.
sub['target'] = clf.predict_proba(x_validate)[:,1]
sub_tabular = sub.copy()

# NOTE(review): written relative to the working directory, while a later cell
# reads /home/ayse/Desktop/xgb_validate.csv — confirm they are the same file.
sub_tabular.to_csv('xgb_validate.csv', index = False)


In [None]:
# Confusion matrix with the positive class listed first, so the flattened
# order is TP, FN, FP, TN.
matrix = confusion_matrix(test_target, class_preds, labels=[1,0])
TP, FN, FP, TN = matrix.reshape(-1)
print("tp: ", TP, "fn: ", FN, "fp: ", FP,  "tn: ", TN)

In [None]:
# Text report of per-class metrics for the CNN predictions.
# Note that `matrix` is rebound here from the confusion matrix to a string.
matrix = classification_report(test_target, class_preds, labels=[1,0])
print(matrix)

In [None]:
# Load the three prediction tables that are blended below.
# NOTE(review): `seventh` is read from submission.csv — presumably another
# model's predictions; confirm its origin and row order.
CNN = pd.read_csv("/home/ayse/Desktop/validatesubmissionfinal.csv")
seventh = pd.read_csv("/home/ayse/Desktop/submission.csv")
xgboost = pd.read_csv("/home/ayse/Desktop/xgb_validate.csv")
seventh.head()

In [None]:
# Right-join on image_name so that every row of CNN is kept, then discard the
# target column that came from `seventh` (suffixed _x by the merge).
subsubmission_df = (
    seventh
    .merge(CNN, how='right', on=['image_name'])
    .drop(columns=['target_x'])
)
subsubmission_df.head()

In [None]:
# Give the remaining merged column (suffixed _y) its plain name back.
subsubmission_df = subsubmission_df.rename(columns={'target_y': 'target'})
subsubmission_df.head()

In [None]:
# Weighted blend of the three models' targets: 0.5 xgboost, 0.3 seventh, 0.2 CNN.
# NOTE(review): the three `target` Series are combined by row index, so this
# assumes xgboost, seventh and CNN list the same images in the same order as
# subsubmission_df — confirm.
ensemble = pd.DataFrame({'image_name':subsubmission_df['image_name'], 'target':(0.5*xgboost['target'] + 0.3*seventh['target'] + 0.2*CNN['target'])})
# NOTE(review): written relative to the working directory, while a later cell
# reads /home/ayse/Desktop/ensemble1.csv — confirm they are the same file.
ensemble.to_csv('ensemble1.csv',header=True, index=False)
ensemble.head(10)



In [None]:
# Turn the blended scores into hard labels: 0 below 0.2, otherwise 1.
ensemble_df = pd.read_csv("/home/ayse/Desktop/ensemble1.csv")
ensembled_class = [0 if score < 0.2 else 1 for score in ensemble_df['target']]


In [None]:
# Confusion matrix for the ensemble with the positive class listed first, so
# the flattened order is TP, FN, FP, TN.
matrix = confusion_matrix(validate_target, ensembled_class, labels=[1,0])
TP, FN, FP, TN = matrix.reshape(-1)
print("tp: ", TP, "fn: ", FN, "fp: ", FP,  "tn: ", TN)

In [None]:
# Text report of per-class metrics for the ensemble's hard labels.
# Note that `matrix` is rebound here from the confusion matrix to a string.
matrix = classification_report(validate_target, ensembled_class, labels=[1,0])
print(matrix)