In [39]:
import os
import shutil
import hickle
import re

import pandas as pd
import tempfile
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab

import numpy as np
import PIL
from PIL import Image, ImageChops, ImageDraw

from skorch import NeuralNetClassifier
from skorch.callbacks import LRScheduler, Checkpoint, EpochScoring, EarlyStopping
from skorch.dataset import Dataset
from skorch.helper import predefined_split
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, roc_curve

from pathlib import Path

import tensorflow as tf
from tensorflow import keras
import matplotlib.patches as patches

from tqdm import tqdm
from numba import jit, cuda
from utils import systemic_brightening
from train_random_pixels import PretrainedModel
import warnings

In [3]:
# set directory
os.chdir("/users/riya/race/classifier_experiments")

# ignore warnings
warnings.filterwarnings("ignore")

# import model
segmentation_classifier = keras.models.load_model('models/MIMIC-256x25680-20-split-resnet-Float16_2-race_detection_rop_seg_data_rop_seg-0.001_20220321-054140_epoch:011.hdf5')

The dtype policy mixed_float16 may run slowly because this machine does not have a GPU. Only Nvidia GPUs with compute capability of at least 7.0 run quickly with mixed_float16.
Instructions for updating:
Use tf.keras.mixed_precision.LossScaleOptimizer instead. LossScaleOptimizer now has all the functionality of DynamicLossScale


### Test Model Loading

In [71]:
model_path = 'outputs/checkpoints/model_original_epoch50.pt'

In [72]:
model = PretrainedModel(2)

In [73]:
model.load_state_dict(torch.load(model_path))

<All keys matched successfully>

In [77]:
net = NeuralNetClassifier(model, criterion=nn.CrossEntropyLoss)

In [80]:
net.predict_proba()

TypeError: predict_proba() missing 1 required positional argument: 'X'

### Tensorflow Inference

In [3]:
def image_from_id(img_path, path_name):
    arr = np.array(Image.open(img_path + path_name))
    resized = cv2.resize(arr, (256,256))
    channels = np.repeat(resized[:, :, np.newaxis], 3, axis=2).reshape((256,256,3))
    
    return channels

In [4]:
def get_race_from_id(img_id, race_csv_path):

    race_data = pd.read_csv(race_csv_path)
    img_row = race_data.loc[race_data['image_id'] == int(img_id)] # they both must be ints
    img_row = img_row.reset_index(drop=True) # for .at to work
    img_race = img_row.at[0,'race']
    
    return img_race    

In [5]:
# run through classifier

@jit         
def predict_on_images(img_path, preds_df, colname,
                     skeleton, thresh_type, intensity_change, brighten_sum,
                     csv_name = "brightened_predictions", 
                     preds_path = "/users/riya/race/classifier_experiments/predictions/experiment1_plus_systemic_brightening/", 
                     race_csv_path = "/users/riya/race/csv/image_race_data.csv"): 
    
    img_files = os.listdir(img_path)
    num_images = len(img_files)
    
    print(num_images) # fine, 4546
    
    id_arr = [0] * num_images
    race_arr = [0] * num_images
    # img_arr = np.zeros((num_images, 256, 256, 3))
    
    preds_arr = [0] * num_images
    
    for i in tqdm(range(num_images)): # add tqdm back later
        channels = image_from_id(img_path, img_files[i])
        modified_img = systemic_brightening(channels, skeleton, thresh_type, intensity_change, brighten_sum,
                                           image_size = (256, 256))
        modified_img = np.array(modified_img).reshape((1,256,256,3)) # np reshape, bc substitute? 
        modified_img = modified_img.astype(np.uint8)
        
        proba = segmentation_classifier(modified_img)        
        preds_arr[i] = proba.numpy()[0,1] # returning white prediction
        
        # img_arr[i] = modified_img.astype(np.uint8) # uint8
                       
        # getting id     
        img_id = re.findall(r'\d+', img_files[i])[0] # only one number
        id_arr[i] = int(img_id) # be sure it's int
    

        # getting race
        img_race = get_race_from_id(img_id, race_csv_path)
        race_arr[i] = img_race
    
    preds_df['id'] = id_arr
    preds_df['race'] = race_arr
    preds_df[colname] = preds_arr      
    
    preds_df.to_csv(preds_path + csv_name + ".csv")


In [6]:
all_predictions = pd.DataFrame(columns = ['id', '30', '60', '90', '120', '150']) # from id I can get race

dataset_path = "/users/riya/race/dataset/segmentations/"

In [7]:
predict_on_images(dataset_path, all_predictions, '30', False, 'below', 'brighten', 30)

4546


100%|██████████| 4546/4546 [32:36<00:00,  2.32it/s]


In [8]:
bright_preds = pd.read_csv("/users/riya/race/classifier_experiments/predictions/experiment1_plus_systemic_brightening/" 
                           + "brightened_predictions.csv", usecols = ['id', 'race','30', '60', '90', '120', '150'])

In [9]:
bright_preds

Unnamed: 0,id,30,60,90,120,150,race
0,101934,0.156814,,,,,black
1,82379,0.014365,,,,,white
2,87427,0.000071,,,,,black
3,49236,0.000039,,,,,black
4,45251,0.001444,,,,,black
...,...,...,...,...,...,...,...
4541,92793,0.000316,,,,,black
4542,31340,0.000316,,,,,black
4543,99058,0.000316,,,,,black
4544,33415,0.000316,,,,,white


In [9]:
predict_on_images(dataset_path, all_predictions, '30', False, 'below', 'brighten', 30)
predict_on_images(dataset_path, all_predictions, '60', False, 'below', 'brighten', 60)
predict_on_images(dataset_path, all_predictions, '90', False, 'below', 'brighten', 90)
predict_on_images(dataset_path, all_predictions, '120', False, 'below', 'brighten', 120)         
predict_on_images(dataset_path, all_predictions, '150', False, 'below', 'brighten', 150)     

100%|██████████| 4546/4546 [15:15<00:00,  4.96it/s]
100%|██████████| 4546/4546 [12:36<00:00,  6.01it/s]
100%|██████████| 4546/4546 [12:40<00:00,  5.98it/s]
100%|██████████| 4546/4546 [12:44<00:00,  5.95it/s]
100%|██████████| 4546/4546 [12:46<00:00,  5.93it/s]


In [10]:
predict_on_images(dataset_path, all_predictions, '30', False, 'below', 'dull', 30, csv_name = "dulled_predictions")
predict_on_images(dataset_path, all_predictions, '60', False, 'below', 'dull', 60, csv_name = "dulled_predictions")
predict_on_images(dataset_path, all_predictions, '90', False, 'below', 'dull', 90, csv_name = "dulled_predictions")
predict_on_images(dataset_path, all_predictions, '120', False, 'below', 'dull', 120, csv_name = "dulled_predictions")         
predict_on_images(dataset_path, all_predictions, '150', False, 'below', 'dull', 150, csv_name = "dulled_predictions")                          

100%|██████████| 4546/4546 [12:37<00:00,  6.00it/s]
100%|██████████| 4546/4546 [12:42<00:00,  5.96it/s]
100%|██████████| 4546/4546 [12:40<00:00,  5.98it/s]
100%|██████████| 4546/4546 [12:49<00:00,  5.91it/s]
100%|██████████| 4546/4546 [12:52<00:00,  5.89it/s]


5


## Pytorch Inference

In [64]:
def generate_dataset():
    csv_path = "/users/riya/race/csv/image_race_data.csv" 
    data_path = "/users/riya/race/dataset/segmentations/" # inference on segmentation images
    save_path = "/users/riya/race/classifier_experiments/dataset/experiment1_plus_systemic_brightening/test/" 
    
    files = os.listdir(data_path)
    for i in range(len(files)):
        img_id = os.path.splitext(files[i])[0]
        img_race = get_race_from_id(img_id, csv_path)
     
        img = Image.open(data_path + img_id + ".bmp")
        
        if img_race == 'black':
            img.save(save_path + "black/" + img_id + ".bmp") 
        if img_race == 'white':
            img.save(save_path + "white/" + img_id + ".bmp") 
        

In [65]:
generate_dataset()

In [68]:
save_path = "/users/riya/race/classifier_experiments/dataset/experiment1_plus_systemic_brightening/test/white" 
len(os.listdir(save_path))

2837

In [69]:
1709 + 2837 # good

4546

### Performing Inference

In [5]:
def test(data_dir, thresh_type, intensity_change, brighten_sum, experiment_name, model_path,
         skeleton=False, none_thresh = 20, num_classes = 2, image_size = (224, 224)):
    
    os.chdir("/users/riya/race/classifier_experiments/CNN_train/")
    race_data_path = "/users/riya/race/csv/image_race_data.csv"

    device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
    if device == 'cuda:1':
        torch.cuda.empty_cache()
    
    csv_name = f'../predictions/experiment1_plus_systemic_brightening/{experiment_name}/' + str(intensity_change) + '_by_' + str(brighten_sum) + '.csv'
    
    print(csv_name)
    
    test_transforms = transforms.Compose([transforms.Lambda(lambda img: systemic_brightening
                                                            (img, skeleton, thresh_type, intensity_change, brighten_sum)),
                                          # transforms.Resize(image_size),
                                          transforms.ToTensor(),
                                          transforms.Normalize([0.5, 0.5, 0.5],
                                                               [0.5, 0.5, 0.5])])
    
    test_folder = os.path.join(data_dir, 'test')
    test_dataset = datasets.ImageFolder(test_folder, test_transforms)    
    
    # Pytorch load model
    model = PretrainedModel(num_classes)
    model.load_state_dict(torch.load(model_path))
    
    # load into Skorch
    net = NeuralNetClassifier(model, 
                              criterion=nn.CrossEntropyLoss,
                              device=device)
    
    net.initialize() # bc I am not using net.fit (training the model)
    
    print ("Model Loaded + Initialized", model_path)
    
    img_locs = [loc for loc, _ in test_dataset.samples]
    img_ids = [re.findall(r'\d+', loc)[1] for loc in img_locs] # instantaneous basically
    
    #print(img_ids)
    
    img_race = [get_race_from_id(img_id, race_data_path) for img_id in tqdm(img_ids)]
    
    # print(img_race)
    print("Number of Images: " + str(len(img_race)))
    
    print ("Starting Predictions")
    
    test_probs = net.predict_proba(test_dataset)
    
    print ("Predictions Done")
    test_probs = [prob[1] for prob in test_probs] # probability of being white
    data = {'img_id' : img_ids, 'race': img_race, 'probability_' + str(brighten_sum) : test_probs}
    pd.DataFrame(data=data).to_csv(csv_name, index=False)
    
    print ("Code Done")

In [6]:
experiment_name = 'skeleton_model' # change depending on experiment
data_dir = "../dataset/experiment1_plus_systemic_brightening/"
model_path = 'outputs/checkpoints/model_skeletonized_epoch50.pt'

In [7]:
# no skeletonization

test(data_dir, 'below', 'brighten', 0, experiment_name, model_path) 
test(data_dir, 'below', 'brighten', 30, experiment_name, model_path) # skeleton = False
test(data_dir, 'below', 'brighten', 60, experiment_name, model_path) 
test(data_dir, 'below', 'brighten', 90, experiment_name, model_path) 
test(data_dir, 'below', 'brighten', 120, experiment_name, model_path) 
test(data_dir, 'below', 'brighten', 150, experiment_name, model_path) 

# ---------------------- dulled now

test(data_dir, 'below', 'dull', 30, experiment_name, model_path) 
test(data_dir, 'below', 'dull', 60, experiment_name, model_path) 
test(data_dir, 'below', 'dull', 90, experiment_name, model_path) 
test(data_dir, 'below', 'dull', 120, experiment_name, model_path) 
test(data_dir, 'below', 'dull', 150, experiment_name, model_path) 

../predictions/experiment1_plus_systemic_brightening/skeleton_model/brighten_by_0.csv


  0%|          | 0/4546 [00:00<?, ?it/s]

Model Loaded + Initialized outputs/checkpoints/model_skeletonized_epoch50.pt


100%|██████████| 4546/4546 [01:15<00:00, 59.96it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done
../predictions/experiment1_plus_systemic_brightening/skeleton_model/brighten_by_30.csv


  0%|          | 7/4546 [00:00<01:14, 60.99it/s]

Model Loaded + Initialized outputs/checkpoints/model_skeletonized_epoch50.pt


100%|██████████| 4546/4546 [01:15<00:00, 60.17it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done
../predictions/experiment1_plus_systemic_brightening/skeleton_model/brighten_by_60.csv


  0%|          | 7/4546 [00:00<01:14, 60.81it/s]

Model Loaded + Initialized outputs/checkpoints/model_skeletonized_epoch50.pt


100%|██████████| 4546/4546 [01:15<00:00, 60.50it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done
../predictions/experiment1_plus_systemic_brightening/skeleton_model/brighten_by_90.csv


  0%|          | 7/4546 [00:00<01:13, 61.37it/s]

Model Loaded + Initialized outputs/checkpoints/model_skeletonized_epoch50.pt


100%|██████████| 4546/4546 [01:14<00:00, 60.74it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done
../predictions/experiment1_plus_systemic_brightening/skeleton_model/brighten_by_120.csv


  0%|          | 7/4546 [00:00<01:14, 60.67it/s]

Model Loaded + Initialized outputs/checkpoints/model_skeletonized_epoch50.pt


100%|██████████| 4546/4546 [01:14<00:00, 60.95it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done
../predictions/experiment1_plus_systemic_brightening/skeleton_model/brighten_by_150.csv


  0%|          | 7/4546 [00:00<01:13, 62.02it/s]

Model Loaded + Initialized outputs/checkpoints/model_skeletonized_epoch50.pt


100%|██████████| 4546/4546 [01:14<00:00, 61.15it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done
../predictions/experiment1_plus_systemic_brightening/skeleton_model/dull_by_30.csv


  0%|          | 6/4546 [00:00<01:16, 59.48it/s]

Model Loaded + Initialized outputs/checkpoints/model_skeletonized_epoch50.pt


100%|██████████| 4546/4546 [01:14<00:00, 61.36it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done
../predictions/experiment1_plus_systemic_brightening/skeleton_model/dull_by_60.csv


  0%|          | 7/4546 [00:00<01:11, 63.66it/s]

Model Loaded + Initialized outputs/checkpoints/model_skeletonized_epoch50.pt


100%|██████████| 4546/4546 [01:15<00:00, 60.37it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done
../predictions/experiment1_plus_systemic_brightening/skeleton_model/dull_by_90.csv


  0%|          | 7/4546 [00:00<01:15, 59.95it/s]

Model Loaded + Initialized outputs/checkpoints/model_skeletonized_epoch50.pt


100%|██████████| 4546/4546 [01:14<00:00, 60.86it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done
../predictions/experiment1_plus_systemic_brightening/skeleton_model/dull_by_120.csv


  0%|          | 6/4546 [00:00<01:17, 58.47it/s]

Model Loaded + Initialized outputs/checkpoints/model_skeletonized_epoch50.pt


100%|██████████| 4546/4546 [01:14<00:00, 60.87it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done
../predictions/experiment1_plus_systemic_brightening/skeleton_model/dull_by_150.csv


  0%|          | 7/4546 [00:00<01:12, 62.50it/s]

Model Loaded + Initialized outputs/checkpoints/model_skeletonized_epoch50.pt


100%|██████████| 4546/4546 [01:15<00:00, 60.18it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done


In [96]:
experiment_name = '' # change depending on experiment
data_dir = "../dataset/experiment1_plus_systemic_brightening/"
model_path = 'outputs/checkpoints/model_original_epoch50.pt'

In [49]:
test(data_dir, 'below', 'brighten', 30, experiment_name, model_path) # skeleton = False

  0%|          | 7/4546 [00:00<01:13, 61.48it/s]

Model Loaded + Initialized


100%|██████████| 4546/4546 [01:18<00:00, 57.61it/s]


['black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black', 

Predictions Done
Code Done


In [61]:
test(data_dir, 'below', 'brighten', 60, experiment_name, model_path) 
test(data_dir, 'below', 'brighten', 90, experiment_name, model_path) 
test(data_dir, 'below', 'brighten', 120, experiment_name, model_path) 
test(data_dir, 'below', 'brighten', 150, experiment_name, model_path) 

# ---------------------- dulled now

test(data_dir, 'below', 'dull', 30, experiment_name, model_path) 
test(data_dir, 'below', 'dull', 60, experiment_name, model_path) 
test(data_dir, 'below', 'dull', 90, experiment_name, model_path) 
test(data_dir, 'below', 'dull', 120, experiment_name, model_path) 
test(data_dir, 'below', 'dull', 150, experiment_name, model_path) 

  0%|          | 6/4546 [00:00<01:23, 54.27it/s]

Model Loaded + Initialized


100%|██████████| 4546/4546 [01:17<00:00, 58.78it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done


  0%|          | 7/4546 [00:00<01:11, 63.43it/s]

Model Loaded + Initialized


100%|██████████| 4546/4546 [01:16<00:00, 59.15it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done


  0%|          | 6/4546 [00:00<01:16, 59.28it/s]

Model Loaded + Initialized


100%|██████████| 4546/4546 [01:16<00:00, 59.10it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done


  0%|          | 6/4546 [00:00<01:15, 59.96it/s]

Model Loaded + Initialized


100%|██████████| 4546/4546 [01:16<00:00, 59.15it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done


  0%|          | 6/4546 [00:00<01:17, 58.51it/s]

Model Loaded + Initialized


100%|██████████| 4546/4546 [01:16<00:00, 59.06it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done


  0%|          | 6/4546 [00:00<01:18, 57.78it/s]

Model Loaded + Initialized


100%|██████████| 4546/4546 [01:17<00:00, 58.70it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done


  0%|          | 7/4546 [00:00<01:14, 60.75it/s]

Model Loaded + Initialized


100%|██████████| 4546/4546 [01:17<00:00, 58.92it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done


  0%|          | 7/4546 [00:00<01:12, 63.00it/s]

Model Loaded + Initialized


100%|██████████| 4546/4546 [01:17<00:00, 58.90it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done


  0%|          | 6/4546 [00:00<01:18, 57.87it/s]

Model Loaded + Initialized


100%|██████████| 4546/4546 [01:16<00:00, 59.16it/s]


Number of Images: 4546
Starting Predictions
Predictions Done
Code Done


In [102]:
test(data_dir, 'below', 'brighten', 0, experiment_name, model_path) 
# original image. No brightening effect



  0%|          | 0/4546 [00:00<?, ?it/s][A[A

  0%|          | 6/4546 [00:00<01:23, 54.35it/s][A[A

Model Loaded + Initialized




  0%|          | 12/4546 [00:00<01:22, 55.12it/s][A[A

  0%|          | 18/4546 [00:00<01:20, 55.95it/s][A[A

  1%|          | 24/4546 [00:00<01:20, 56.14it/s][A[A

  1%|          | 30/4546 [00:00<01:19, 56.68it/s][A[A

  1%|          | 37/4546 [00:00<01:17, 58.07it/s][A[A

  1%|          | 44/4546 [00:00<01:16, 58.89it/s][A[A

  1%|          | 51/4546 [00:00<01:15, 59.35it/s][A[A

  1%|▏         | 58/4546 [00:00<01:15, 59.61it/s][A[A

  1%|▏         | 65/4546 [00:01<01:14, 59.86it/s][A[A

  2%|▏         | 72/4546 [00:01<01:14, 60.35it/s][A[A

  2%|▏         | 79/4546 [00:01<01:14, 60.19it/s][A[A

  2%|▏         | 86/4546 [00:01<01:13, 60.54it/s][A[A

  2%|▏         | 93/4546 [00:01<01:14, 59.85it/s][A[A

  2%|▏         | 100/4546 [00:01<01:13, 60.34it/s][A[A

  2%|▏         | 107/4546 [00:01<01:14, 59.74it/s][A[A

  2%|▏         | 113/4546 [00:01<01:15, 58.99it/s][A[A

  3%|▎         | 120/4546 [00:02<01:14, 59.42it/s][A[A

  3%|▎         | 126/4546

 39%|███▉      | 1779/4546 [00:30<00:45, 60.42it/s][A[A

 39%|███▉      | 1786/4546 [00:30<00:46, 59.04it/s][A[A

 39%|███▉      | 1793/4546 [00:30<00:46, 59.18it/s][A[A

 40%|███▉      | 1799/4546 [00:30<00:46, 59.25it/s][A[A

 40%|███▉      | 1806/4546 [00:30<00:45, 59.60it/s][A[A

 40%|███▉      | 1812/4546 [00:30<00:46, 59.29it/s][A[A

 40%|████      | 1819/4546 [00:30<00:45, 59.29it/s][A[A

 40%|████      | 1825/4546 [00:31<00:46, 58.78it/s][A[A

 40%|████      | 1831/4546 [00:31<00:46, 58.35it/s][A[A

 40%|████      | 1837/4546 [00:31<00:46, 58.43it/s][A[A

 41%|████      | 1843/4546 [00:31<00:46, 58.06it/s][A[A

 41%|████      | 1849/4546 [00:31<00:46, 57.39it/s][A[A

 41%|████      | 1856/4546 [00:31<00:46, 58.21it/s][A[A

 41%|████      | 1862/4546 [00:31<00:46, 57.60it/s][A[A

 41%|████      | 1868/4546 [00:31<00:46, 57.94it/s][A[A

 41%|████      | 1875/4546 [00:31<00:45, 58.91it/s][A[A

 41%|████▏     | 1881/4546 [00:32<00:45, 58.65it/s][A[

 77%|███████▋  | 3516/4546 [00:59<00:17, 59.05it/s][A[A

 77%|███████▋  | 3523/4546 [00:59<00:17, 60.17it/s][A[A

 78%|███████▊  | 3530/4546 [01:00<00:16, 60.40it/s][A[A

 78%|███████▊  | 3537/4546 [01:00<00:16, 59.87it/s][A[A

 78%|███████▊  | 3544/4546 [01:00<00:16, 59.86it/s][A[A

 78%|███████▊  | 3550/4546 [01:00<00:16, 59.26it/s][A[A

 78%|███████▊  | 3557/4546 [01:00<00:16, 60.01it/s][A[A

 78%|███████▊  | 3563/4546 [01:00<00:16, 59.63it/s][A[A

 79%|███████▊  | 3569/4546 [01:00<00:16, 59.32it/s][A[A

 79%|███████▊  | 3576/4546 [01:00<00:16, 60.15it/s][A[A

 79%|███████▉  | 3583/4546 [01:01<00:16, 60.08it/s][A[A

 79%|███████▉  | 3590/4546 [01:01<00:15, 60.00it/s][A[A

 79%|███████▉  | 3596/4546 [01:01<00:16, 58.74it/s][A[A

 79%|███████▉  | 3603/4546 [01:01<00:15, 59.39it/s][A[A

 79%|███████▉  | 3609/4546 [01:01<00:15, 59.24it/s][A[A

 80%|███████▉  | 3615/4546 [01:01<00:15, 58.37it/s][A[A

 80%|███████▉  | 3621/4546 [01:01<00:15, 58.19it/s][A[

Number of Images: 4546
Starting Predictions
Predictions Done
Code Done


## Analyze Results

In [3]:
!pwd

/users/riya/race/classifier_experiments


In [4]:
results_path = "predictions/experiment1_plus_systemic_brightening/skeleton_model/"

### Compile Arrays

In [5]:
increments_arr = [0,30, 60, 90, 120, 150] # do the original prediction too, I totally forgot about that oopsies

In [6]:
bright30preds = pd.read_csv(results_path + "brighten_by_30.csv")
dull30preds = pd.read_csv(results_path + "dull_by_30.csv")

In [7]:
# develop complete brightened array, considering dataframes are in the same order
brightened_predictions = bright30preds[['img_id','race']].copy()
dull_predictions = dull30preds[['img_id','race']].copy()

# should be the same arrays

In [12]:
def combine_result_frames(combined_df, intensity_change, increments_arr):
    
    complete_predictions = combined_df.copy()
    csv_name = ''
    
    for i in range(len(increments_arr)):
        if intensity_change == 'brighten':
            prediction_df =  pd.read_csv(results_path + f"brighten_by_{increments_arr[i]}.csv")
            csv_name = 'brightpreds.csv'
        elif intensity_change == 'dull':
            prediction_df =  pd.read_csv(results_path + f"dull_by_{increments_arr[i]}.csv")
            csv_name = 'dullpreds.csv'

        colname = prediction_df.columns[2]
        prediction_col = prediction_df.iloc[:,2]

        insert_pos = len(combined_df.columns)
        complete_predictions.insert(insert_pos, colname, prediction_col)
    
    complete_predictions.to_csv(results_path + csv_name)

    return complete_predictions

In [13]:
brightpreds = combine_result_frames(brightened_predictions, 'brighten', increments_arr)

In [14]:
brightpreds.head()

Unnamed: 0,img_id,race,probability_150,probability_120,probability_90,probability_60,probability_30,probability_0
0,101127,black,0.011595,0.012912,0.016962,0.026338,0.050886,0.067836
1,101128,black,0.164268,0.166308,0.146381,0.092394,0.051615,0.030836
2,101129,black,0.008155,0.007646,0.007741,0.007821,0.007899,0.005908
3,101130,black,0.005517,0.005347,0.005242,0.007844,0.013683,0.016326
4,101131,black,0.03212,0.041045,0.05297,0.071659,0.096342,0.0923


In [15]:
brightpreds_white = brightpreds[brightpreds['race'] == 'white']
brightpreds_black = brightpreds[brightpreds['race'] == 'black']

While probability outputs ARE NOT confidence exactly, if one is larger than the other, for instance, confidence is higher. Relative confidence can be determined, although not exact model confidence.

In [17]:
brightpreds_white.head()

Unnamed: 0,img_id,race,probability_150,probability_120,probability_90,probability_60,probability_30,probability_0
1709,100281,white,0.031918,0.023943,0.01872,0.011903,0.005556,0.002899
1710,100282,white,0.049282,0.052149,0.053809,0.046064,0.033878,0.032848
1711,100283,white,0.003321,0.004094,0.004276,0.004087,0.003478,0.002371
1712,100284,white,0.006728,0.00462,0.003261,0.002176,0.001335,0.000554
1713,100285,white,0.017935,0.016385,0.014592,0.012146,0.009924,0.007472


In [18]:
len(brightpreds), len(brightpreds_white), len(brightpreds_black)

(4546, 2837, 1709)

In [19]:
dullpreds = combine_result_frames(dull_predictions, 'dull', increments_arr[1:])

In [20]:
dullpreds.head()

Unnamed: 0,img_id,race,probability_150,probability_120,probability_90,probability_60,probability_30
0,101127,black,0.011909,0.009557,0.013532,0.035277,0.053298
1,101128,black,0.037104,0.02943,0.015921,0.007985,0.01054
2,101129,black,0.005985,0.001869,0.001018,0.000903,0.001822
3,101130,black,0.035109,0.028922,0.031499,0.028128,0.019299
4,101131,black,0.020138,0.022647,0.025004,0.05292,0.032119


In [21]:
dullpreds_white = dullpreds[dullpreds['race'] == 'white']
dullpreds_black = dullpreds[dullpreds['race'] == 'black']

In [22]:
len(dullpreds), len(dullpreds_white), len(dullpreds_black)

(4546, 2837, 1709)

### Determine Percent Misclassification

In [7]:
results_path = "/users/riya/race/classifier_experiments/CNN_train/outputs/probabilities/#10(macula_focus)/"

In [83]:
show_macula_non_skel = pd.read_csv(results_path + "show_macula_brightened_by_0_epoch50.csv")
show_macula_skel = pd.read_csv(results_path + "show_macula_brightened_by_0_skeletonized_epoch50.csv")

hide_macula_non_skel = pd.read_csv(results_path + "hide_macula_brightened_by_0_epoch50.csv")
hide_macula_skel = pd.read_csv(results_path + "hide_macula_brightened_by_0_skeletonized_epoch50.csv")

In [84]:
hide_macula_skel

Unnamed: 0,img_loc,probability
0,/users/riya/race/classifier_experiments/CNN_tr...,0.720475
1,/users/riya/race/classifier_experiments/CNN_tr...,0.866231
2,/users/riya/race/classifier_experiments/CNN_tr...,0.746231
3,/users/riya/race/classifier_experiments/CNN_tr...,0.467659
4,/users/riya/race/classifier_experiments/CNN_tr...,0.934661
...,...,...
905,/users/riya/race/classifier_experiments/CNN_tr...,0.081580
906,/users/riya/race/classifier_experiments/CNN_tr...,0.225085
907,/users/riya/race/classifier_experiments/CNN_tr...,0.725670
908,/users/riya/race/classifier_experiments/CNN_tr...,0.068766


In [85]:
hide_macula_skel['img_loc'][500]

'/users/riya/race/classifier_experiments/CNN_train/dataset_full/test/white/33937.bmp'

In [86]:
race_arr = [0] * len(hide_macula_skel)

for i in range(len(hide_macula_skel)):
    test_phrase = hide_macula_skel['img_loc'][i]
    if 'black' in test_phrase:
        race_arr[i] = 0
    if 'white' in test_phrase:
        race_arr[i] = 1

In [87]:
len(race_arr)

910

In [88]:
hide_macula_skel['race'] = race_arr

In [89]:
hide_macula_skel[hide_macula_skel['race'] == 1]

Unnamed: 0,img_loc,probability,race
342,/users/riya/race/classifier_experiments/CNN_tr...,0.866755,1
343,/users/riya/race/classifier_experiments/CNN_tr...,0.144750,1
344,/users/riya/race/classifier_experiments/CNN_tr...,0.566279,1
345,/users/riya/race/classifier_experiments/CNN_tr...,0.038444,1
346,/users/riya/race/classifier_experiments/CNN_tr...,0.026375,1
...,...,...,...
905,/users/riya/race/classifier_experiments/CNN_tr...,0.081580,1
906,/users/riya/race/classifier_experiments/CNN_tr...,0.225085,1
907,/users/riya/race/classifier_experiments/CNN_tr...,0.725670,1
908,/users/riya/race/classifier_experiments/CNN_tr...,0.068766,1


In [90]:
fpr, tpr, _ = roc_curve(hide_macula_skel['race'],  1 - hide_macula_skel['probability'])
auc = roc_auc_score(hide_macula_skel['race'], 1 - hide_macula_skel['probability'])
auc = np.round(auc, 4)

In [91]:
auc

0.8957

In [66]:
def percent_predictions(preds_df, race, increments_arr = []):
    
    pred = ''
    
    if (race == 'black'):
        pred = 'white'
    elif (race == 'white'):
        pred = 'black'
    
    print(f'Percent of {race} Images Incorrectly Predicted as {pred}')
    
    def preds_print(colname):
        if (race == 'white'): # getting the percent predicted correctly as white
            predscol = preds_df[preds_df[colname] > 0.5]
        elif (race == 'black'): # getting the percent predicted correctly as black
            predscol = preds_df[preds_df[colname] < 0.5]
        
        right_percent = 100 * len(predscol)/len(preds_df) 
        wrong_percent = 100 - right_percent
        print(str(colname) + ':' + str(wrong_percent))
    
    # preds_print('original') # will do soon
    
    preds_print('probability')

In [92]:
white_show_macula_non_skel = hide_macula_skel[hide_macula_skel['race'] == 1]
black_show_macula_non_skel = hide_macula_skel[hide_macula_skel['race'] == 0]

In [93]:
percent_predictions(white_show_macula_non_skel, 'white')

Percent of white Images Incorrectly Predicted as black
probability:79.5774647887324


In [94]:
percent_predictions(black_show_macula_non_skel, 'black')

Percent of black Images Incorrectly Predicted as white
probability:82.45614035087719


In [24]:
percent_predictions(brightpreds_white, 'white', increments_arr)

Percent of white Images Incorrectly Predicted as black
probability_0:97.10962284102925
probability_30:95.02996122664787
probability_60:94.25449418399718
probability_90:94.53648219950652
probability_120:95.59393725766655
probability_150:96.61614381388792


In [25]:
percent_predictions(brightpreds_black, 'black', increments_arr)

Percent of black Images Incorrectly Predicted as white
probability_0:0.05851375073142151
probability_30:0.17554125219426453
probability_60:0.05851375073142151
probability_90:0.0
probability_120:0.0
probability_150:0.0


In [27]:
percent_predictions(dullpreds_white, 'white', increments_arr[1:])

Percent of white Images Incorrectly Predicted as black
probability_30:99.57701797673599
probability_60:99.92950299612266
probability_90:100.0
probability_120:100.0
probability_150:100.0


In [28]:
percent_predictions(dullpreds_black, 'black', increments_arr[1:])

Percent of black Images Incorrectly Predicted as white
probability_30:0.0
probability_60:0.05851375073142151
probability_90:0.05851375073142151
probability_120:0.0
probability_150:0.0


### Old Code

In [56]:
percent_predictions(bright30preds_white, 'white')

Percent of Images Incorrectly Predicted as 
probability_30:23.68699330278463


In [12]:
percent_predictions(bright_white_preds, 'white')

Percent of Images Incorrectly Predicted as 
30:23.68699330278463
60:100.0
90:100.0
120:100.0
150:100.0


In [31]:
bright_white_preds

Unnamed: 0,id,30,60,90,120,150,race
1,82379,0.014365,0.001441,0.001383,0.001755,0.002071,white
5,87380,0.932330,0.928991,0.866641,0.566693,0.192995,white
7,43868,0.197987,0.004664,0.002383,0.001999,0.001897,white
9,102795,0.997897,0.389615,0.024940,0.009268,0.006931,white
12,84991,0.990329,0.622761,0.062046,0.010388,0.004232,white
...,...,...,...,...,...,...,...
4537,96939,0.000316,0.000316,0.000316,0.000316,0.000316,white
4539,71512,0.000316,0.000316,0.000316,0.000316,0.000316,white
4540,18169,0.000316,0.000316,0.000316,0.000316,0.000316,white
4544,33415,0.000316,0.000316,0.000316,0.000316,0.000316,white


In [34]:
bright_black_preds

Unnamed: 0,id,30,60,90,120,150,race
0,101934,0.156814,0.239794,0.080069,0.027195,0.013532,black
2,87427,0.000071,0.000078,0.000162,0.000218,0.000225,black
3,49236,0.000039,0.000071,0.000110,0.000147,0.000196,black
4,45251,0.001444,0.003900,0.007533,0.011245,0.013532,black
6,39426,0.000392,0.000634,0.000739,0.000883,0.000975,black
...,...,...,...,...,...,...,...
4535,7451,0.000316,0.000316,0.000316,0.000316,0.000316,black
4538,101738,0.000316,0.000316,0.000316,0.000316,0.000316,black
4541,92793,0.000316,0.000316,0.000316,0.000316,0.000316,black
4542,31340,0.000316,0.000316,0.000316,0.000316,0.000316,black


In [51]:
dull_black_preds

Unnamed: 0,id,30,60,90,120,150,race
0,101934,0.043204,0.027118,0.018960,0.024586,0.046725,black
2,87427,0.081046,0.938463,0.965835,0.974043,0.972831,black
3,49236,0.000083,0.000636,0.003286,0.037644,0.157525,black
4,45251,0.003016,0.017193,0.167015,0.869160,0.975249,black
6,39426,0.000888,0.002742,0.013480,0.057652,0.112210,black
...,...,...,...,...,...,...,...
4535,7451,0.000316,0.000316,0.000316,0.000316,0.000316,black
4538,101738,0.000316,0.000316,0.000316,0.000316,0.000316,black
4541,92793,0.000316,0.000316,0.000316,0.000316,0.000316,black
4542,31340,0.000316,0.000316,0.000316,0.000316,0.000316,black


3074

In [37]:
bright_black_preds[bright_black_preds['30'] == 0.000316]

Unnamed: 0,id,30,60,90,120,150,race


In [46]:
pd.value_counts(bright_black_preds['150'])

0.000316    1537
0.002296       3
0.000510       2
0.000166       2
0.000546       2
            ... 
0.000688       1
0.000223       1
0.003148       1
0.001158       1
0.000401       1
Name: 150, Length: 163, dtype: int64

In [40]:
dull_white_preds

Unnamed: 0,id,30,60,90,120,150,race
1,82379,0.923868,0.958770,0.852935,0.389010,0.125493,white
5,87380,0.727443,0.798344,0.676887,0.556769,0.556083,white
7,43868,0.995435,0.998239,0.998516,0.996190,0.982757,white
9,102795,0.988313,0.903720,0.666985,0.264738,0.154887,white
12,84991,0.999913,0.999957,0.999769,0.998713,0.982271,white
...,...,...,...,...,...,...,...
4537,96939,0.000316,0.000316,0.000316,0.000316,0.000316,white
4539,71512,0.000316,0.000316,0.000316,0.000316,0.000316,white
4540,18169,0.000316,0.000316,0.000316,0.000316,0.000316,white
4544,33415,0.000316,0.000316,0.000316,0.000316,0.000316,white


In [48]:
pd.value_counts(dull_white_preds['30'])

0.000316    2559
0.999774       2
0.999913       2
0.999993       2
0.999543       2
            ... 
0.963780       1
0.999115       1
0.999137       1
0.936401       1
0.986310       1
Name: 30, Length: 273, dtype: int64

In [49]:
2559/2837

0.9020091646105041

In [50]:
1537/1709

0.8993563487419544

## Quick Checks

In [98]:
checksum_arr = [0] * len(test_img_arr)

for i in range(len(test_img_arr)):
    test_img_arr[i] = test_img_arr[i].astype(np.uint8)
    all_sum = np.concatenate(test_img_arr[i]).sum()
    checksum_arr[i] = all_sum    

In [125]:
pd.value_counts(checksum_arr)

1017804.0    2
1062438.0    2
228525.0     2
498261.0     2
613530.0     2
            ..
1108473.0    1
1890948.0    1
1812981.0    1
1255920.0    1
110592.0     1
Length: 4495, dtype: int64

In [None]:
for i in test_img_arr:
    plt.imshow(i.astype(np.uint8), interpolation = 'nearest')
    plt.show()