In [1]:
import os
import shutil
import hickle
import re

import pandas as pd
import tempfile
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab

import numpy as np
import PIL
from PIL import Image, ImageChops, ImageDraw
import torch
from torch import nn
import torchvision.models
import cv2

from pathlib import Path

import tensorflow as tf
from tensorflow import keras
import matplotlib.patches as patches

from tqdm import tqdm
from numba import jit, cuda
from utils import systemic_brightening
import warnings

In [2]:
# set directory
os.chdir("/users/riya/race/classifier_experiments") # which one? yep

# ignore warnings
warnings.filterwarnings("ignore")

# import model
segmentation_classifier = keras.models.load_model('models/MIMIC-256x25680-20-split-resnet-Float16_2-race_detection_rop_seg_data_rop_seg-0.001_20220321-054140_epoch:011.hdf5')

The dtype policy mixed_float16 may run slowly because this machine does not have a GPU. Only Nvidia GPUs with compute capability of at least 7.0 run quickly with mixed_float16.
Instructions for updating:
Use tf.keras.mixed_precision.LossScaleOptimizer instead. LossScaleOptimizer now has all the functionality of DynamicLossScale


In [3]:
def image_from_id(img_path, path_name):
    arr = np.array(Image.open(img_path + path_name))
    resized = cv2.resize(arr, (256,256))
    channels = np.repeat(resized[:, :, np.newaxis], 3, axis=2).reshape((256,256,3))
    
    return channels

In [4]:
def get_race_from_id(img_id, race_csv_path):

    race_data = pd.read_csv(race_csv_path)
    img_row = race_data.loc[race_data['image_id'] == int(img_id)] # they both must be ints
    img_row = img_row.reset_index(drop=True) # for .at to work
    img_race = img_row.at[0,'race']
    
    return img_race    

In [5]:
# run through classifier

@jit         
def predict_on_images(img_path, preds_df, colname,
                     skeleton, thresh_type, intensity_change, brighten_sum,
                     csv_name = "brightened_predictions", 
                     preds_path = "/users/riya/race/classifier_experiments/predictions/experiment1_plus_systemic_brightening/", 
                     race_csv_path = "/users/riya/race/csv/image_race_data.csv"): 
    
    img_files = os.listdir(dataset_path)
    num_images = len(img_files)
    
    print(num_images) # fine, 4546
    
    id_arr = [0] * num_images
    race_arr = [0] * num_images
    img_arr = np.zeros((num_images, 256, 256, 3))
    
    preds_arr = [0] * num_images
    
    for i in tqdm(range(num_images)): # add tqdm back later
        channels = image_from_id(img_path, img_files[i])
        modified_img = systemic_brightening(channels, skeleton, thresh_type, intensity_change, brighten_sum,
                                           image_size = (256, 256))
        modified_img = np.array(modified_img) # .reshape((1,256,256,3)) np reshape, bc substitute? 
        img_arr[i] = modified_img.astype(np.uint8) # uint8
                       
        # getting id     
        img_id = re.findall(r'\d+', img_files[i])[0] # only one number
        id_arr[i] = int(img_id) # be sure it's int
        
        # print(type(id_arr[i]))

        # getting race
        img_race = get_race_from_id(img_id, race_csv_path)
        race_arr[i] = img_race
    
    # return img_arr

    # print(len(img_arr[i]))
    # print(img_arr.shape)

    preds_df['id'] = id_arr
    preds_df['race'] = race_arr

    # getting prediction    
    prediction = segmentation_classifier(img_arr)

    for i in range(num_images):
        preds_arr[i] = prediction.numpy()[i, 1] # returning the white prediction for each image

    preds_df[colname] = preds_arr           
    preds_df.to_csv(preds_path + csv_name + ".csv")


In [6]:
all_predictions = pd.DataFrame(columns = ['id', '30', '60', '90', '120', '150']) # from id I can get race

dataset_path = "/users/riya/race/dataset/segmentations/"

In [7]:
predict_on_images(dataset_path, all_predictions, '30', False, 'below', 'brighten', 30)

4546


100%|██████████| 4546/4546 [13:20<00:00,  5.68it/s]


In [8]:
bright_preds = pd.read_csv("/users/riya/race/classifier_experiments/predictions/experiment1_plus_systemic_brightening/" 
                           + "brightened_predictions.csv", usecols = ['id', 'race','30', '60', '90', '120', '150'])

In [9]:
bright_preds

Unnamed: 0,id,30,60,90,120,150,race
0,101934,0.156814,,,,,black
1,82379,0.014365,,,,,white
2,87427,0.000071,,,,,black
3,49236,0.000039,,,,,black
4,45251,0.001444,,,,,black
...,...,...,...,...,...,...,...
4541,92793,0.000316,,,,,black
4542,31340,0.000316,,,,,black
4543,99058,0.000316,,,,,black
4544,33415,0.000316,,,,,white


In [9]:
predict_on_images(dataset_path, all_predictions, '30', False, 'below', 'brighten', 30)
predict_on_images(dataset_path, all_predictions, '60', False, 'below', 'brighten', 60)
predict_on_images(dataset_path, all_predictions, '90', False, 'below', 'brighten', 90)
predict_on_images(dataset_path, all_predictions, '120', False, 'below', 'brighten', 120)         
predict_on_images(dataset_path, all_predictions, '150', False, 'below', 'brighten', 150)     

100%|██████████| 4546/4546 [15:15<00:00,  4.96it/s]
100%|██████████| 4546/4546 [12:36<00:00,  6.01it/s]
100%|██████████| 4546/4546 [12:40<00:00,  5.98it/s]
100%|██████████| 4546/4546 [12:44<00:00,  5.95it/s]
100%|██████████| 4546/4546 [12:46<00:00,  5.93it/s]


In [10]:
predict_on_images(dataset_path, all_predictions, '30', False, 'below', 'dull', 30, csv_name = "dulled_predictions")
predict_on_images(dataset_path, all_predictions, '60', False, 'below', 'dull', 60, csv_name = "dulled_predictions")
predict_on_images(dataset_path, all_predictions, '90', False, 'below', 'dull', 90, csv_name = "dulled_predictions")
predict_on_images(dataset_path, all_predictions, '120', False, 'below', 'dull', 120, csv_name = "dulled_predictions")         
predict_on_images(dataset_path, all_predictions, '150', False, 'below', 'dull', 150, csv_name = "dulled_predictions")                          

100%|██████████| 4546/4546 [12:37<00:00,  6.00it/s]
100%|██████████| 4546/4546 [12:42<00:00,  5.96it/s]
100%|██████████| 4546/4546 [12:40<00:00,  5.98it/s]
100%|██████████| 4546/4546 [12:49<00:00,  5.91it/s]
100%|██████████| 4546/4546 [12:52<00:00,  5.89it/s]


5


### Analyze Results

In [15]:
bright_preds = pd.read_csv("/users/riya/race/classifier_experiments/predictions/experiment1_plus_systemic_brightening/" 
                           + "brightened_predictions.csv", usecols = ['id', 'race','30', '60', '90', '120', '150'])

In [29]:
bright_white_preds = bright_preds[bright_preds['race'] == 'white']
bright_black_preds = bright_preds[bright_preds['race'] == 'black']

In [18]:
dull_preds = pd.read_csv("/users/riya/race/classifier_experiments/predictions/experiment1_plus_systemic_brightening/" 
                           + "dulled_predictions.csv", usecols = ['id', 'race','30', '60', '90', '120', '150'])

In [20]:
dull_white_preds = dull_preds[dull_preds['race'] == 'white']
dull_black_preds = dull_preds[dull_preds['race'] == 'black']

In [24]:
def percent_predictions(preds_df, race):
    
    pred = ''
    
    if (race == 'black'):
        pred == 'white'
    elif (race == 'white'):
        pred == 'black'
    
    print('Percent of Images Incorrectly Predicted as ' + pred)
    
    def preds_print(colname):
        if (race == 'white'): # getting the percent predicted correctly as white
            predscol = preds_df[preds_df[colname] > 0.5]
        elif (race == 'black'): # getting the percent predicted correctly as black
            predscol = preds_df[preds_df[colname] < 0.5]
            
        right_percent = 100 * len(predscol)/len(preds_df) 
        wrong_percent = 100 - right_percent
        print(str(colname) + ':' + str(wrong_percent))
    
    preds_print('30')
    preds_print('60')
    preds_print('90')
    preds_print('120')
    preds_print('150')

In [25]:
percent_predictions(bright_white_preds, 'white')

Percent of Images Incorrectly Predicted as 
30:92.24532957349312
60:96.1931617906239
90:98.66055692633063
120:99.57701797673599
150:99.85900599224533


In [31]:
bright_white_preds

Unnamed: 0,id,30,60,90,120,150,race
1,82379,0.014365,0.001441,0.001383,0.001755,0.002071,white
5,87380,0.932330,0.928991,0.866641,0.566693,0.192995,white
7,43868,0.197987,0.004664,0.002383,0.001999,0.001897,white
9,102795,0.997897,0.389615,0.024940,0.009268,0.006931,white
12,84991,0.990329,0.622761,0.062046,0.010388,0.004232,white
...,...,...,...,...,...,...,...
4537,96939,0.000316,0.000316,0.000316,0.000316,0.000316,white
4539,71512,0.000316,0.000316,0.000316,0.000316,0.000316,white
4540,18169,0.000316,0.000316,0.000316,0.000316,0.000316,white
4544,33415,0.000316,0.000316,0.000316,0.000316,0.000316,white


In [34]:
bright_black_preds

Unnamed: 0,id,30,60,90,120,150,race
0,101934,0.156814,0.239794,0.080069,0.027195,0.013532,black
2,87427,0.000071,0.000078,0.000162,0.000218,0.000225,black
3,49236,0.000039,0.000071,0.000110,0.000147,0.000196,black
4,45251,0.001444,0.003900,0.007533,0.011245,0.013532,black
6,39426,0.000392,0.000634,0.000739,0.000883,0.000975,black
...,...,...,...,...,...,...,...
4535,7451,0.000316,0.000316,0.000316,0.000316,0.000316,black
4538,101738,0.000316,0.000316,0.000316,0.000316,0.000316,black
4541,92793,0.000316,0.000316,0.000316,0.000316,0.000316,black
4542,31340,0.000316,0.000316,0.000316,0.000316,0.000316,black


In [51]:
dull_black_preds

Unnamed: 0,id,30,60,90,120,150,race
0,101934,0.043204,0.027118,0.018960,0.024586,0.046725,black
2,87427,0.081046,0.938463,0.965835,0.974043,0.972831,black
3,49236,0.000083,0.000636,0.003286,0.037644,0.157525,black
4,45251,0.003016,0.017193,0.167015,0.869160,0.975249,black
6,39426,0.000888,0.002742,0.013480,0.057652,0.112210,black
...,...,...,...,...,...,...,...
4535,7451,0.000316,0.000316,0.000316,0.000316,0.000316,black
4538,101738,0.000316,0.000316,0.000316,0.000316,0.000316,black
4541,92793,0.000316,0.000316,0.000316,0.000316,0.000316,black
4542,31340,0.000316,0.000316,0.000316,0.000316,0.000316,black


3074

In [37]:
bright_black_preds[bright_black_preds['30'] == 0.000316]

Unnamed: 0,id,30,60,90,120,150,race


In [46]:
pd.value_counts(bright_black_preds['150'])

0.000316    1537
0.002296       3
0.000510       2
0.000166       2
0.000546       2
            ... 
0.000688       1
0.000223       1
0.003148       1
0.001158       1
0.000401       1
Name: 150, Length: 163, dtype: int64

In [40]:
dull_white_preds

Unnamed: 0,id,30,60,90,120,150,race
1,82379,0.923868,0.958770,0.852935,0.389010,0.125493,white
5,87380,0.727443,0.798344,0.676887,0.556769,0.556083,white
7,43868,0.995435,0.998239,0.998516,0.996190,0.982757,white
9,102795,0.988313,0.903720,0.666985,0.264738,0.154887,white
12,84991,0.999913,0.999957,0.999769,0.998713,0.982271,white
...,...,...,...,...,...,...,...
4537,96939,0.000316,0.000316,0.000316,0.000316,0.000316,white
4539,71512,0.000316,0.000316,0.000316,0.000316,0.000316,white
4540,18169,0.000316,0.000316,0.000316,0.000316,0.000316,white
4544,33415,0.000316,0.000316,0.000316,0.000316,0.000316,white


In [48]:
pd.value_counts(dull_white_preds['30'])

0.000316    2559
0.999774       2
0.999913       2
0.999993       2
0.999543       2
            ... 
0.963780       1
0.999115       1
0.999137       1
0.936401       1
0.986310       1
Name: 30, Length: 273, dtype: int64

In [49]:
2559/2837

0.9020091646105041

In [50]:
1537/1709

0.8993563487419544

## Quick Checks

In [98]:
checksum_arr = [0] * len(test_img_arr)

for i in range(len(test_img_arr)):
    test_img_arr[i] = test_img_arr[i].astype(np.uint8)
    all_sum = np.concatenate(test_img_arr[i]).sum()
    checksum_arr[i] = all_sum    

In [125]:
pd.value_counts(checksum_arr)

1017804.0    2
1062438.0    2
228525.0     2
498261.0     2
613530.0     2
            ..
1108473.0    1
1890948.0    1
1812981.0    1
1255920.0    1
110592.0     1
Length: 4495, dtype: int64

In [None]:
for i in test_img_arr:
    plt.imshow(i.astype(np.uint8), interpolation = 'nearest')
    plt.show()