In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas as pd
from pathlib import Path
import torch
import os
from torchvision import transforms
from torchvision.transforms import ToTensor, Resize, Compose, RandomHorizontalFlip, RandomRotation
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import numpy as np
import cv2
from matplotlib import cm
from scipy.stats import ttest_ind

In [None]:
from google.colab import drive
# Mount Google Drive in the Colab runtime; every data path used below lives
# under this /content/drive mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the combined, scaled rater-score table from the mounted shared drive.
df = pd.read_csv('/content/drive/Shareddrives/AC297R/images_new/scores-scaled-combined.csv')
# Work on a copy so `df` keeps the table exactly as it was read from disk.
df_lmm = df.copy()

In [None]:
# Order the rows within each subject by photo timestamp: take the rows of one
# Id at a time (Ids 1-5), sort them on the timestamp column, then stack the
# per-Id pieces back together under a fresh 0..n-1 index.
# NOTE(review): the sort is on the raw column values; if these are
# 'MMDD-YYYY-HHMMSS' strings the order is chronological only within a single
# year - confirm against the data.
sorted_ids = [
    df_lmm.loc[df_lmm['Id'] == subject_id].sort_values('Timestamp (From Photo)(MMDD-YYYY-HHMMSS)')
    for subject_id in [1, 2, 3, 4, 5]
]
df_tsorted = pd.concat(sorted_ids, ignore_index=True)

In [None]:
# Binarise every rater's score per subject: 1.0 when the score is at or above
# that subject's (Id's) median for the same rater, 0.0 otherwise.
score_cols = ['scores_xuliang', 'scores_siqiao', 'scores_joslyn', 'scores_shuheng', 'scores_siqi']

for col in score_cols:
  # `transform` returns the per-Id median broadcast onto the original index, so
  # the comparison is index-aligned and does not depend on the rows being
  # physically grouped by Id (the earlier apply + reset_index(drop=True) only
  # lined up positionally).
  subject_median = df_tsorted.groupby('Id')[col].transform('median')
  df_tsorted[col + '_post_median'] = (df_tsorted[col] >= subject_median).astype(float)

df_tsorted

Unnamed: 0,Timestamp (From Photo)(MMDD-YYYY-HHMMSS),Id,Image Id(Id-Timestamp),Temperature (°F),"Activity Level (Categorical), i.e none, light, intense",Applied Lotion/Makeup\n(Boolean),Intervention\n(Boolean),Unnamed: 7,scores_siqiao,scores_siqi,scores_shuheng,scores_xuliang,scores_joslyn,scores_xuliang_post_median,scores_siqiao_post_median,scores_joslyn_post_median,scores_shuheng_post_median,scores_siqi_post_median
0,1015-2022-134230,1,1-1015-2022-134230.jpg,78,,False,FALSE,,0.571429,0.60,0.500000,0.142857,0.500,0.0,1.0,1.0,1.0,1.0
1,1015-2022-174630,1,1-1015-2022-174630.jpg,78,,False,FALSE,,0.857143,1.00,0.666667,0.571429,0.500,1.0,1.0,1.0,1.0,1.0
2,1015-2022-232914,1,1-1015-2022-232914.jpg,78,,False,FALSE,,0.571429,0.40,0.166667,0.428571,0.250,0.0,1.0,0.0,0.0,0.0
3,1016-2022-122933,1,1-1016-2022-122933.jpg,77,,False,FALSE,,0.571429,0.80,0.333333,0.714286,0.375,1.0,1.0,1.0,1.0,1.0
4,1016-2022-180003,1,1-1016-2022-180003.jpg,77,,False,FALSE,,0.428571,0.00,0.000000,0.285714,0.250,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250,1029-2022-161935,5,5-1029-2022-161935.jpg,75,Light,False,TRUE,,0.500000,0.75,0.500000,0.076923,0.400,0.0,1.0,1.0,1.0,1.0
251,1030-2022-012914,5,5-1030-2022-012914.jpg,75,,True,TRUE,,0.833333,1.00,0.500000,0.692308,0.800,1.0,1.0,1.0,1.0,1.0
252,1030-2022-172704,5,5-1030-2022-172704.jpg,75,,True,TRUE,,0.500000,1.00,0.750000,0.846154,0.600,1.0,1.0,1.0,1.0,1.0
253,1030-2022-204923,5,5-1030-2022-204923.jpg,75,,False,TRUE,,0.333333,0.25,0.500000,0.076923,0.800,0.0,0.0,1.0,1.0,0.0


In [None]:
def increase_brightness(img, value=30):
    """Brighten an image by a random amount and return it as an RGB PIL image.

    A brightness offset is drawn uniformly from the integers ``0 .. value - 1``
    and added to the V channel of the image's HSV representation, saturating
    at 255.

    Args:
        img: Image to brighten. It is converted with ``np.array`` and handed to
            OpenCV, so it is expected to be a 3-channel 8-bit image (e.g. an
            RGB ``PIL.Image``) - other modes are not handled here.
        value: Exclusive upper bound of the random offset. With the default of
            30 the offset lies in ``[0, 29]``, which is what the function
            always did before (the argument used to be ignored). Values
            ``<= 0`` leave the brightness unchanged.

    Returns:
        A new ``PIL.Image`` in mode ``'RGB'``.
    """
    # Select the random amount by which brightness is increased.
    offset = int(np.random.randint(low=0, high=value)) if value > 0 else 0

    pixels = np.array(img)
    # The array comes from PIL and the result is rebuilt as 'RGB', so use the
    # RGB conversion codes on both legs of the round trip.
    hsv = cv2.cvtColor(pixels, cv2.COLOR_RGB2HSV)
    h, s, v = cv2.split(hsv)

    # Widen before adding so the sum cannot wrap around in uint8, then clamp
    # to the valid 8-bit range.
    v = np.clip(v.astype(np.int16) + offset, 0, 255).astype(np.uint8)

    final_hsv = cv2.merge((h, s, v))
    pixels = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2RGB)

    return Image.fromarray(pixels.astype('uint8'), 'RGB')

In [None]:
# Root folder that holds one sub-folder of images per subject.
IMAGE_DIR = Path('/content/drive/Shareddrives/AC297R/images_new')

# Augmenting pipeline: random brightening, resize to 224x224, random
# horizontal flip (p=0.7), random rotation, then conversion to a tensor.
# NOTE(review): degrees=[5, 10] is passed as a (min, max) range, so the angle
# is always positive and never zero - confirm that (-10, 10) was not intended.
train_trans = transforms.Compose([
    transforms.Lambda(increase_brightness),
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.7),
    transforms.RandomRotation(degrees=[5, 10]),
    transforms.ToTensor(),
])

# Deterministic pipeline: resize and convert to a tensor only.
inf_trans = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

class ImageDataset(torch.utils.data.Dataset):
    """In-memory dataset of transformed images and binary labels.

    Every image is opened and transformed once, inside ``__init__``; random
    augmentations are therefore sampled a single time per image, not once per
    epoch.

    Layout of the stored samples:
      * ``inf=True``: one ``inf_trans`` tensor per row, with label ``-1`` as a
        placeholder.
      * ``inf=False``: two consecutive samples per row - the ``inf_trans``
        tensor followed by the ``train_trans`` tensor - sharing one label.
        The label is 1.0 when more than two of the five ``*_post_median``
        rater columns are set, else 0.0.

    Args:
        df: DataFrame with an ``'Image Id(Id-Timestamp)'`` column and, when
            ``inf`` is False, the five ``scores_*_post_median`` columns.
        inf: Build an inference dataset (no augmented copy, dummy labels).
        train_trans: Transform producing the augmented copy.
        inf_trans: Transform producing the un-augmented copy.
    """

    def __init__(self, df, inf = False, train_trans = train_trans, inf_trans = inf_trans):
        self.xs = []
        self.ys = []
        self.train_trans = train_trans
        self.inf_trans = inf_trans
        self.inf = inf

        for _, row in tqdm(df.iterrows(), total=len(df)):
            fn = row['Image Id(Id-Timestamp)']
            # The sub-folder is the part of the file name before the first '-'.
            # For single-character Ids this is identical to the previous fn[0].
            # NOTE(review): assumes folders of multi-digit Ids carry the full Id - confirm.
            subject_dir = fn.split('-', 1)[0]
            # Context manager closes the underlying file once the tensors are
            # built; the transforms read the pixel data while it is still open.
            with Image.open(IMAGE_DIR / subject_dir / fn) as im:
                if self.inf:
                    self.xs.append(self.inf_trans(im))
                    self.ys.append(-1)
                    continue

                self.xs.append(self.inf_trans(im))
                self.xs.append(self.train_trans(im))

            # Majority vote over the five raters' above-median flags.
            votes = np.sum([
                row.scores_xuliang_post_median,
                row.scores_siqiao_post_median,
                row.scores_joslyn_post_median,
                row.scores_shuheng_post_median,
                row.scores_siqi_post_median,
            ])
            yy = float(votes > 2.0)
            # One label per stored copy (original + augmented).
            self.ys.append(yy)
            self.ys.append(yy)

    def __getitem__(self, i):
        """Return the ``(image_tensor, label)`` pair stored at position ``i``."""
        return self.xs[i], self.ys[i]

    def __len__(self):
        """Number of stored samples (two per row unless ``inf`` is True)."""
        return len(self.xs)

In [None]:
# Subject-level split: rows of the Ids in `train_val_ids` feed training and
# validation, rows of the Ids in `inf_ids` are held out for inference.
all_ids = [1, 2, 3, 4, 5]
train_val_ids = [1, 3, 4, 5]
inf_ids = [2]

# Collect the rows of each requested Id, in the order the Ids are listed, and
# stack them under a fresh 0..n-1 index.
train_val_parts = [df_tsorted.loc[df_tsorted['Id'] == subject] for subject in train_val_ids]
inf_parts = [df_tsorted.loc[df_tsorted['Id'] == subject] for subject in inf_ids]

train_val_df = pd.concat(train_val_parts, ignore_index=True)
inf_df = pd.concat(inf_parts, ignore_index=True)

In [None]:
BATCH_SIZE = 32
TRAIN_FRACTION = 0.8
SPLIT_SEED = 0  # fixed so the train/validation split is reproducible across runs

train_val_dataset = ImageDataset(train_val_df, train_trans=train_trans)

# ImageDataset (inf=False) stores two consecutive samples per source image:
# the un-augmented tensor at index 2k and its augmented copy at 2k + 1.
# Drawing the split over raw sample indices lets the two copies of one image
# land on opposite sides, leaking training images into validation. The split
# is therefore drawn over images, and both copies follow their image.
n_images = len(train_val_dataset) // 2
image_order = np.random.default_rng(SPLIT_SEED).permutation(n_images)
n_train_images = int(n_images * TRAIN_FRACTION)

train_indices = [int(j) for k in image_order[:n_train_images] for j in (2 * k, 2 * k + 1)]
valid_indices = [int(j) for k in image_order[n_train_images:] for j in (2 * k, 2 * k + 1)]

# `train_dateset` (sic) is the name the training cell reads, so it is kept.
train_dateset = torch.utils.data.Subset(train_val_dataset, train_indices)
val_dataset = torch.utils.data.Subset(train_val_dataset, valid_indices)
inf_dataset = ImageDataset(inf_df, inf=True, inf_trans = inf_trans)

train_loader = DataLoader(train_dateset, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation metrics do not depend on sample order, so no shuffling is needed.
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
inf_loader = DataLoader(inf_dataset, batch_size=BATCH_SIZE, shuffle=False)

207it [02:26,  1.42it/s]
48it [00:36,  1.33it/s]


In [None]:
# Earlier variants, kept here as notes only (they were inert string literals):
#   * a single-layer head, `fc(relu(pretrained_net(x)))`, with
#     `from torchvision.models import resnet50, ResNet50_Weights` for the backbone;
#   * freezing the backbone except for its last six parameter tensors.
class Net(torch.nn.Module):
    """Backbone network followed by a three-layer fully connected head.

    The head maps the backbone output (``last_shape`` features) through 256 and
    32 hidden units to a single output value. ReLU is applied to the input of
    every linear layer; nothing is applied after the last one.

    Args:
        pretrained_net: Module used as the feature extractor.
        last_shape: Number of features ``pretrained_net`` produces per sample.
    """

    def __init__(self, pretrained_net, last_shape=1000):
        super().__init__()
        self.pretrained_net = pretrained_net

        # Fine-tune the whole backbone: mark every one of its parameters trainable.
        for param in self.pretrained_net.parameters():
            param.requires_grad_(True)

        self.relu = torch.nn.ReLU()
        self.fc1 = torch.nn.Linear(last_shape, 256)
        self.fc2 = torch.nn.Linear(256, 32)
        self.fc3 = torch.nn.Linear(32, 1)

    def forward(self, x):
        """Run the backbone and the head; returns one value per sample."""
        features = self.pretrained_net(x)
        hidden = self.fc1(self.relu(features))
        hidden = self.fc2(self.relu(hidden))
        return self.fc3(self.relu(hidden))

# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = Net(resnet50(weights=ResNet50_Weights.IMAGENET1K_V2), 1000)

# The checkpoint is a whole pickled module (it is used directly as the model
# below), not a state_dict, so:
#   * map_location lets a checkpoint saved on a GPU load on a CPU-only runtime;
#   * weights_only=False is needed on PyTorch versions whose torch.load
#     defaults to weights-only loading (the keyword requires PyTorch >= 1.13).
# SECURITY: unpickling executes arbitrary code - only load checkpoints from a
# trusted source.
model = torch.load(
    '/content/drive/Shareddrives/AC297R/acne04_pretrained_resnet50_model.pth',
    map_location=DEVICE,
    weights_only=False,
)
model.to(DEVICE)

# List the parameters that will receive gradient updates.
for name, p in model.named_parameters():
    if p.requires_grad:
        print(name, p.shape)

pretrained_net.conv1.weight torch.Size([64, 3, 7, 7])
pretrained_net.bn1.weight torch.Size([64])
pretrained_net.bn1.bias torch.Size([64])
pretrained_net.layer1.0.conv1.weight torch.Size([64, 64, 1, 1])
pretrained_net.layer1.0.bn1.weight torch.Size([64])
pretrained_net.layer1.0.bn1.bias torch.Size([64])
pretrained_net.layer1.0.conv2.weight torch.Size([64, 64, 3, 3])
pretrained_net.layer1.0.bn2.weight torch.Size([64])
pretrained_net.layer1.0.bn2.bias torch.Size([64])
pretrained_net.layer1.0.conv3.weight torch.Size([256, 64, 1, 1])
pretrained_net.layer1.0.bn3.weight torch.Size([256])
pretrained_net.layer1.0.bn3.bias torch.Size([256])
pretrained_net.layer1.0.downsample.0.weight torch.Size([256, 64, 1, 1])
pretrained_net.layer1.0.downsample.1.weight torch.Size([256])
pretrained_net.layer1.0.downsample.1.bias torch.Size([256])
pretrained_net.layer1.1.conv1.weight torch.Size([64, 256, 1, 1])
pretrained_net.layer1.1.bn1.weight torch.Size([64])
pretrained_net.layer1.1.bn1.bias torch.Size([64])


In [None]:
from torch.nn import MSELoss

def compute_mse_loss(model, loader=None):
  """Mean squared error of `model` over a data loader.

  The squared errors of all samples are summed and divided by the number of
  samples, so batches of different size are weighted correctly.

  Args:
    model: Module mapping a batch of inputs to one prediction per sample.
    loader: Iterable of ``(x, y)`` batches. Defaults to the notebook-level
      ``val_loader`` when omitted.

  Returns:
    A 0-dim tensor holding the mean squared error (NaN for an empty loader).
  """
  if loader is None:
    loader = val_loader

  # Run on whatever device the model lives on (CPU for a parameter-free model).
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else torch.device('cpu')

  # Evaluate in eval mode (BatchNorm/Dropout behave differently in training
  # mode) and put the model back into its previous mode afterwards.
  was_training = model.training
  model.eval()
  sq_err_sum = torch.zeros((), device=device)
  count = 0
  try:
    with torch.no_grad():
      for x, y in loader:
        y_pred = model(x.to(device)).reshape(-1)
        # Match device, dtype and shape of the prediction; comparing a (B,)
        # target with a (B, 1) prediction would broadcast to (B, B).
        y = y.to(device=device, dtype=y_pred.dtype).reshape(-1)
        sq_err_sum += ((y_pred - y) ** 2).sum()
        count += y.numel()
  finally:
    model.train(was_training)

  return sq_err_sum / count

In [None]:
from torch.nn import BCEWithLogitsLoss

def compute_bce_acc(model, loader=None):
  """Binary classification accuracy of `model` over a data loader.

  The model output is treated as a logit: a sample is predicted positive when
  ``sigmoid(logit) > 0.5``. Labels are truncated to integers before comparison.

  Args:
    model: Module mapping a batch of inputs to one logit per sample.
    loader: Iterable of ``(x, y)`` batches. Defaults to the notebook-level
      ``val_loader`` when omitted.

  Returns:
    Fraction of correctly classified samples as a float.

  Raises:
    ZeroDivisionError: If the loader yields no samples.
  """
  if loader is None:
    loader = val_loader

  # Run on whatever device the model lives on (CPU for a parameter-free model).
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else torch.device('cpu')

  # Evaluate in eval mode (BatchNorm/Dropout behave differently in training
  # mode) and put the model back into its previous mode afterwards.
  was_training = model.training
  model.eval()
  correct = 0
  count = 0
  try:
    with torch.no_grad():
      for x, y in loader:
        probs = torch.sigmoid(model(x.to(device)))
        y_pred = (probs > 0.5).reshape(-1).to(torch.int64).cpu()
        y_true = y.reshape(-1).to(torch.int64).cpu()
        correct += int((y_true == y_pred).sum())
        count += len(y)
  finally:
    model.train(was_training)

  return correct / count

In [None]:
from torch.optim import Adam
from torch.nn import MSELoss
from torch.nn import BCEWithLogitsLoss


# Hand the optimizer only the parameters that are marked trainable.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = Adam(trainable_params, lr=1e-3)
# Binary cross-entropy computed on raw logits.
loss_fn = BCEWithLogitsLoss()

In [None]:
N_EPOCHS = 15

for epoch in range(N_EPOCHS):
    print(epoch)
    # The model comes from a pickled checkpoint, so its train/eval flag is
    # whatever was saved; set training mode explicitly at the start of every
    # epoch (this also undoes any eval() left behind by an evaluation step).
    model.train()
    epoch_loss = 0
    for x, y in train_loader:
        x = x.to(DEVICE)
        # The loss expects float32 targets shaped like the (batch, 1) logits.
        y = y.to(DEVICE, dtype=torch.float32).reshape(-1, 1)
        optimizer.zero_grad()
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        # loss is the batch mean; weight it by batch size so the epoch figure
        # is a true per-sample average.
        epoch_loss += loss.item() * len(y)
        loss.backward()
        optimizer.step()

    print('valid acc =', compute_bce_acc(model))
    print('epoch loss =', epoch_loss / len(train_dateset))

0
valid acc = 0.7469879518072289
epoch loss = 0.3179797685038287
1
valid acc = 0.6385542168674698
epoch loss = 0.18637977844368656
2
valid acc = 0.6867469879518072
epoch loss = 0.17290565974762792
3
valid acc = 0.7108433734939759
epoch loss = 0.2540847828155918
4
valid acc = 0.7590361445783133
epoch loss = 0.23514279909994668
5
valid acc = 0.8072289156626506
epoch loss = 0.16155738210209908
6
valid acc = 0.7831325301204819
epoch loss = 0.09711141438642658
7
valid acc = 0.7710843373493976
epoch loss = 0.16250940644128806
8
valid acc = 0.7108433734939759
epoch loss = 0.18656069697570224
9
valid acc = 0.7951807228915663
epoch loss = 0.08108821727357962
10
valid acc = 0.7831325301204819
epoch loss = 0.040437014860711606
11
valid acc = 0.7951807228915663
epoch loss = 0.05210413632976325
12
valid acc = 0.8072289156626506
epoch loss = 0.04547153366026201
13
valid acc = 0.7469879518072289
epoch loss = 0.1255878505389856
14
valid acc = 0.7590361445783133
epoch loss = 0.12868015405094516


In [None]:
score_col = 'scores_resnet50'

# Score the held-out images. Inference runs in eval mode (BatchNorm/Dropout
# behave differently in training mode) and without autograd bookkeeping; the
# model's previous mode is restored afterwards.
was_training = model.training
model.eval()
score_chunks = []
try:
  with torch.no_grad():
    for batch, _ in inf_loader:
      probs = torch.sigmoid(model(batch.to(DEVICE)))
      score_chunks.append(probs.cpu().numpy().flatten())
finally:
  model.train(was_training)
inf_scores = list(np.concatenate(score_chunks)) if score_chunks else []

inf_df[score_col] = inf_scores

# Normalise the intervention flag to the strings 'TRUE' / 'FALSE'. Going
# through astype(str) keeps this from crashing on non-string cells (NaN, bool).
inf_df['Intervention\n(Boolean)'] = (
    inf_df['Intervention\n(Boolean)'].astype(str).str.strip().str.upper()
)
intervention = inf_df['Intervention\n(Boolean)']
# Row masks: x selects rows with the intervention, y rows without it.
x = intervention == 'TRUE'
y = intervention == 'FALSE'


# Per held-out subject: model scores with and without the intervention.
with_inter = {
    idx: inf_df[(inf_df['Id'] == idx) & x][score_col].values
    for idx in inf_ids
}

without_inter = {
    idx: inf_df[(inf_df['Id'] == idx) & y][score_col].values
    for idx in inf_ids
}

In [None]:
# Persist the held-out rows together with the model scores to the shared drive.
# The DataFrame index is written as well (pandas default), as an unnamed first column.
inf_df.to_csv("/content/drive/Shareddrives/AC297R/images_new/transfer_siqiao_acne04_cnn_inf.csv")

In [None]:
# Display the held-out rows with the added model-score column.
inf_df 

Unnamed: 0,Timestamp (From Photo)(MMDD-YYYY-HHMMSS),Id,Image Id(Id-Timestamp),Temperature (°F),"Activity Level (Categorical), i.e none, light, intense",Applied Lotion/Makeup\n(Boolean),Intervention\n(Boolean),Unnamed: 7,scores_siqiao,scores_siqi,scores_shuheng,scores_xuliang,scores_joslyn,scores_xuliang_post_median,scores_siqiao_post_median,scores_joslyn_post_median,scores_shuheng_post_median,scores_siqi_post_median,scores_resnet50
0,1015-2022-134126,2,2-1015-2022-134126.jpg,78,,False,False,,0.8,1.0,1.0,0.846154,0.272727,1.0,1.0,0.0,1.0,1.0,0.847644
1,1015-2022-201604,2,2-1015-2022-201604.jpg,78,,False,False,,0.8,1.0,0.666667,0.846154,1.0,1.0,1.0,1.0,1.0,1.0,0.944891
2,1015-2022-232820,2,2-1015-2022-232820.jpg,78,,False,False,,0.4,0.5,0.166667,0.076923,0.454545,0.0,1.0,1.0,0.0,1.0,0.952674
3,1016-2022-125612,2,2-1016-2022-125612.jpg,77,,False,False,,0.2,0.25,0.166667,0.076923,0.272727,0.0,0.0,0.0,0.0,0.0,0.999944
4,1016-2022-184907,2,2-1016-2022-184907.jpg,77,,False,False,,0.6,0.5,0.333333,1.0,0.636364,1.0,1.0,1.0,1.0,1.0,0.976117
5,1016-2022-233702,2,2-1016-2022-233702.jpg,77,,False,False,,0.4,1.0,0.333333,0.692308,0.272727,1.0,1.0,0.0,1.0,1.0,0.989785
6,1017-2022-162727,2,2-1017-2022-162727.jpg,76,,False,True,,0.4,0.5,0.5,0.076923,0.636364,0.0,1.0,1.0,1.0,1.0,0.999671
7,1017-2022-195825,2,2-1017-2022-195825.jpg,76,,False,True,,0.4,0.75,0.333333,0.384615,0.636364,1.0,1.0,1.0,1.0,1.0,0.999999
8,1017-2022-232909,2,2-1018-2022-234638.jpg,76,,False,True,,0.2,0.5,0.166667,0.384615,0.090909,1.0,0.0,0.0,0.0,1.0,0.999272
9,1017-2022-232909,2,2-1017-2022-232909.jpg,76,,False,True,,0.6,0.75,0.5,0.538462,0.454545,1.0,1.0,1.0,1.0,1.0,0.998193
