In [14]:
import os 
import sys
import pandas as pd
import numpy as np
sys.path.insert(0, "..")
from PIL import Image
import matplotlib.pyplot as plt
from datetime import datetime
import torch
from main import load_in_loss
from utils import load_in_model, convert_types
from dataset_utils import create_datasets
from main import run_one_epoch
# import re
%load_ext autoreload
%autoreload
# Limit CUDA device visibility for this kernel to the device with index 1.
# NOTE(review): this runs after `import torch` above; presumably it still takes effect
# because CUDA is not initialised at import time — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
def load_in_mask_info(exp_name):
    """Read the mask settings that are encoded in an experiment name.

    Args:
        exp_name: experiment name to inspect.

    Returns:
        Tuple ``(mask_info, pre_mask)``. ``mask_info`` is the first of
        ``"1CHANNEL"``, ``"5CHANNEL"``, ``"BOX"`` (checked in that order) that
        occurs in ``exp_name``, or ``None`` if none does. ``pre_mask`` is ``1``
        when ``"premask"`` occurs in ``exp_name`` and ``0`` otherwise.
    """
    # Order matters: the first tag found wins.
    known_tags = ("1CHANNEL", "5CHANNEL", "BOX")
    mask_info = next((tag for tag in known_tags if tag in exp_name), None)

    pre_mask = 1 if "premask" in exp_name else 0
    return mask_info, pre_mask

In [56]:
def create_lap_eval_dict(data_dir, exp_name, finetune=False):
    """Build the arguments for evaluating or fine-tuning a trained experiment on LAP.

    The row logged for ``exp_name`` in ``<data_dir>/logs/exp_tracker.csv`` is the
    starting point; learning rate, dropout, date, mask settings, data source,
    experiment name, checkpoint path, transform, scheduler, batch size and worker
    count are then overridden.

    Args:
        data_dir: directory that contains the ``logs`` and ``models`` sub-directories.
        exp_name: name of the previously trained experiment; must appear in the
            tracker's ``exp_name`` column.
        finetune: when true the new experiment name is prefixed with
            ``"LAP_finetune_"``, otherwise with ``"LAP_eval_"``.

    Returns:
        A ``pandas.Series`` (despite the local name ``args_dict``) indexed by
        argument name.

    Raises:
        IndexError: if the tracker has no row for ``exp_name``.
    """
    log_dir = os.path.join(data_dir, "logs")
    model_dir = os.path.join(data_dir, "models")

    # get tracker with all hyperparameters:
    tracker_csv_path = os.path.join(log_dir, "exp_tracker.csv")
    tracker = pd.read_csv(tracker_csv_path)

    # get model from previously trained experiment
    load_model = os.path.join(model_dir, exp_name + ".pt")

    # construction of args to make sure similar hyperparameters (OTHER THAN LR AND DROPOUT)
    matching_rows = tracker[tracker["exp_name"] == exp_name]
    if matching_rows.empty:
        # Same exception type that `.iloc[0]` would raise, but with a message that
        # says which experiment and which file were involved.
        raise IndexError(
            f"experiment {exp_name!r} not found in {tracker_csv_path}"
        )
    args_dict = matching_rows.iloc[0].drop(["date"])

    # for fine tuning set it small
    args_dict["lr"] = .00001
    args_dict["dropout"] = 0.33

    # "%m/%d/%y" is the portable spelling of the platform-specific "%D" code.
    args_dict["date"] = datetime.now().date().strftime("%m/%d/%y")
    # load in mask info based on name
    args_dict["mask_info"], args_dict["pre_mask"] = load_in_mask_info(exp_name)

    # get lap dataset evaluation
    args_dict["data_source"] = "lap"
    prefix = "LAP_finetune_" if finetune else "LAP_eval_"
    args_dict["exp_name"] = prefix + args_dict["exp_name"]

    # load in model
    args_dict["load_model"] = load_model
    args_dict["transform"] = None

    # Newer experiments log data_dir; older ones logged model_dir (plus log paths).
    # Only migrate when model_dir really is a path string: a missing column or a
    # NaN cell means there is nothing to convert.
    legacy_model_dir = args_dict.get("model_dir")
    if isinstance(legacy_model_dir, str):
        args_dict = args_dict.drop(
            ["log_csv_path", "logs_dir", "model_dir"], errors="ignore"
        )
        args_dict["data_dir"] = legacy_model_dir.split("models")[0]

    args_dict["scheduler_type"] = None
    args_dict["bs"] = 64
    args_dict["num_workers"] = 8

    return args_dict

In [57]:
def eval_model_lap(exp_name, data_dir="/home/jessekim"):
    """Run one evaluation pass of a previously trained experiment on the LAP test split.

    Args:
        exp_name: name of the trained experiment, forwarded to ``create_lap_eval_dict``.
        data_dir: root directory forwarded to ``create_lap_eval_dict``.

    Returns:
        Tuple ``(test_loss_vals, test_mae_vals, test_acc)`` exactly as returned by
        ``run_one_epoch`` when called with ``train=False``.
    """
    # get similar args
    args = convert_types(
        create_lap_eval_dict(data_dir=data_dir, exp_name=exp_name, finetune=False)
    )

    # get the model
    model = load_in_model(
        args.model_type,
        args.output_type,
        args.loss_type,
        args.dropout,
        args.mask_info,
        args.pre_mask,
        args.load_model,
    )

    # add to device (GPU vs CPU) and parallelize it.
    # `torch.nn` is spelled out because the notebook never binds a bare `nn`;
    # the previous `nn.DataParallel` raised NameError whenever this branch ran.
    if torch.cuda.device_count() > 1:
        print('Using', torch.cuda.device_count(), 'GPUs')
        model = torch.nn.DataParallel(model)
        # model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[], output_device=None)
    model = model.to(args.device)

    # get loss
    loss = load_in_loss(args.loss_type)

    # get test loader
    # NOTE(review): bs/num_workers are literals here although create_lap_eval_dict
    # also stores "bs" and "num_workers" — confirm whether args should be used.
    test_loader = create_datasets(
        dataset_type="test",
        data_dir=args.data_dir,
        transform=args.transform,
        mask_info=args.mask_info,
        bs=64,
        shuffle=True,
        num_workers=8,
        data_source=args.data_source,
    )

    # The returned model is not needed by callers of this function.
    _, test_loss_vals, test_mae_vals, test_acc = run_one_epoch(
        model,
        test_loader,
        optimizer=None,
        mask_info=args.mask_info,
        scheduler=None,
        loss=loss,
        train=False,
        device=args.device,
        output_type=args.output_type,
        loss_type=args.loss_type,
    )
    return test_loss_vals, test_mae_vals, test_acc

In [58]:
from main import train

In [None]:
%%time
# Notebook-level experiment name and data directory.
# finetune_on_lap is only defined in this cell; it is not called here.
exp_name = "ViT_NLL_NOMASK_40DROP_lr.0001"
data_dir = "/home/jessekim"

def finetune_on_lap(exp_name, data_dir="/home/jessekim"):
    """Fine-tune a previously trained experiment on the LAP dataset.

    The required ``exp_name`` comes first and ``data_dir`` keeps its default, the
    same parameter order as ``eval_model_lap``. The earlier order (defaulted
    parameter before a required one) was a SyntaxError.

    Args:
        exp_name: name of the previously trained experiment.
        data_dir: root directory forwarded to ``create_lap_eval_dict``.

    Returns:
        None. The result of ``train`` is not returned.
    """
    # get args from original experiment
    args = create_lap_eval_dict(data_dir=data_dir, exp_name=exp_name, finetune=True)

    logs_dir = os.path.join(args["data_dir"], "logs")
    log_csv_path = os.path.join(logs_dir, "lap_exp_tracker.csv")

    # NOTE(review): setup_exp and log_experiment are not imported in any visible
    # cell — confirm which module provides them and import it at the top.
    # logging:
    setup_exp(log_csv_path, logs_dir, args["exp_name"])
    # add experiment to experiment tracker:
    log_experiment(log_csv_path, args)

    # NOTE(review): eval_model_lap passes its args through convert_types first;
    # here they are forwarded to train() unconverted — confirm this is intended.
    train(**args)

In [31]:
%%time

## for ViT_NLL_NOMASK_40DROP_lr.0001

# NOTE(review): this cell reads `model`, `test_loader`, `args` and `loss` as
# notebook-level names, but the visible cells only create them as locals inside
# eval_model_lap. Likewise `log_output`, `logs_dir` and `epoch` are not defined
# in any visible cell. Confirm where they come from before a Restart & Run All.

# validation
model, valid_loss_vals, valid_mae_vals, valid_acc = run_one_epoch(
    model,
    test_loader,
    optimizer=None,
    mask_info=args.mask_info,
    scheduler=None,
    loss=loss,
    train=False,
    device=args.device,
    output_type=args.output_type,
    loss_type=args.loss_type,
)

# log each epoch:
# (the line above had lost its leading "#", which made the whole cell a SyntaxError)
log_output(
    exp_name,
    logs_dir,
    epoch,
    valid_loss_vals,
    valid_mae_vals,
    valid_acc,
    "valid",
)

print(valid_loss_vals, valid_mae_vals, valid_acc)

4.564251419036619 7.351155404121645 0.06370070778564206
CPU times: user 3.16 s, sys: 1.36 s, total: 4.52 s
Wall time: 30.9 s


#### Generation of LAP csvs:

In [4]:
# Root of the LAP (appa-real) release; every path below is built from it.
lap_path = "../../data/appa-real-release"

# original csvs from release
train_csv, valid_csv, test_csv = (
    os.path.join(lap_path, csv_name)
    for csv_name in ("gt_train.csv", "gt_valid.csv", "gt_test.csv")
)

# newly created csvs
new_train_csv, new_valid_csv, new_test_csv = (
    os.path.join(lap_path, csv_name)
    for csv_name in ("train.csv", "valid.csv", "test.csv")
)

# directory named "train" under the release root
train_path = os.path.join(lap_path, "train")


In [5]:
# Release root and the three original ground-truth CSV paths.
# (Same values as the assignments in the preceding cell.)
lap_path = "../../data/appa-real-release"
train_csv, valid_csv, test_csv = (
    os.path.join(lap_path, "gt_train.csv"),
    os.path.join(lap_path, "gt_valid.csv"),
    os.path.join(lap_path, "gt_test.csv"),
)

In [6]:
# NOTE(review): `im` is not defined in any visible cell and the recorded run of
# this cell raised NameError. Presumably `im` was an image tensor (it is
# permuted with (1, 2, 0) before display) — confirm and define it, or drop the cell.
plt.imshow(im.permute(1,2,0))

NameError: name 'im' is not defined

In [7]:
# Load the original gt_train.csv from the release directory for inspection.
gt_train = pd.read_csv(os.path.join(lap_path, "gt_train.csv"))

In [10]:
# Smallest real_age value present in the training ground truth.
gt_train["real_age"].min()

1

In [9]:
# Display the rows whose real_age is below 100.
gt_train[gt_train["real_age"] < 100]

Unnamed: 0,file_name,real_age,apparent_age,worker_age,worker_gender
0,000000.jpg,4,5,36,male
1,000000.jpg,4,4,38,female
2,000000.jpg,4,7,52,female
3,000000.jpg,4,5,50,male
4,000000.jpg,4,4,52,female
...,...,...,...,...,...
133930,004112.jpg,20,17,-1,
133931,004112.jpg,20,20,26,
133932,004112.jpg,20,22,31,
133933,004112.jpg,20,19,19,


In [11]:
def convert_csvs(csv_path):
    """Reduce a ground-truth CSV to one ``(full_path, age)`` row per image file.

    Rows are grouped by ``file_name`` and the mean ``real_age`` of each group is
    truncated to an integer. ``"_face.jpg"`` is appended to each file name, and
    only ages strictly between 0 and 100 are kept.

    Args:
        csv_path: path of a CSV with at least ``file_name`` and ``real_age`` columns.

    Returns:
        DataFrame with columns ``full_path`` and ``age`` and a fresh 0..n-1 index.
    """
    ratings = pd.read_csv(csv_path)

    # Select real_age *before* aggregating. Averaging the whole frame also tries
    # to average string columns (e.g. worker_gender), which raises TypeError on
    # pandas >= 2.0, and computes means that are thrown away anyway.
    ages = ratings.groupby("file_name")["real_age"].mean().reset_index()

    ages["file_name"] = ages["file_name"] + "_face.jpg"
    ages.columns = ["full_path", "age"]
    ages["age"] = ages["age"].astype(int)

    in_range = (ages["age"] > 0) & (ages["age"] < 100)
    return ages[in_range].reset_index(drop=True)

In [1]:
# NOTE(review): `df` only exists as a local inside convert_csvs; at notebook level
# it is undefined, and the recorded run of this cell raised NameError.
df

NameError: name 'df' is not defined

In [17]:
# Largest age left after conversion.
# NOTE(review): the recorded output (100) cannot come from convert_csvs as shown,
# which keeps only age < 100 — the output is presumably stale; re-run to confirm.
convert_csvs(train_csv)["age"].max()

100

In [300]:
# Display the converted training table (columns full_path, age).
convert_csvs(train_csv)

Unnamed: 0,full_path,age
0,000000.jpg_face.jpg,4
1,000001.jpg_face.jpg,18
2,000002.jpg_face.jpg,80
3,000003.jpg_face.jpg,50
4,000004.jpg_face.jpg,17
...,...,...
4108,004108.jpg_face.jpg,41
4109,004109.jpg_face.jpg,53
4110,004110.jpg_face.jpg,35
4111,004111.jpg_face.jpg,28


In [12]:
# Convert each original ground-truth CSV and write the result next to it under
# the release root, without the index column. Splits are processed in the order
# train, valid, test.
for _gt_csv, _out_name in (
    (train_csv, "train.csv"),
    (valid_csv, "valid.csv"),
    (test_csv, "test.csv"),
):
    convert_csvs(_gt_csv).to_csv(os.path.join(lap_path, _out_name), index=False)

In [13]:
# Open the first face image from the train directory.
# NOTE(review): `faces` is only assigned in a later cell, so on a fresh kernel this
# cell raises NameError (as the recorded output shows); move that cell above this one.
Image.open(os.path.join(train_path, faces[0]))

NameError: name 'faces' is not defined

In [26]:
# Sorted names of the entries in the "train" directory whose name contains ".jpg_face.jpg".
faces = sorted(
    entry
    for entry in os.listdir(os.path.join(lap_path, "train"))
    if ".jpg_face.jpg" in entry
)