In [1]:
import pandas as pd
import wandb
from wd.utils.utils import mlflow_linearize

In [2]:
api = wandb.Api()

In [10]:
filters = {
    'group': 'Lawin'
}
selected_cols = [
    'in_params.dataset.channels',
    'in_params.train_params.optimizer_params.weight_decay',
    'in_params.train_params.initial_lr',
    'in_params.model.params.backbone',
    # 'in_params.model.name',
    'f1',
    'precision',
    'recall'
]
renamings = {
    'in_params.dataset.channels': 'Channels',
    'in_params.train_params.optimizer_params.weight_decay': "Weight Decay",
    'in_params.train_params.initial_lr': 'LR',
    'in_params.model.name': 'Model',
    'in_params.model.params.backbone': 'Backbone',
    'f1': 'F1',
    'precision': 'Precision',
    'recall': 'Recall'
}

In [28]:
# Project is specified by <entity/project-name>
runs = api.runs("pasqualedem/weeddetection", filters=filters)

summary_list, config_list, name_list = [], [], []
for run in runs:
    # .summary contains the output keys/values for metrics like accuracy.
    #  We call ._json_dict to omit large files
    summary_list.append(run.summary._json_dict)

    # .config contains the hyperparameters.
    #  We remove special values that start with _.
    config_list.append(
        {k: v for k,v in run.config.items()
          if not k.startswith('_')})

    # .name is the human-readable name of the run.
    name_list.append(run.name)

runs_df = pd.DataFrame([{**mlflow_linearize(dc), **mlflow_linearize(ds)} for dc, ds in zip(config_list, summary_list)])

In [29]:
to_delete = filter(lambda x: x.startswith('hyper_params'), runs_df.columns)
list(to_delete)

['hyper_params.arch_params.schema',
 'hyper_params.arch_params.sync_bn',
 'hyper_params.arch_params.backbone',
 'hyper_params.arch_params.in_channels',
 'hyper_params.arch_params.num_classes',
 'hyper_params.arch_params.out_channels',
 'hyper_params.arch_params.input_channels',
 'hyper_params.arch_params.output_channels',
 'hyper_params.dataset_params.root',
 'hyper_params.dataset_params.size',
 'hyper_params.dataset_params.schema',
 'hyper_params.dataset_params.s3_link',
 'hyper_params.dataset_params.channels',
 'hyper_params.dataset_params.hor_flip',
 'hyper_params.dataset_params.ver_flip',
 'hyper_params.dataset_params.crop_size',
 'hyper_params.dataset_params.batch_size',
 'hyper_params.dataset_params.dataset_dir',
 'hyper_params.dataset_params.num_classes',
 'hyper_params.dataset_params.num_workers',
 'hyper_params.dataset_params.return_path',
 'hyper_params.dataset_params.test_folders',
 'hyper_params.dataset_params.train_folders',
 'hyper_params.dataset_params.val_batch_size',
 

In [30]:
for l in to_delete:
    runs_df.drop(l, in_place=True)

In [31]:
runs_df

Unnamed: 0,in_params.tags,in_params.model.name,in_params.model.params.backbone,in_params.phases,in_params.dataset.root,in_params.dataset.size,in_params.dataset.channels,in_params.dataset.hor_flip,in_params.dataset.ver_flip,in_params.dataset.crop_size,...,train_WrapF1,val_auc_crop,confusion_matrix.ncols,confusion_matrix.nrows,confusion_matrix.sha256,confusion_matrix.artifact_path,confusion_matrix._latest_artifact_path,confusion_matrix.path,confusion_matrix.size,confusion_matrix._type
0,"[crop, B0]",lawin,MiT-B0,"[train, test]",dataset/processed/Sequoia,same,"[G, NDVI, NIR, R, RE]",True,True,"[256, 256]",...,0.438004,0.928171,3,3,9183ec9d76a194d77f730280fcd484a43a7a08d4cefbaf...,wandb-client-artifact://146p9bsudr1pwm3hufru9a...,wandb-client-artifact://146p9bsudr1pwm3hufru9a...,media/table/confusion_matrix_298_9183ec9d76a19...,121,table-file
1,"[crop, B0]",lawin,MiT-B0,"[train, test]",dataset/processed/Sequoia,same,"[R, G, RE, NIR]",True,True,"[256, 256]",...,0.589538,0.792952,3,3,08bcce44e98f0b499f105809366bbe9667ea77f7060453...,wandb-client-artifact://17yncpf3tzx4jy6t784ow1...,wandb-client-artifact://17yncpf3tzx4jy6t784ow1...,media/table/confusion_matrix_536_08bcce44e98f0...,115,table-file
2,"[crop, B0]",lawin,MiT-B0,"[train, test]",dataset/processed/Sequoia,same,"[G, NIR, R]",True,True,"[256, 256]",...,0.543906,0.897648,3,3,c223e92002d8e1ce2de18396f0afb72d9fe66590a7d2c4...,wandb-client-artifact://kes7bchpwgjf20bwxu8fmk...,wandb-client-artifact://kes7bchpwgjf20bwxu8fmk...,media/table/confusion_matrix_774_c223e92002d8e...,119,table-file
3,"[crop, B0]",lawin,MiT-B0,"[train, test]",dataset/processed/Sequoia,same,CIR,True,True,"[256, 256]",...,0.527644,0.863349,3,3,6aa2c951a59f525881104a38b18dcd555bcb68ed577110...,wandb-client-artifact://d2ylmgqiiqfliszsg40vfs...,wandb-client-artifact://d2ylmgqiiqfliszsg40vfs...,media/table/confusion_matrix_592_6aa2c951a59f5...,121,table-file
4,"[crop, B0]",lawin,MiT-B0,"[train, test]",dataset/processed/Sequoia,same,[NDVI],True,True,"[256, 256]",...,0.532063,0.977426,3,3,afea42011ea34a5a2844a67b6fc86b686c014582f83371...,wandb-client-artifact://xhtuflw7hec0fenynnl6hp...,wandb-client-artifact://xhtuflw7hec0fenynnl6hp...,media/table/confusion_matrix_536_afea42011ea34...,119,table-file
5,"[crop, B0]",lawin,MiT-B0,"[train, test]",dataset/processed/Sequoia,same,"[R, G]",True,True,"[256, 256]",...,0.542541,0.952213,3,3,b24e863ca338be3744fec0a94357b2dc0fa8282f75dc8a...,wandb-client-artifact://434kekma40grmdb282nidy...,wandb-client-artifact://434kekma40grmdb282nidy...,media/table/confusion_matrix_592_b24e863ca338b...,125,table-file
6,"[crop, B0]",lawin,MiT-B0,"[train, test]",dataset/processed/Sequoia,same,"[G, NDVI, NIR, R, RE]",True,True,"[256, 256]",...,0.591053,0.991407,3,3,1e93a24fc57c0f42ce7de71beb7d801aace6ab465a7b12...,wandb-client-artifact://ksznqmt52pips0h8ekr678...,wandb-client-artifact://ksznqmt52pips0h8ekr678...,media/table/confusion_matrix_690_1e93a24fc57c0...,118,table-file
7,"[crop, B0]",lawin,MiT-B0,"[train, test]",dataset/processed/Sequoia,same,"[R, G, RE, NIR]",True,True,"[256, 256]",...,0.556098,0.946489,3,3,ab8426fd89293f4326e75eaaa820f89998c11b130f2ec9...,wandb-client-artifact://yvt6qp3w23pa6tuv3rqtyq...,wandb-client-artifact://yvt6qp3w23pa6tuv3rqtyq...,media/table/confusion_matrix_676_ab8426fd89293...,117,table-file
8,"[crop, B0]",lawin,MiT-B0,"[train, test]",dataset/processed/Sequoia,same,"[G, NIR, R]",True,True,"[256, 256]",...,0.55181,0.775647,3,3,024e6940349163d867d353c88c4d225e8537afd9580310...,wandb-client-artifact://16kkrjvbap2j2difbtna7b...,wandb-client-artifact://16kkrjvbap2j2difbtna7b...,media/table/confusion_matrix_690_024e694034916...,118,table-file
9,"[crop, B0]",lawin,MiT-B0,"[train, test]",dataset/processed/Sequoia,same,CIR,True,True,"[256, 256]",...,0.539615,0.859283,3,3,1e47bdbbfc73deb18a0584678f284bce903b7eebb8b110...,wandb-client-artifact://yqzsjiypuotvqobm5dxfun...,wandb-client-artifact://yqzsjiypuotvqobm5dxfun...,media/table/confusion_matrix_592_1e47bdbbfc73d...,117,table-file


In [59]:
projected_runs = runs_df[
    selected_cols
]

In [60]:
# selected_runs = projected_runs[projected_runs['in_params.model.params.backbone'] == 'MiT-B0']
selected_runs = projected_runs

In [61]:
selected_runs

Unnamed: 0,in_params.dataset.channels,in_params.train_params.optimizer_params.weight_decay,in_params.train_params.initial_lr,in_params.model.params.backbone,f1,precision,recall
0,"[G, NDVI, NIR, R, RE]",0.0005,0.0001,MiT-B0,0.326794,0.345053,0.465314
1,"[R, G, RE, NIR]",0.0005,0.0001,MiT-B0,0.377526,0.705026,0.357784
2,"[G, NIR, R]",0.0005,0.0001,MiT-B0,0.357331,0.499817,0.348049
3,CIR,0.0005,0.0001,MiT-B0,0.404106,0.428261,0.403309
4,[NDVI],0.0005,0.0001,MiT-B0,0.331022,0.338566,0.456207
5,"[R, G]",0.0005,0.0001,MiT-B0,0.356745,0.390026,0.434191
6,"[G, NDVI, NIR, R, RE]",0.0,0.0001,MiT-B0,0.338468,0.390821,0.496304
7,"[R, G, RE, NIR]",0.0,0.0001,MiT-B0,0.372336,0.573994,0.355982
8,"[G, NIR, R]",0.0,0.0001,MiT-B0,0.371983,0.444056,0.481801
9,CIR,0.0,0.0001,MiT-B0,0.369274,0.399371,0.358478


In [62]:
selected_runs.rename(columns=renamings, inplace=True)
selected_runs[['F1', 'Precision', 'Recall']] = selected_runs[['F1', 'Precision', 'Recall']].round(3)
selected_runs = selected_runs.applymap(lambda x: ', '.join(x) if isinstance(x, list) else x)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_runs.rename(columns=renamings, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_runs[['F1', 'Precision', 'Recall']] = selected_runs[['F1', 'Precision', 'Recall']].round(3)


In [None]:
idx = selected_runs.groupby(by='in_params.dataset.channels')['f1'].transform(max) == selected_runs['f1']
best_runs = selected_runs[idx]

In [63]:
selected_runs

Unnamed: 0,Channels,Weight Decay,LR,Backbone,F1,Precision,Recall
0,"G, NDVI, NIR, R, RE",0.0005,0.0001,MiT-B0,0.327,0.345,0.465
1,"R, G, RE, NIR",0.0005,0.0001,MiT-B0,0.378,0.705,0.358
2,"G, NIR, R",0.0005,0.0001,MiT-B0,0.357,0.5,0.348
3,CIR,0.0005,0.0001,MiT-B0,0.404,0.428,0.403
4,NDVI,0.0005,0.0001,MiT-B0,0.331,0.339,0.456
5,"R, G",0.0005,0.0001,MiT-B0,0.357,0.39,0.434
6,"G, NDVI, NIR, R, RE",0.0,0.0001,MiT-B0,0.338,0.391,0.496
7,"R, G, RE, NIR",0.0,0.0001,MiT-B0,0.372,0.574,0.356
8,"G, NIR, R",0.0,0.0001,MiT-B0,0.372,0.444,0.482
9,CIR,0.0,0.0001,MiT-B0,0.369,0.399,0.358


In [58]:
best_runs

Unnamed: 0,Channels,Weight Decay,LR,Backbone,F1,Precision,Recall
3,CIR,0.0005,0.0001,MiT-B0,0.404,0.428,0.403
14,"G, NIR, R",0.0005,0.001,MiT-B0,0.4,0.543,0.378
18,"G, NDVI, NIR, R, RE",0.0,0.001,MiT-B0,0.34,0.37,0.506
19,"R, G, RE, NIR",0.0,0.001,MiT-B0,0.483,0.484,0.576
22,NDVI,0.0,0.001,MiT-B0,0.331,0.335,0.409
23,"R, G",0.0,0.001,MiT-B0,0.437,0.414,0.623


In [54]:
best_runs.to_csv("../../exps/lawin.csv")

In [64]:
selected_runs.to_csv("../../exps/lawin-complete.csv")