In [1]:
import os
import sys
import torch
from torch.utils.data import DataLoader
from torchvision.transforms.functional import normalize, to_pil_image
import matplotlib.pyplot as plt
import json
import math
import pandas as pd
import numpy as np

parent_dir = os.path.abspath(os.path.pardir)
sys.path.append(parent_dir)

import datasets
from models import get_model
from utils import resize_density_map, sliding_window_predict

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")


truncation = 4
reduction = 8
granularity = "fine"
anchor_points = "average"

model_name = "clip_vit_b_16"
input_size = 224
window_size = 224
stride = 224 #Usado con sliding window (PARECE FUNCIONAR MUCHO MEJOR EN LAS PREDICCIONES [mirar texto negro sobre las imagenes])
#stride=None 
weight_count_loss = 1.0
count_loss = "dmcount"

# Comment the lines below to test non-CLIP models.
prompt_type = "word"
num_vpt = 32
vpt_drop = 0.
deep_vpt = True

mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
alpha = 0.8



## Predictions on ShanghaiTech A (with GT labels)

In [2]:
dataset_name = "sha" #select dataset name
split = "val"

if truncation is None:  # regression, no truncation.
    bins, anchor_points = None, None
else:
    with open(os.path.join(parent_dir, "configs", f"reduction_{reduction}.json"), "r") as f:
        config = json.load(f)[str(truncation)][dataset_name]
    bins = config["bins"][granularity]
    anchor_points = config["anchor_points"][granularity]["average"] if anchor_points == "average" else config["anchor_points"][granularity]["middle"]
    bins = [(float(b[0]), float(b[1])) for b in bins]
    anchor_points = [float(p) for p in anchor_points]

model = get_model(
    backbone=model_name,
    input_size=input_size, 
    reduction=reduction,
    bins=bins,
    anchor_points=anchor_points,
    # CLIP parameters
    prompt_type=prompt_type,
    num_vpt=num_vpt,
    vpt_drop=vpt_drop,
    deep_vpt=deep_vpt
)

#Change ckpt_dir_name to get the weights you want
#ckpt_dir_name = f"{model_name}_{prompt_type}_" if "clip" in model_name else f"{model_name}_"
#ckpt_dir_name += f"{input_size}_{reduction}_{truncation}_{granularity}_"
#ckpt_dir_name += f"{weight_count_loss}_{count_loss}"
ckpt_dir_name = 'ShanghaiTech_A_CLIP_EBC_ViT_B_16_Word'

ckpt_path = os.path.join(
    parent_dir,
    "checkpoints",
    ckpt_dir_name,
    "best_mae.pth"  # select the weight file that you want to test
)
ckpt = torch.load(ckpt_path, map_location=device)
model.load_state_dict(ckpt)
model = model.to(device)

Missing keys: []
Unexpected keys: ['proj']
All keys matched successfully.
Initialized model with text prompts: ['There is no person.', 'There is one person.', 'There are two people.', 'There are three people.', 'There are more than four people.']


In [3]:
dataset = datasets.Crowd(dataset=dataset_name, split=split, sigma=8, return_filename=True)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=0, collate_fn=datasets.collate_fn)
data_iter = iter(dataloader)

In [18]:
import pandas as pd
#CODIGO PARA COMPROBAR QUE next(data_iter) RECORRE TODAS LAS IMAGENES BIEN
img_id = None #CODIGO PARA VER QUE EL RECORTE LE HAGO BIEN
model.eval()
aux=pd.Series()
for i in range(182): #Hay 182 imagenes en sha, si pongo range(183) da error de StopIteration
    image, points, density, image_path = next(data_iter) 
    aux[i] = (os.path.basename(image_path[0]))
len(aux.unique())

182

In [4]:
img_id = None
model.eval() #No estoy seguro de si deberia meter esto en el bucle

ia_sha_df_eval = pd.DataFrame()
imagenes_recorridas = 0
while True:
    try:
        image, points, density, image_path = next(data_iter)
        imagenes_recorridas += 1
    except: #Sólo debería entrar aquí si llega al final de las iteraciones
        print('Imagenes en el dataset: ', dataset.__len__())
        print('Imagenes recorridas: ', imagenes_recorridas)
        break
    
    image_height, image_width = image.shape[-2:]
    image = image.to(device)
    image_name = os.path.basename(image_path[0])

    with torch.no_grad():
        if stride is not None:  # Sliding window prediction.
            pred_density = sliding_window_predict(model, image, window_size, stride)
        else:
            pred_density = model(image)
        pred_count = pred_density.sum().item()
        #resized_pred_density = resize_density_map(pred_density, (new_image_height, new_image_width)).cpu()
        
    #density = density.squeeze().numpy()
    #resized_pred_density = resized_pred_density.squeeze().numpy()
    #points = points[0].numpy() #EN SHA Y SHB POINTS PARECE SER YA UN ARRAY, EN QNRF DEBO DESCOMENTAR ESTA LINEA Y COMENTAR LA SIGUIENTE
    points = points[0]
    
    ia_sha_df_eval.loc[image_name,'Pred_Count'] = pred_count
    ia_sha_df_eval.loc[image_name,'GT Count'] = len(points)

Imagenes en el dataset:  182
Imagenes recorridas:  182


In [None]:
#El código de arriba tarda: 14 mins

In [None]:
#Guardo los datos obtenidos para no tener que correr el bucle de nuevo si reinicio jupyter lab

In [5]:
%store ia_sha_df_eval

Stored 'ia_sha_df_eval' (DataFrame)


In [None]:
#Cargo los datos obtenidos para no tener que correr el bucle de nuevo si reinicio jupyter lab

In [4]:
%store -r ia_sha_df_eval 


  obj = db["autorestore/" + arg]


In [14]:
ia_sha_df_eval

Unnamed: 0,Pred_Count,GT Count
020.jpg,448.449463,460.0
129.jpg,278.421906,298.0
048.jpg,401.350677,384.0
108.jpg,183.926270,182.0
125.jpg,263.224976,269.0
...,...,...
157.jpg,543.537476,392.0
170.jpg,744.809692,672.0
076.jpg,474.066498,485.0
133.jpg,109.661453,137.0


## Predictions on ShanghaiTech B (with GT labels)

In [6]:
dataset_name = "shb" #select dataset name
split = "val"

if truncation is None:  # regression, no truncation.
    bins, anchor_points = None, None
else:
    with open(os.path.join(parent_dir, "configs", f"reduction_{reduction}.json"), "r") as f:
        config = json.load(f)[str(truncation)][dataset_name]
    bins = config["bins"][granularity]
    anchor_points = config["anchor_points"][granularity]["average"] if anchor_points == "average" else config["anchor_points"][granularity]["middle"]
    bins = [(float(b[0]), float(b[1])) for b in bins]
    anchor_points = [float(p) for p in anchor_points]

model = get_model(
    backbone=model_name,
    input_size=input_size, 
    reduction=reduction,
    bins=bins,
    anchor_points=anchor_points,
    # CLIP parameters
    prompt_type=prompt_type,
    num_vpt=num_vpt,
    vpt_drop=vpt_drop,
    deep_vpt=deep_vpt
)

#Change ckpt_dir_name to get the weights you want
#ckpt_dir_name = f"{model_name}_{prompt_type}_" if "clip" in model_name else f"{model_name}_"
#ckpt_dir_name += f"{input_size}_{reduction}_{truncation}_{granularity}_"
#ckpt_dir_name += f"{weight_count_loss}_{count_loss}"
ckpt_dir_name = 'ShanghaiTech_B_CLIP_EBC_ViT_B_16_Word'

ckpt_path = os.path.join(
    parent_dir,
    "checkpoints",
    ckpt_dir_name,
    "best_mae.pth"  # select the weight file that you want to test
)
ckpt = torch.load(ckpt_path, map_location=device)
model.load_state_dict(ckpt)
model = model.to(device)

Missing keys: []
Unexpected keys: ['proj']
All keys matched successfully.
Initialized model with text prompts: ['There is no person.', 'There is one person.', 'There are two people.', 'There are three people.', 'There are more than four people.']


In [7]:
dataset = datasets.Crowd(dataset=dataset_name, split=split, sigma=8, return_filename=True)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=0, collate_fn=datasets.collate_fn)
data_iter = iter(dataloader)

In [8]:
img_id = None
model.eval() #No estoy seguro de si deberia meter esto en el bucle

ia_shb_df_eval = pd.DataFrame()
imagenes_recorridas = 0
while True:
    try:
        image, points, density, image_path = next(data_iter)
        imagenes_recorridas += 1
    except: #Sólo debería entrar aquí si llega al final de las iteraciones
        print('Imagenes en el dataset: ', dataset.__len__())
        print('Imagenes recorridas: ', imagenes_recorridas)
        break
    
    image_height, image_width = image.shape[-2:]
    image = image.to(device)
    image_name = os.path.basename(image_path[0])

    with torch.no_grad():
        if stride is not None:  # Sliding window prediction.
            pred_density = sliding_window_predict(model, image, window_size, stride)
        else:
            pred_density = model(image)
        pred_count = pred_density.sum().item()
        #resized_pred_density = resize_density_map(pred_density, (new_image_height, new_image_width)).cpu()
        
    #density = density.squeeze().numpy()
    #resized_pred_density = resized_pred_density.squeeze().numpy()
    #points = points[0].numpy() #EN SHA Y SHB POINTS PARECE SER YA UN ARRAY, EN QNRF DEBO DESCOMENTAR ESTA LINEA Y COMENTAR LA SIGUIENTE
    points = points[0]
    
    ia_shb_df_eval.loc[image_name,'Pred_Count'] = pred_count
    ia_shb_df_eval.loc[image_name,'GT Count'] = len(points)

Imagenes en el dataset:  316
Imagenes recorridas:  316


In [None]:
#El código de arriba tarda: 37 mins

In [9]:
%store ia_shb_df_eval

Stored 'ia_shb_df_eval' (DataFrame)


In [None]:
%store -r ia_shb_df_eval


In [15]:
ia_shb_df_eval

Unnamed: 0,Pred_Count,GT Count
002.jpg,47.578129,52.0
176.jpg,220.504440,229.0
254.jpg,88.818100,91.0
065.jpg,235.232895,236.0
106.jpg,182.048965,165.0
...,...,...
306.jpg,260.429108,230.0
256.jpg,196.827850,194.0
187.jpg,154.164734,159.0
019.jpg,261.823151,277.0


## Predictions on UCF-QNRF (with GT labels)

In [10]:
dataset_name = "qnrf" #select dataset name
split = "val"

if truncation is None:  # regression, no truncation.
    bins, anchor_points = None, None
else:
    with open(os.path.join(parent_dir, "configs", f"reduction_{reduction}.json"), "r") as f:
        config = json.load(f)[str(truncation)][dataset_name]
    bins = config["bins"][granularity]
    anchor_points = config["anchor_points"][granularity]["average"] if anchor_points == "average" else config["anchor_points"][granularity]["middle"]
    bins = [(float(b[0]), float(b[1])) for b in bins]
    anchor_points = [float(p) for p in anchor_points]

model = get_model(
    backbone=model_name,
    input_size=input_size, 
    reduction=reduction,
    bins=bins,
    anchor_points=anchor_points,
    # CLIP parameters
    prompt_type=prompt_type,
    num_vpt=num_vpt,
    vpt_drop=vpt_drop,
    deep_vpt=deep_vpt
)

#Change ckpt_dir_name to get the weights you want
#ckpt_dir_name = f"{model_name}_{prompt_type}_" if "clip" in model_name else f"{model_name}_"
#ckpt_dir_name += f"{input_size}_{reduction}_{truncation}_{granularity}_"
#ckpt_dir_name += f"{weight_count_loss}_{count_loss}"
ckpt_dir_name = 'UCF_QNRF_CLIP_EBC_ViT_B_16_Word'

ckpt_path = os.path.join(
    parent_dir,
    "checkpoints",
    ckpt_dir_name,
    "best_mae.pth"  # select the weight file that you want to test
)
ckpt = torch.load(ckpt_path, map_location=device)
model.load_state_dict(ckpt)
model = model.to(device)

Missing keys: []
Unexpected keys: ['proj']
All keys matched successfully.
Initialized model with text prompts: ['There is no person.', 'There is one person.', 'There are two people.', 'There are three people.', 'There are more than four people.']


In [11]:
dataset = datasets.Crowd(dataset=dataset_name, split=split, sigma=8, return_filename=True)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=0, collate_fn=datasets.collate_fn)
data_iter = iter(dataloader)

In [12]:
img_id = None
model.eval() #No estoy seguro de si deberia meter esto en el bucle

ia_qnrf_df_eval = pd.DataFrame()
imagenes_recorridas = 0
while True:
    try:
        image, points, density, image_path = next(data_iter)
        imagenes_recorridas += 1
    except: #Sólo debería entrar aquí si llega al final de las iteraciones
        print('Imagenes en el dataset: ', dataset.__len__())
        print('Imagenes recorridas: ', imagenes_recorridas)
        break
    
    image_height, image_width = image.shape[-2:]
    #image = image.to(device)
    image_name = os.path.basename(image_path[0])

    with torch.no_grad():
        if stride is not None:  # Sliding window prediction.
            pred_density = sliding_window_predict(model, image, window_size, stride)
        else:
            pred_density = model(image)
        pred_count = pred_density.sum().item()
        #resized_pred_density = resize_density_map(pred_density, (new_image_height, new_image_width)).cpu()
    
    #density = density.squeeze().numpy()
    #resized_pred_density = resized_pred_density.squeeze().numpy()
    points = points[0]
    
    ia_qnrf_df_eval.loc[image_name,'Pred_Count'] = pred_count
    ia_qnrf_df_eval.loc[image_name,'GT Count'] = len(points)

Imagenes en el dataset:  334
Imagenes recorridas:  334


In [None]:
#El código de arriba tarda: 1h y 48 mins

In [13]:
%store ia_qnrf_df_eval

Stored 'ia_qnrf_df_eval' (DataFrame)


In [None]:
%store -r ia_qnrf_df_eval


In [16]:
ia_qnrf_df_eval

Unnamed: 0,Pred_Count,GT Count
080.jpg,324.718201,347.0
203.jpg,311.745911,368.0
083.jpg,472.888885,693.0
129.jpg,306.869232,311.0
318.jpg,1623.588501,1616.0
...,...,...
264.jpg,255.129578,225.0
066.jpg,2566.565918,2980.0
260.jpg,2289.921875,2116.0
087.jpg,490.609344,626.0


In [None]:
########################################################################################################################

In [None]:
#No ejecutar más código, tarda mucho y no merece la pena porque no tengo la cantidad de personas en cada imagen

## Predictions on NWPU-Test (without GT labels)

In [6]:
if truncation is None:  # regression, no truncation.
    bins, anchor_points = None, None
else:
    with open(os.path.join(parent_dir, "configs", f"reduction_{reduction}.json"), "r") as f:
        config = json.load(f)[str(truncation)]["nwpu"]
    bins = config["bins"][granularity]
    anchor_points = config["anchor_points"][granularity]["average"] if anchor_points == "average" else config["anchor_points"][granularity]["middle"]
    bins = [(float(b[0]), float(b[1])) for b in bins]
    anchor_points = [float(p) for p in anchor_points]


model = get_model(
    backbone=model_name,
    input_size=input_size, 
    reduction=reduction,
    bins=bins,
    anchor_points=anchor_points,
    # CLIP parameters
    prompt_type=prompt_type,
    num_vpt=num_vpt,
    vpt_drop=vpt_drop,
    deep_vpt=deep_vpt
)


#Change ckpt_dir_name to get the weights you want
#ckpt_dir_name = f"{model_name}_{prompt_type}_" if "clip" in model_name else f"{model_name}_"
#ckpt_dir_name += f"{input_size}_{reduction}_{truncation}_{granularity}_"
#ckpt_dir_name += f"{weight_count_loss}_{count_loss}"
ckpt_dir_name = 'NWPU_CLIP_ViT_B_16_Word'


ckpt_path = os.path.join(
    parent_dir,
    "checkpoints",
    ckpt_dir_name,
    "best_mae.pth"  # select the weight file that you want to test
)

#ckpt_dir_name = f"{model_name}_{prompt_type}_" if "clip" in model_name else f"{model_name}_"
#ckpt_dir_name += f"{input_size}_{reduction}_{truncation}_{granularity}_"
#ckpt_dir_name += f"{weight_count_loss}_{count_loss}"

#ckpt_path = os.path.join(
#    parent_dir,
#    "checkpoints",
#    "nwpu",
#    ckpt_dir_name,
#    "best_mae_0.pth"  # select the weight file that you want to test
#)
ckpt = torch.load(ckpt_path, map_location=device)
model.load_state_dict(ckpt)
model = model.to(device)
model.eval()

dataset = datasets.NWPUTest(transforms=None, return_filename=True)

Missing keys: []
Unexpected keys: ['proj']
All keys matched successfully.
Initialized model with text prompts: ['There is no person.', 'There is one person.', 'There are two people.', 'There are three people.', 'There are more than four people.']


In [None]:
#ESTE CODIGO TARDA COMO UN DIA ENTERO, YA QUE SON 1500 IMAGENES. (No usar)

In [7]:
dataset.__len__()

1500

In [5]:
ia_nwpu_df_eval = pd.DataFrame()
imagenes_recorridas = 0
for k in range(dataset.__len__()):
    image, image_path = dataset[k]
    print(k)
    imagenes_recorridas += 1

    image = image.unsqueeze(0)
    
    image_height, image_width = image.shape[-2:]
    image = image.to(device)
    image_name = os.path.basename(image_path[0])
    
    with torch.no_grad():
        if stride is not None:  # Sliding window prediction.
            pred_density = sliding_window_predict(model, image, window_size, stride)
        else:
            pred_density = model(image)
        pred_count = pred_density.sum().item()
        #resized_pred_density = resize_density_map(pred_density, (new_image_height, new_image_width)).cpu()
    
    #density = density.squeeze().numpy()
    #resized_pred_density = resized_pred_density.squeeze().numpy()
    #points = points[0].numpy() #EN SHA Y SHB POINTS PARECE SER YA UN ARRAY, EN QNRF DEBO DESCOMENTAR ESTA LINEA Y COMENTAR LA SIGUIENTE
    #points = points[0]
    
    ia_nwpu_df_eval.loc[image_name,'Pred_Count'] = pred_count
    #ia_nwpu_df_eval.loc[image_name,'GT Count'] = len(points) #EN ESTE CASO NO HAY POINTS, COMO CALCULAN ELLOS EL MAE?????????????

print('Imagenes en el dataset: ', dataset.__len__())
print('Imagenes recorridas: ', imagenes_recorridas)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

KeyboardInterrupt: 

In [None]:
%store ia_nwpu_df_eval 

In [None]:
%store -r ia_nwpu_df_eval 
