In [1]:
import pandas as pd
import os
import warnings
import numpy as np
from tqdm import tqdm
warnings.filterwarnings("ignore")
import sys
sys.path.append("/code/LLM-crime")
from generate_features import generate_features

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import sys
from safety_perception_dataset import SafetyPerceptionCLIPDataset
import numpy as np
import pandas as pd
from tqdm import tqdm
import neptune
sys.path.append("/code/LLM-crime/safety_perception_model/single_model")
from my_models import TransformerRegressionModel, FeatureViTClassifier
sys.path.append("/code/LLM-crime")
from custom_clip_train import CLIPModel, CLIPDataset, build_loaders, make_prediction
from transformers import DistilBertModel, DistilBertConfig, DistilBertTokenizer
from torch.utils.data import DataLoader
from sklearn.metrics import confusion_matrix
import seaborn as sns
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
from sklearn.metrics import precision_recall_curve, average_precision_score

def get_img_feature(paras):
    """Extract CLIP image/text features for a dataset and cache them as .npy files.

    The CLIP checkpoint at ``paras['save_model_path']/paras['save_model_name']`` is
    loaded into a ``CLIPModel``, the CSV at ``paras['dataset_path']`` is run through
    ``make_prediction`` and both feature arrays are written to
    ``paras['variables_save_paths']`` as ``img_feature.npy`` and ``text_feature.npy``.

    Args:
        paras: configuration dict. Keys read directly here: ``save_model_path``,
            ``save_model_name``, ``variables_save_paths``, ``device``,
            ``dataset_path`` and (optionally) ``text_tokenizer``. The whole dict is
            also forwarded to ``CLIPModel``, ``build_loaders`` and ``make_prediction``.

    Returns:
        tuple: ``(img_feature, text_feature)`` as numpy arrays.
    """
    CLIP_model_path = os.path.join(paras['save_model_path'], paras['save_model_name'])
    save_paths = paras['variables_save_paths']
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_paths, exist_ok=True)
    # Prefer the configured tokenizer; fall back to the previously hard-coded name.
    text_tokenizer = paras.get('text_tokenizer', "distilbert-base-uncased")
    device = paras['device']
    print(f"Using device: {device}")

    # map_location keeps the load working when the checkpoint was saved on a
    # different device than the one requested in paras['device'].
    img_encoder_paras = torch.load(CLIP_model_path, map_location=device)
    img_encoder = CLIPModel(paras)
    img_encoder.load_state_dict(img_encoder_paras)
    baseline_data = pd.read_csv(paras['dataset_path'])
    tokenizer = DistilBertTokenizer.from_pretrained(text_tokenizer)

    data_loader = build_loaders(baseline_data, tokenizer, mode="valid", cfg_paras=paras)

    img_encoder.to(device)
    # Feature extraction is inference: disable dropout / batch-norm updates.
    img_encoder.eval()
    img_feature, text_feature = make_prediction(img_encoder, data_loader, cfg_paras=paras) # (datasize, 256)
    img_feature = np.array(img_feature)
    text_feature = np.array(text_feature)

    # Persist both feature matrices so later cells can reuse them.
    np.save(os.path.join(save_paths, 'img_feature.npy'), img_feature)
    np.save(os.path.join(save_paths, 'text_feature.npy'), text_feature)
    return img_feature, text_feature

# Single configuration dict shared by the CLIP feature extractor and the
# safety-perception model cells below.
paras = {
    # --- run / data locations ---
    'debug': False,
    # 'dataset_path': "/data2/cehou/LLM_safety/img_text_data/dataset_baseline_baseline_baseline_baseline_1401.pkl",
    'dataset_path': '/data2/cehou/LLM_safety/img_text_data/baseline/tidyed/dataset_baseline_baseline_baseline_baseline_9030_withlabel.csv',
    'save_model_path': "/data2/cehou/LLM_safety/LLM_models/clip_model/test",
    'save_model_name': "model_baseline_test.pt",
    'device': torch.device("cuda:3" if torch.cuda.is_available() else "cpu"),

    # --- loader / optimiser settings ---
    'batch_size': 60,
    'num_workers': 4,
    'head_lr': 1e-3,
    'image_encoder_lr': 1e-4,
    'text_encoder_lr': 1e-5,
    'weight_decay': 1e-3,
    'img_type': 'PlacePulse',
    'patience': 1,
    'factor': 0.8,
    'epochs': 400,
    'image_embedding': 768,
    'text_embedding': 768,
    'max_length': 512,
    'size': (112, 112),

    # --- models for image and text ---
    'model_name': 'resnet50',
    'text_encoder_model': "distilbert-base-uncased",
    'text_tokenizer': "distilbert-base-uncased",
    'pretrained': True,
    'trainable': True,

    # --- deep learning model parameters ---
    'temperature': 0.07,
    'projection_dim': 256,
    'dropout': 0.1,
    'early_stopping_threshold': 5,

    # --- safety perception ---
    # 'CLIP_model_path': "/data2/cehou/LLM_safety/LLM_models/clip_model/test/model_baseline_best.pt",
    'variables_save_paths': "/data2/cehou/LLM_safety/middle_variables/test",
    'safety_model_save_path': "/data2/cehou/LLM_safety/LLM_models/safety_perception_model/",
    'placepulse_datapath': "/data2/cehou/LLM_safety/PlacePulse2.0/image_perception_score.csv",
    'eval_path': "/data2/cehou/LLM_safety/eval/test/",
    'train_type': 'classification',
    'safety_epochs': 200,
    'CNN_lr': 1e-2,
}

# Extract (and cache) the CLIP image features for every row of the dataset CSV.
img_feature, _ = get_img_feature(paras)
print(img_feature.shape)

data = pd.read_csv(paras['placepulse_datapath'])
SVI_namelist = pd.read_csv(paras['dataset_path'])

# Remember which feature row belongs to each image so the alignment survives the
# merge even if some images have no (or several) safety rows.
# NOTE(review): assumes img_feature rows follow the row order of the dataset CSV
# (the loader is built with mode="valid") — confirm in build_loaders.
namelist = SVI_namelist[['Image_ID']].reset_index(drop=True)
namelist['_feature_row'] = np.arange(len(namelist))
merged = namelist.merge(data[data['Category'] == 'safety'], on='Image_ID')
data = merged.drop(columns='_feature_row')

# Drop the rows labelled 0 and pick the matching feature rows.
keep = data['label'] != 0
data_nonezero_idx = data[keep].index
img_feature_nonezero = img_feature[merged.loc[keep, '_feature_row'].to_numpy(), :]
data_nonezero = data[keep].reset_index(drop=True)

# 70/30 sequential split. The filtered frame/features are used here: the split
# point is computed from them, so slicing the unfiltered arrays (as before)
# leaked zero-label rows into training and left most of the data in "valid".
# NOTE(review): labels are still the raw non-zero values at this point; a later
# cell remaps -1 -> 0 on data_nonezero — confirm whether the dataset expects
# that remap to have happened before construction.
train_len = int(0.7*len(img_feature_nonezero))
train_dataset = SafetyPerceptionCLIPDataset(data_nonezero[:train_len], img_feature_nonezero[:train_len], paras)
valid_dataset = SafetyPerceptionCLIPDataset(data_nonezero[train_len:], img_feature_nonezero[train_len:], paras)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False)

Using device: cuda:3


100%|██████████| 151/151 [00:44<00:00,  3.41it/s]


(9030, 256)


In [17]:
# Map the -1 class to 0 so the labels become {0, 1}.
is_negative = data_nonezero['label'] == -1
data_nonezero.loc[is_negative, 'label'] = 0

In [18]:
# Display the filtered frame after the -1 -> 0 relabelling (bare last expression = rich display).
data_nonezero

Unnamed: 0,Image_ID,Score,Category,label
1,50f55e9bfdc9f065f0004d92,5.406606,safety,1
2,50f55e9cfdc9f065f0004da9,4.219841,safety,0
3,50f55e9cfdc9f065f0004daa,5.941633,safety,1
4,50f55e9cfdc9f065f0004dae,4.258011,safety,0
6,50f55e9dfdc9f065f0004dca,5.567581,safety,1
...,...,...,...,...
9023,50f55e1dfdc9f065f0004ae8,6.001295,safety,1
9024,50f55e1dfdc9f065f0004ae9,5.622535,safety,1
9025,50f55e1efdc9f065f0004af6,5.988817,safety,1
9027,50f55e1efdc9f065f0004b03,6.583531,safety,1


In [None]:
import torch

import torch.nn as nn
import torchvision.models as models

class ResNet50Custom(nn.Module):
    """ResNet50 classifier over flat feature vectors reshaped into 1-channel squares.

    Each ``input_dim``-long vector is viewed as a ``(1, side, side)`` image with
    ``side = sqrt(input_dim)`` and fed through a torchvision ResNet50 whose first
    convolution takes one channel and whose final layer emits ``num_classes`` logits.

    Args:
        input_dim: length of each input feature vector; must be a perfect square.
        num_classes: size of the output layer.

    Raises:
        ValueError: if ``input_dim`` is not a perfect square.
    """

    def __init__(self, input_dim=256, num_classes=10):
        super(ResNet50Custom, self).__init__()
        side = int(round(input_dim ** 0.5))
        if side * side != input_dim:
            # Without this check a non-square input_dim would be silently
            # reshaped into the wrong batch size (or fail deep inside forward).
            raise ValueError(f"input_dim must be a perfect square, got {input_dim}")
        self.input_dim = input_dim
        self.num_classes = num_classes
        self.side = side

        # Load the pre-trained ResNet50 model
        self.resnet50 = models.resnet50(pretrained=True)
        # Replace the first convolution so it accepts 1-channel input.
        # This layer is freshly initialised; the pre-trained conv1 weights are discarded.
        self.resnet50.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

        # Replace the fully connected head to output num_classes logits
        self.resnet50.fc = nn.Linear(self.resnet50.fc.in_features, self.num_classes)

    def forward(self, x):
        """Map ``(batch_size, input_dim)`` features to ``(batch_size, num_classes)`` logits."""
        # reshape (rather than view) also accepts non-contiguous inputs
        x = x.reshape(-1, 1, self.side, self.side)
        x = self.resnet50(x)
        return x

# Smoke test: push a random batch through the model and check the output shape.
input_dim, num_classes = 256, 10
model = ResNet50Custom(num_classes=num_classes, input_dim=input_dim)

# Random batch of 8 flat feature vectors, shape (batch_size, input_dim)
dummy_input = torch.randn((8, input_dim))
output = model(dummy_input)
print(output.shape)  # Should print torch.Size([8, num_classes])

In [9]:
# Peek at the first batch of a loader: print the image tensor shape and the labels, then stop.
# NOTE(review): `data_loader` is not defined at notebook scope in the visible cells
# (it is only a local inside get_img_feature) — this cell relies on leftover kernel
# state; confirm which loader was intended.
for i, (images, labels) in enumerate(data_loader):
    print(images.shape)
    print(labels)
    break

TypeError: Unexpected type <class 'numpy.ndarray'>

In [None]:
# Hyperparameters
input_dim = 6
model_dim = 64
num_heads = 8
num_layers = 3
output_dim = 6
learning_rate = 0.001
num_epochs = 100
image_size = (300,400)

# Generate some dummy data
batch_size = 32
sequence_length = 10
# The original cell never created x_train / y_train, so it failed with NameError
# on a fresh kernel.
# NOTE(review): assumes the model consumes (batch, sequence, input_dim) tensors —
# confirm against TransformerRegressionModel in my_models.
x_train = torch.randn(batch_size, sequence_length, input_dim)

# Initialize model, loss function, and optimizer
model = TransformerRegressionModel(input_dim, model_dim, num_heads, num_layers, output_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Random targets shaped exactly like the model output, so the MSE loss never
# relies on broadcasting regardless of what shape the model returns.
with torch.no_grad():
    y_train = torch.randn_like(model(x_train))

# Training loop (full-batch: one optimiser step per epoch)
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(x_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

print("Training complete.")

In [3]:
def cal_P_and_N(select_id, category, data):
    """Compute the win and loss rates of one image within one category.

    Args:
        select_id: id of the image of interest.
        category: value of the ``category`` column to restrict to.
        data: pairwise-comparison frame with columns ``left_id``, ``right_id``,
            ``winner`` (``'left'``, ``'right'`` or anything else for a draw) and
            ``category``. It is not modified.

    Returns:
        tuple: ``(P_i, N_i, winner_id, loser_id)`` where ``P_i`` / ``N_i`` are the
        fractions of the image's comparisons it won / lost, and ``winner_id`` /
        ``loser_id`` list, per comparison, the winning / losing image id (both
        ``"equal"`` for a draw). ``(None, None, None, None)`` when the image has
        no comparison in ``category``.
    """
    involves_image = (data['left_id'] == select_id) | (data['right_id'] == select_id)
    test_group = data[involves_image & (data['category'] == category)]
    if test_group.empty:
        return None, None, None, None

    # Build the winner/loser lists directly instead of writing into the filtered
    # frame row by row (chained-assignment hazard, and very slow with iterrows).
    winner_id, loser_id = [], []
    comparisons = zip(test_group['winner'].tolist(),
                      test_group['left_id'].tolist(),
                      test_group['right_id'].tolist())
    for verdict, left, right in comparisons:
        if verdict == 'left':
            winner_id.append(left)
            loser_id.append(right)
        elif verdict == 'right':
            winner_id.append(right)
            loser_id.append(left)
        else:
            winner_id.append("equal")
            loser_id.append("equal")

    total = len(winner_id)
    p_i = sum(1 for w in winner_id if w == select_id)
    e_i = sum(1 for w in winner_id if w == "equal")
    n_i = total - p_i - e_i

    P_i = p_i / total
    N_i = n_i / total
    return P_i, N_i, winner_id, loser_id


In [4]:
def cal_Q(select_id, category, data):
    """Compute the Q-score of one image in one category, scaled to [0, 1].

    Q = 1/3 * (P_i + mean(P_j over images j that i beat)
                   - mean(N_k over images k that beat i) + 1)

    where P / N are the win / loss rates returned by ``cal_P_and_N``. The factor
    1/3 keeps the score in [0, 1].

    Args:
        select_id: id of the image of interest.
        category: category to score.
        data: pairwise-comparison frame, passed through to ``cal_P_and_N``.

    Returns:
        float Q-score, or ``None`` when the image has no comparison in ``category``.
    """
    P_i, N_i, winner_id, loser_id = cal_P_and_N(select_id, category, data)
    if (P_i is None) or (N_i is None):
        return None

    # Images that select_id beat show up as losers; those that beat it as winners.
    # The original code had these two groups swapped, so beating strong images
    # was not rewarded and losing to weak images was not penalised.
    beaten = [i for i in loser_id if (i != "equal") and (i != select_id)]
    beaten_by = [i for i in winner_id if (i != "equal") and (i != select_id)]

    # Win rates of the images select_id beat.
    sub_pi_list = [cal_P_and_N(i, category, data)[0] for i in beaten]
    # Loss rates of the images that beat select_id.
    sub_ni_list = [cal_P_and_N(i, category, data)[1] for i in beaten_by]

    # An empty group contributes 0 rather than NaN.
    sub_P_i_avg = np.mean(sub_pi_list) if sub_pi_list else 0
    sub_n_i_avg = np.mean(sub_ni_list) if sub_ni_list else 0

    Q = (1 / 3) * (P_i + sub_P_i_avg - sub_n_i_avg + 1)
    return Q

In [5]:
# Locations of the PlacePulse 2.0 comparison table and the image folder.
data_path = "/data_nas/cehou/LLM_safety/PlacePulse2.0/metadata/final_data.csv"
img_path = "/data_nas/cehou/LLM_safety/PlacePulse2.0/photo_dataset/final_photo_dataset"

data = pd.read_csv(data_path)
# Image ids are the file names up to the first '.'.
img_id_ls = [fname.partition('.')[0] for fname in os.listdir(img_path)]
# Distinct categories, most frequent first.
category = list(data['category'].value_counts().index)

In [6]:
# from concurrent.futures import ThreadPoolExecutor, as_completed

# def calculate_Q_for_image(img_id):
#     results = []
#     for c in category:
#         Q = cal_Q(img_id, c, data)
#         results.append([img_id, c, Q])
#     return results
# Q_ls = []
# with ThreadPoolExecutor(max_workers=50) as executor:
#     futures = [executor.submit(calculate_Q_for_image, img_id) for img_id in tqdm(img_id_ls)]
#     for future in as_completed(futures):
#         Q_ls.extend(future.result())
#         if len(Q_ls) % 100 == 0:
#             Q_df = pd.DataFrame(Q_ls, columns=['Image_ID', 'Category', 'Q_Value'])
#             Q_df.to_csv("/data_nas/cehou/LLM_safety/image_perception.csv", index=False)

In [15]:
# Write every Q value collected so far to CSV (overwrites the file).
# NOTE(review): `Q_ls` is only populated by the loop cell that appears further
# down in the notebook, so this cell must be run after that one.
Q_df = pd.DataFrame(Q_ls, columns=['Image_ID', 'Category', 'Q_Value'])
Q_df.to_csv("/data_nas/cehou/LLM_safety/image_perception.csv", index=False)

In [10]:

def calculate_Q_for_image(img_id):
    """Return one ``[img_id, category, Q]`` row per entry of the global ``category`` list.

    Q comes from ``cal_Q`` evaluated on the global ``data`` frame.
    """
    return [[img_id, c, cal_Q(img_id, c, data)] for c in category]

Q_ls = []
Q_csv_path = "/data_nas/cehou/LLM_safety/image_perception.csv"
# Checkpoint by number of processed images. The previous `len(Q_ls) % 100` test
# only fired when (images * categories) happened to be a multiple of 100, so the
# checkpoint cadence depended on how many categories exist.
checkpoint_every = 100

try:
    for n_done, img_id in enumerate(tqdm(img_id_ls), start=1):
        results = calculate_Q_for_image(img_id)
        Q_ls.extend(results)
        if n_done % checkpoint_every == 0:
            Q_df = pd.DataFrame(Q_ls, columns=['Image_ID', 'Category', 'Q_Value'])
            Q_df.to_csv(Q_csv_path, index=False)
            print("Save to csv")
finally:
    # Always flush what has been computed, both after the last image and when
    # the run is interrupted, so no finished work is lost.
    Q_df = pd.DataFrame(Q_ls, columns=['Image_ID', 'Category', 'Q_Value'])
    Q_df.to_csv(Q_csv_path, index=False)


[A
[A
[A
[A
[A
  0%|          | 5/110688 [00:19<118:28:04,  3.85s/it]


KeyboardInterrupt: 