In [1]:
# Attach Google Drive to the Colab runtime.
# force_remount=True requests a fresh mount even if the drive is already mounted.
from google.colab import drive

drive.mount(
    '/content/drive',
    force_remount=True,
)

Mounted at /content/drive


In [2]:
!pip install -U sentence-transformers



In [3]:
cd '/content/drive/MyDrive/685/catr'

/content/drive/.shortcut-targets-by-id/12c1zkm0_oa8VcOfsUa8Tn_YcYdyPpSlP/685/catr


In [4]:
!pip install -r requirements.txt



In [5]:
from transformers import ViTModel, ViTConfig, ViTFeatureExtractor,BertTokenizer,BertForMaskedLM
#from transformers import DeiTFeatureExtractor, DeiTModel #AutoFeatureExtractor, DeiTForImageClassificationWithTeacher, 
from sentence_transformers import SentenceTransformer
import torch
from sklearn.model_selection import KFold
from sklearn.metrics import auc, precision_score, recall_score,roc_auc_score
import xgboost as xgb
from PIL import Image
import argparse
import glob
import json
import pandas as pd
import numpy as np
from models import caption
from datasets import coco, utils
from tqdm import tqdm
from configuration import Config
from xgboost import XGBClassifier
import os

In [6]:
def extract_vision_transformer_feats(image_path,nsel_st=None,nsel_end=None,
                                     csv_path="/content/drive/MyDrive/685/sarcasm/train_data/caption/train_caption.csv"):
  """
  Extract ViT [CLS] embeddings for the images listed in a caption CSV.

  The image list is taken from the `image_hash` column of `csv_path`; each
  hash is resolved to `<image_path>/<hash>.jpg`. All selected images are sent
  through the model in a single forward pass, so keep the selection small
  enough to fit in memory.

  Args:
    image_path: Directory containing the `.jpg` images.
    nsel_st: Optional start row (inclusive) of the CSV slice to process.
      `None` means "from the first row"; `0` is honoured as a real start.
    nsel_end: Optional end row (exclusive) of the CSV slice to process.
      `None` means "to the last row".
    csv_path: CSV file with an `image_hash` column. Defaults to the training
      caption file this notebook works with.

  Returns:
    DataFrame with one row per image: columns `imdim_0 .. imdim_{H-1}`
    (H is the model's hidden size, 768 for this checkpoint) plus `img_hash`.

  Raises:
    ValueError: If the requested slice selects no images.
  """
  # The CSV defines both which images are used and their order.
  csv_data = pd.read_csv(csv_path)
  all_hashes = [str(h) for h in csv_data['image_hash']]

  # Plain slicing treats None as "open-ended" and, unlike a truthiness check,
  # does not mistake nsel_st=0 for "no selection".
  img_hash_li = all_hashes[nsel_st:nsel_end]
  if not img_hash_li:
    raise ValueError("No images selected: check nsel_st/nsel_end against the CSV length")

  # Create image batch array for Vision Transformer
  img_batch = []
  for img_hash in tqdm(img_hash_li):
    file = os.path.join(image_path, img_hash + '.jpg')
    # `with` closes the file handle; convert("RGB") guarantees 3 channels for
    # grayscale/RGBA/palette images. Image.resize rescales the picture, whereas
    # np.resize would only repeat/truncate the flattened pixel buffer.
    with Image.open(file) as img:
      img_batch.append(np.asarray(img.convert("RGB").resize((240, 240))))
  print("Creation of image batches to be used for Vision Transformer complete")

  # Extract features from ViTModel
  feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
  model = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
  model.eval()
  inputs = feature_extractor(images=img_batch, return_tensors="pt")
  # Inference only: skip building the autograd graph to save memory.
  with torch.no_grad():
    outputs = model(**inputs)

  # The first token of the last hidden state is the [CLS] representation.
  img_representations = outputs.last_hidden_state[:, 0, :]

  # Name one column per embedding dimension (derived from the output, not hard-coded).
  col_img = ["imdim_" + str(i) for i in range(img_representations.shape[1])]

  # Create a dataframe of image features
  img_data = pd.DataFrame(img_representations.detach().cpu().numpy(), columns=col_img)
  img_data['img_hash'] = img_hash_li

  return img_data

def extract_sentence_transformer_feats(reference_file_pth = '/content/drive/MyDrive/685/sarcasm/train_data/caption/',csv_file = 'train_caption.csv',lab_assign=1,row_start=1,row_end=1001):
  """
  Encode the `comment` column of a caption CSV with a SentenceTransformer.

  Args:
    reference_file_pth: Directory that contains the CSV file.
    csv_file: CSV file name; it must provide `comment` and `image_hash` columns.
    lab_assign: Constant value written to the `label` column of every row.
    row_start: First CSV row (inclusive, positional) to encode. Defaults to 1.
    row_end: Last CSV row (exclusive, positional) to encode. Defaults to 1001.

  Returns:
    DataFrame with columns `tdim_0 .. tdim_{D-1}` (D is the embedding size of
    the 'all-mpnet-base-v2' model), plus `img_hash` and `label`.
  """
  # Get reference text data
  combined_path = os.path.join(reference_file_pth, csv_file)
  data = pd.read_csv(combined_path)
  data = data.iloc[row_start:row_end].reset_index(drop=True)
  # Drop the saved-index column if the CSV has one; errors='ignore' makes this
  # a no-op otherwise, without a bare except that could hide unrelated errors.
  data = data.drop(columns=['Unnamed: 0'], errors='ignore')

  # Use Sentence Transformer to extract features
  model = SentenceTransformer('all-mpnet-base-v2')
  # Pass a plain list of sentences rather than a pandas Series.
  sentence_embeddings = model.encode(data['comment'].tolist())
  col_text = ["tdim_" + str(i) for i in range(sentence_embeddings.shape[1])]

  # Create text feature dataframe
  text_data = pd.DataFrame(sentence_embeddings, columns=col_text)
  # to_numpy() assigns by position, independent of index alignment.
  text_data['img_hash'] = data['image_hash'].to_numpy()
  text_data['label'] = lab_assign

  return text_data

def xgb_train_kfold(X_trn, y_trn,n_splits=5,max_depth=3,n_estimator=150, rand_st=3815):
  """
  Train an XGBoost classifier under shuffled K-Fold cross-validation.

  Assumes binary labels: precision and recall use scikit-learn's default
  binary averaging, and ROC-AUC uses the predicted probability of the
  positive class.

  Args:
    X_trn: Feature matrix (pandas DataFrame or array-like), one row per sample.
    y_trn: Labels (pandas Series or array-like), one per sample.
    n_splits: Number of cross-validation folds.
    max_depth: Maximum tree depth passed to XGBClassifier.
    n_estimator: Number of boosting rounds passed to XGBClassifier.
    rand_st: Seed used for both the fold shuffling and the classifier.

  Returns:
    Four lists with one entry per fold: error rates (1 - accuracy),
    precision scores, recall scores and ROC-AUC scores.
  """
  # np.asarray accepts DataFrames/Series as well as plain arrays; ravel()
  # flattens a single-column label frame so comparisons stay element-wise.
  X_all = np.asarray(X_trn)
  y_all = np.asarray(y_trn).ravel()

  errors     = []
  precision  = []
  recall     = []
  # Named auc_scores so the list does not shadow sklearn.metrics.auc.
  auc_scores = []
  # Seed the splitter from rand_st so the parameter controls the folds too.
  kf = KFold(n_splits=n_splits, shuffle=True, random_state=rand_st)

  for train_index, test_index in tqdm(kf.split(X_all), total=n_splits):
    X_train_n, X_test_n = X_all[train_index], X_all[test_index]
    y_train_n, y_test_n = y_all[train_index], y_all[test_index]

    model = XGBClassifier(
        max_depth=max_depth, n_estimators=n_estimator, random_state=rand_st
    )
    model.fit(X_train_n, y_train_n)
    y_pred = model.predict(X_test_n)
    # ROC-AUC ranks samples by score, so it needs probabilities rather than
    # thresholded 0/1 predictions.
    y_score = model.predict_proba(X_test_n)[:, 1]

    errors.append(1 - np.mean(y_pred == y_test_n))
    precision.append(precision_score(y_test_n, y_pred))
    recall.append(recall_score(y_test_n, y_pred))
    auc_scores.append(roc_auc_score(y_test_n, y_score))

  return errors, precision, recall, auc_scores

In [7]:
# Extract ViT features for CSV rows 876 through 1000 (125 images).
img_data = extract_vision_transformer_feats(
    image_path='/content/drive/MyDrive/685/sarcasm/train/',
    nsel_st=876,
    nsel_end=1001,
)

100%|██████████| 125/125 [00:24<00:00,  5.12it/s]


Creation of image batches to be used for Vision Transformer complete


In [8]:
# Persist this batch of image features. With pandas' default index=True the
# row index is also written, as an unnamed first column.
img_data.to_csv(
    '/content/drive/MyDrive/685/sarcasm/evaluation/Vision_trans/sarc_feats/img_feats_1000_batch.csv'
)

Concatenate all image-feature batch files into a single dataset

In [9]:
%cd '/content/drive/MyDrive/685/sarcasm/evaluation/Vision_trans/sarc_feats'
data_200 = pd.read_csv('img_feats_125_batch.csv')
data_400 = pd.read_csv('img_feats_250_batch.csv')
data_600 = pd.read_csv('img_feats_375_batch.csv')
data_800 = pd.read_csv('img_feats_500_batch.csv')
data_1000 = pd.read_csv('img_feats_625_batch.csv')
data_1200 = pd.read_csv('img_feats_750_batch.csv')
data_1400 = pd.read_csv('img_feats_875_batch.csv')
data_1600 = pd.read_csv('img_feats_1000_batch.csv')
'''data_1800 = pd.read_csv('img_feats_1800_batch.csv')
data_2000 = pd.read_csv('img_feats_2000_batch.csv')'''

/content/drive/.shortcut-targets-by-id/12c1zkm0_oa8VcOfsUa8Tn_YcYdyPpSlP/685/sarcasm/evaluation/Vision_trans/sarc_feats


"data_1800 = pd.read_csv('img_feats_1800_batch.csv')\ndata_2000 = pd.read_csv('img_feats_2000_batch.csv')"

In [10]:
# Stack the per-batch frames into one table.
# - ignore_index=True gives every row a unique label instead of repeating each
#   batch's own 0..n-1 index.
# - 'Unnamed: 0' is the saved row index that read_csv picks up from files
#   written with the default index=True; drop it so it cannot be mistaken for
#   a feature. errors='ignore' keeps this a no-op when the column is absent.
pos_data = pd.concat(
    [data_200,data_400,data_600,data_800,data_1000,data_1200,data_1400,data_1600],
    axis=0,
    ignore_index=True,
).drop(columns=['Unnamed: 0'], errors='ignore')

In [11]:
# Write the combined image features to disk without the row index.
pos_data.to_csv(
    '/content/drive/MyDrive/685/sarcasm/evaluation/Vision_trans/sarc_image_feats.csv',
    index=False,
)

In [12]:
# Build sentence-embedding features from the default caption CSV; every row is labelled 1.
text_data = extract_sentence_transformer_feats(lab_assign=1)

Downloading:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/10.1k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/349 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/363 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [13]:
# Sanity check: True if any cell of the text-feature table is missing.
text_data.isna().to_numpy().any()

False

In [14]:
# Write the text features to disk without the row index.
text_data.to_csv(
    '/content/drive/MyDrive/685/sarcasm/evaluation/Vision_trans/sarc_text_feats.csv',
    index=False,
)