In [18]:
import torch
import os
from datasets import load_dataset
import pandas as pd

## Load Predictions on COCO

In [2]:
# The checkpoint is looked up in the directory the notebook is running from.
checkpoint_path = os.getcwd()
checkpoint_file = os.path.join(
    checkpoint_path,
    "coco_predictions_checkpoint.pt",
)

In [3]:
# Load the saved list of prediction tensors from the checkpoint file.
# - map_location="cpu": tensors saved from a GPU are remapped to CPU, so the
#   notebook also runs on machines without CUDA and the later `.numpy()`
#   conversion works.
# - weights_only=True: limits unpickling to tensors and plain containers, so a
#   tampered checkpoint file cannot execute arbitrary code on load.
predictions_list = torch.load(
    checkpoint_file,
    map_location="cpu",
    weights_only=True,
)
print("Loaded predictions list successfully.")
print(f"Number of prediction entries: {len(predictions_list)}")

Loaded predictions list successfully.
Number of prediction entries: 1860


In [4]:
# Join every tensor in `predictions_list` along the first dimension
# (dim=0 is torch.cat's default) and show the resulting shape.
concatenated_tensor = torch.cat(predictions_list)
concatenated_tensor.shape

torch.Size([119040, 29])

In [5]:
! pip install datasets



In [6]:
PATH_TO_IMAGE_FOLDER = "COCO2017"

def create_full_path(example, base_path=None):
    """Add an ``image_path`` entry pointing at the image file on disk.

    Args:
        example: Mapping for one dataset record; must contain ``"file_name"``.
        base_path: Folder that ``example["file_name"]`` is joined onto. When
            ``None`` (the default), the module-level ``PATH_TO_IMAGE_FOLDER``
            is used, read at call time.

    Returns:
        The same ``example`` object, updated in place with ``"image_path"``.
    """
    if base_path is None:
        # Resolve lazily so later changes to the constant are still picked up.
        base_path = PATH_TO_IMAGE_FOLDER
    example["image_path"] = os.path.join(base_path, example["file_name"])
    return example

# Load the COCO 2017 dataset and attach an `image_path` field to every record.
dataset = load_dataset("phiyodr/coco2017").map(create_full_path)

In [7]:
# Number of records in the train split; used to trim the predictions.
len_coco = len(dataset["train"])
len_coco

118287

In [8]:
# Keep only the first `len_coco` rows because the concatenated predictions and
# the train split differ in length.
# NOTE(review): this assumes row i of the predictions belongs to record i of
# dataset['train'] and that every surplus row sits at the end — TODO confirm
# against the code that produced the checkpoint.
num_predictions = concatenated_tensor.shape[0]
# Fail loudly instead of silently producing a tensor shorter than the dataset.
assert num_predictions >= len_coco, (
    f"Only {num_predictions} prediction rows for {len_coco} dataset records"
)
coco_emotion_predictions = concatenated_tensor[:len_coco]

## Create New Dataset

In [10]:
# Peek at the first training record to see which fields are available.
dataset['train'][0]

{'license': 3,
 'file_name': 'train2017/000000391895.jpg',
 'coco_url': 'http://images.cocodataset.org/train2017/000000391895.jpg',
 'height': 360,
 'width': 640,
 'date_captured': '2013-11-14 11:18:45',
 'flickr_url': 'http://farm9.staticflickr.com/8186/8119368305_4e622c8349_z.jpg',
 'image_id': 391895,
 'ids': [770337, 771687, 772707, 776154, 781998],
 'captions': ['A man with a red helmet on a small moped on a dirt road. ',
  'Man riding a motor bike on a dirt road on the countryside.',
  'A man riding on the back of a motorcycle.',
  'A dirt path with a young person on a motor bike rests to the foreground of a verdant area with a bridge and a background of cloud-wreathed mountains. ',
  'A man in a red shirt and a red hat is on a motorcycle on a hill side.'],
 'image_path': 'COCO2017/train2017/000000391895.jpg'}

In [14]:
coco_emotion_predictions # preview of the sliced predictions; to be converted to a DataFrame with appropriate column names (emotions)

tensor([[0., 0., 0.,  ..., 1., 1., 0.],
        [0., 0., 0.,  ..., 1., 1., 0.],
        [0., 0., 0.,  ..., 1., 1., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 1., 0.],
        [0., 0., 0.,  ..., 1., 1., 0.],
        [0., 0., 1.,  ..., 1., 1., 0.]])

In [16]:
# Emotion label -> class index. Labels are listed in index order, so the
# dict's insertion order matches the index values 0..28.
label_map = {
    emotion: index
    for index, emotion in enumerate([
        'curious',
        'amazed',
        'fear',
        'awe',
        'neutral',
        'disgusted',
        'worried',
        'intrigued',
        'confused',
        'beautiful',
        'happy',
        'annoyed',
        'impressed',
        'sad',
        'proud',
        'inspired',
        'angry',
        'excited',
        'nostalgic',
        'upset',
        'concerned',
        'good',
        'hopeful',
        'anger',
        'joy',
        'interested',
        'calm',
        'bored',
        'scared',
    ])
}

In [19]:
# Convert the predictions to a NumPy array. detach() and cpu() are no-ops for a
# plain CPU tensor but keep this working if the tensor tracks gradients or
# lives on a GPU, where a bare .numpy() raises.
emotion_array = coco_emotion_predictions.detach().cpu().numpy()

# Name the columns by class index rather than by dict insertion order, so each
# column j is labelled with the emotion whose index in `label_map` is j.
emotion_columns = sorted(label_map, key=label_map.get)
df_emotions = pd.DataFrame(emotion_array, columns=emotion_columns)

In [21]:
# Display the emotion DataFrame (pandas truncates the rendered rows/columns).
df_emotions

Unnamed: 0,curious,amazed,fear,awe,neutral,disgusted,worried,intrigued,confused,beautiful,...,upset,concerned,good,hopeful,anger,joy,interested,calm,bored,scared
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118282,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0
118283,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0
118284,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0
118285,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0


In [22]:
# Per-emotion summary statistics (count, mean, std, min, quartiles, max).
df_emotions.describe()

Unnamed: 0,curious,amazed,fear,awe,neutral,disgusted,worried,intrigued,confused,beautiful,...,upset,concerned,good,hopeful,anger,joy,interested,calm,bored,scared
count,118287.0,118287.0,118287.0,118287.0,118287.0,118287.0,118287.0,118287.0,118287.0,118287.0,...,118287.0,118287.0,118287.0,118287.0,118287.0,118287.0,118287.0,118287.0,118287.0,118287.0
mean,0.0,4.2e-05,0.012952,0.0,0.001074,0.0,0.0,0.000135,0.499954,0.118618,...,4.2e-05,0.091075,0.014237,0.0,0.006586,0.744131,0.011235,0.999831,0.999459,0.0
std,0.0,0.006501,0.113066,0.0,0.032749,0.0,0.0,0.01163,0.500002,0.32334,...,0.006501,0.287717,0.118465,0.0,0.080885,0.43635,0.1054,0.013002,0.023254,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0
max,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,...,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0


In [26]:
# Put the image URL and captions of every train record next to its emotion
# columns. Both frames carry a default integer index, so rows are matched by
# position.
train_split = dataset['train']
coco_metadata = pd.DataFrame({
    'coco_url': train_split['coco_url'],
    'captions': train_split['captions'],
})
final_df = pd.concat([coco_metadata, df_emotions], axis=1)

In [27]:
# Sanity-check the first rows of the combined table.
final_df.head()

Unnamed: 0,coco_url,captions,curious,amazed,fear,awe,neutral,disgusted,worried,intrigued,...,upset,concerned,good,hopeful,anger,joy,interested,calm,bored,scared
0,http://images.cocodataset.org/train2017/000000...,[A man with a red helmet on a small moped on a...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0
1,http://images.cocodataset.org/train2017/000000...,[A woman wearing a net on her head cutting a c...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
2,http://images.cocodataset.org/train2017/000000...,[A child holding a flowered umbrella and petti...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
3,http://images.cocodataset.org/train2017/000000...,[A young boy standing in front of a computer k...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
4,http://images.cocodataset.org/train2017/000000...,[a boy wearing headphones using one computer i...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0


In [28]:
# Show the working directory, i.e. where the relative CSV path below is written.
os.getcwd()

'/home/ota231/Project'

In [29]:
# Write the combined table to the current working directory without the row index.
# NOTE(review): each 'captions' cell is a Python list, so the CSV stores its
# string form — presumably it has to be parsed (e.g. ast.literal_eval) when the
# file is read back; confirm with downstream consumers.
final_df.to_csv('coco_predictions.csv', index=False)