# Data Preprocessing

In [1]:
import pandas as pd
import re
import os

from PIL import Image
import cv2

import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load the Charades v1 training annotations (path is relative to the notebook).
train_data = pd.read_csv("../data/Charades_v1_train.csv")
# Bare last expression -> rich display of the frame.
train_data

Unnamed: 0,id,subject,scene,quality,relevance,verified,script,objects,descriptions,actions,length
0,46GP8,HR43,Kitchen,6.0,7.0,Yes,A person cooking on a stove while watching som...,food;stove;window,A person cooks food on a stove before looking ...,c092 11.90 21.20;c147 0.00 12.60,24.83
1,N11GT,0KZ7,Stairs,6.0,7.0,Yes,"One person opens up a folded blanket, then sne...",blanket;broom;floor,Person at the bottom of the staircase shakes a...,c098 8.60 14.20;c075 0.00 11.70;c127 0.00 15.2...,18.33
2,0IH69,6RE8,Bedroom,6.0,5.0,Yes,A person is seen leaving a cabinet. They then ...,book;box;cabinet;shelf,A person is standing in a bedroom. They walk o...,,30.25
3,KRF68,YA10,Laundry room,6.0,7.0,Yes,A person runs into their laundry room. They gr...,clothes;door;phone,A person runs in and shuts door. The person gr...,c018 22.60 27.80;c141 4.10 9.60;c148 10.30 25....,30.33
4,MJO7C,6RE8,Kitchen,6.0,6.0,Yes,A person runs into their pantry holding a bott...,cup;phone,A person runs in place while holding a bottle ...,c015 0.00 32.00;c107 0.00 32.00,31.38
...,...,...,...,...,...,...,...,...,...,...,...
7980,7K2CS,HJZQ,Garage,6.0,6.0,Yes,Person enters the garage while sneezing. Perso...,chair;clothes;door;food;sandwich;shirt;spoon,"A enters through a doorway, sneezes, then clos...",c065 17.60 31.00;c067 17.60 31.00;c153 0.00 5....,30.08
7981,S2A89,KL48,Bathroom,7.0,7.0,Yes,"A person takes a chair and walks it over, plac...",chair;door,A PERSON IS TAKING A CHAIR FROM ONE ROOM TO TH...,c006 4.00 10.80;c141 4.40 10.90;c151 12.80 20....,19.29
7982,01O27,18IT,Bathroom,6.0,7.0,Yes,A person enters a bathroom and closes the door...,door;floor;mirror,A person is walking towards the bathroom. A pe...,c006 5.10 11.50;c008 0.50 6.60;c124 39.00 47.0...,46.08
7983,2MJ72,6RE8,Bedroom,6.0,6.0,Yes,A person opens a window in their laundry room....,door;towel;window,A person opens a window and looks out of it. ...,c006 11.00 17.00;c037 20.70 31.00;c092 0.60 8....,30.25


Some videos have multiple actions and some videos do not have any labelled actions

In [3]:
# Column dtypes and non-null counts; used to spot columns with missing values.
train_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7985 entries, 0 to 7984
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   id            7985 non-null   object 
 1   subject       7985 non-null   object 
 2   scene         7985 non-null   object 
 3   quality       7968 non-null   float64
 4   relevance     7930 non-null   float64
 5   verified      7985 non-null   object 
 6   script        7985 non-null   object 
 7   objects       7982 non-null   object 
 8   descriptions  7985 non-null   object 
 9   actions       7811 non-null   object 
 10  length        7985 non-null   float64
dtypes: float64(3), object(8)
memory usage: 686.3+ KB


Remove all rows that don't have a specified action.

In [3]:
# Discard rows whose `actions` field is missing, then renumber the
# remaining rows from 0 (the old index is thrown away).
cleaned_data = (
    train_data
    .dropna(subset=['actions'])
    .reset_index(drop=True)
)

In [4]:
# Summary statistics of the numeric columns after dropping unlabelled rows.
cleaned_data.describe()

Unnamed: 0,quality,relevance,length
count,7795.0,7758.0,7811.0
mean,5.585247,6.227636,29.885159
std,1.186276,1.150637,9.40607
min,1.0,1.0,2.33
25%,5.0,6.0,26.96
50%,6.0,7.0,30.54
75%,6.0,7.0,32.12
max,7.0,7.0,194.33


Split each row into multiple rows so that every row holds exactly one action.

In [5]:
# Keep only the video id and its annotation string. The annotation string
# is a ';'-separated list of entries: turn it into a Python list, then
# explode so each (id, entry) pair sits on its own row with a fresh index.
df = (
    cleaned_data[['id', 'actions']]
    .assign(actions=lambda frame: frame['actions'].map(lambda entry: entry.split(';')))
    .explode('actions', ignore_index=True)
)

In [6]:
# Preview: one action entry per row after the explode.
df.head()

Unnamed: 0,id,actions
0,46GP8,c092 11.90 21.20
1,46GP8,c147 0.00 12.60
2,N11GT,c098 8.60 14.20
3,N11GT,c075 0.00 11.70
4,N11GT,c127 0.00 15.20


Split each action entry into its action code, start time, and end time.

In [7]:
# Break every entry on single spaces into its three fields, build a frame
# from those fields, and place it next to the id column (aligned on index).
action_start_end = [entry.split(' ') for entry in df['actions']]
temp_df = pd.DataFrame(
    action_start_end,
    columns=['action', 'start_time', 'end_time'],
)
df = pd.concat((df[['id']], temp_df), axis='columns')

In [8]:
# The time fields were parsed out of text, so cast both to float before
# doing arithmetic on them.
df = df.astype({'start_time': float, 'end_time': float})
# Length of each annotated segment (end minus start).
df['duration'] = df['end_time'].sub(df['start_time'])

In [10]:
# Preview the parsed columns together with the computed duration.
df.head()

Unnamed: 0,id,action,start_time,end_time,duration
0,46GP8,c092,11.9,21.2,9.3
1,46GP8,c147,0.0,12.6,12.6
2,N11GT,c098,8.6,14.2,5.6
3,N11GT,c075,0.0,11.7,11.7
4,N11GT,c127,0.0,15.2,15.2


In [9]:
# Labels look like "c092": strip the leading character and store the
# remaining digits as an integer class id.
df["vid_class"] = df["action"].str[1:].astype("int64")
df.head()

Unnamed: 0,id,action,start_time,end_time,duration,vid_class
0,46GP8,c092,11.9,21.2,9.3,92
1,46GP8,c147,0.0,12.6,12.6,147
2,N11GT,c098,8.6,14.2,5.6,98
3,N11GT,c075,0.0,11.7,11.7,75
4,N11GT,c127,0.0,15.2,15.2,127


In [10]:
# Keep only segments with a strictly positive duration and renumber the rows.
df = df.loc[lambda frame: frame['duration'] > 0].reset_index(drop=True)

In [11]:
# Summary statistics after filtering out non-positive durations.
df.describe()

Unnamed: 0,start_time,end_time,duration,vid_class
count,49802.0,49802.0,49802.0,49802.0
mean,8.886778,21.807878,12.9211,80.144894
std,9.773149,11.035979,9.383733,49.95213
min,0.0,2.5,1.9,0.0
25%,0.0,13.2,6.0,33.0
50%,6.1,21.5,9.0,81.0
75%,15.0,30.6,17.3,123.0
max,155.1,195.0,151.4,156.0


In [12]:
# Final look at the training table before it is written to disk.
df.head()

Unnamed: 0,id,action,start_time,end_time,duration,vid_class
0,46GP8,c092,11.9,21.2,9.3,92
1,46GP8,c147,0.0,12.6,12.6,147
2,N11GT,c098,8.6,14.2,5.6,98
3,N11GT,c075,0.0,11.7,11.7,75
4,N11GT,c127,0.0,15.2,15.2,127


In [14]:
# Persist the per-action training table; index=False leaves the row index out of the file.
df.to_csv("../data/train_data2.csv", index = False)

### Test data

In [15]:
# Load the Charades v1 test annotations (path is relative to the notebook).
test_data = pd.read_csv("../data/Charades_v1_test.csv")
# Preview the first rows.
test_data.head()

Unnamed: 0,id,subject,scene,quality,relevance,verified,script,objects,descriptions,actions,length
0,YSKX3,CP6Y,Bedroom,5.0,6.0,Yes,A person fixes the bed then throws pillow on it.,bed;blanket;mattress;pillow,A person looks under a mattress and pats the b...,c077 12.10 18.00;c079 11.80 17.30;c080 13.00 1...,16.62
1,T5ECU,M7K8,Living room,6.0,7.0,Yes,One person is smiling and drinking while watch...,chair;cup;dish;dog;vacuum;window,A person leaves the room holding an umbrella w...,c137 0.30 9.00;c092 0.00 18.90;c152 3.50 31.00...,31.29
2,AAH6R,ENHU,Closet / Walk-in closet / Spear closet,6.0,6.0,Yes,A person undressing in the doorway walks to a ...,chair;clothes;doorway;mirror,"A person is looking in the mirror, adjusting h...",c059 3.50 8.00;c151 3.00 7.60;c148 0.00 8.00;c...,7.38
3,015XE,GFWE,Kitchen,6.0,5.0,Yes,A person is taking a picture of a light while ...,camera;light;phone;picture,A person holds a camera up to a light on the c...,c015 0.00 32.10;c087 0.60 32.10;c016 0.00 32.1...,32.83
4,TJZ0P,ENHU,Home Office / Study (A room in a house used fo...,5.0,7.0,Yes,A person in their home office opening a book t...,arm;book;chair;food;sandwich;shelf;table;window,A person sitting down picks up a book. A perso...,c067 8.60 13.80;c028 6.90 11.90;c059 0.00 19.0...,17.71


In [16]:
# Discard test rows whose `actions` field is missing, then renumber the
# remaining rows from 0.
test_data = (
    test_data
    .dropna(subset=['actions'])
    .reset_index(drop=True)
)

In [17]:
# Keep the video id and its ';'-separated annotation string, turn the
# string into a list, and explode to one entry per row with a fresh index.
test_df = (
    test_data[['id', 'actions']]
    .assign(actions=lambda frame: frame['actions'].map(lambda entry: entry.split(';')))
    .explode('actions', ignore_index=True)
)

In [18]:
# Break every entry on single spaces into its three fields, build a frame
# from those fields, and place it next to the id column (aligned on index).
action_start_end = [entry.split(' ') for entry in test_df['actions']]
test_temp_df = pd.DataFrame(
    action_start_end,
    columns=['action', 'start_time', 'end_time'],
)
test_df = pd.concat((test_df[['id']], test_temp_df), axis='columns')

In [19]:
# The time fields were parsed out of text, so cast both to float before
# doing arithmetic on them.
test_df = test_df.astype({'start_time': float, 'end_time': float})
# Length of each annotated segment (end minus start).
test_df['duration'] = test_df['end_time'].sub(test_df['start_time'])

In [20]:
# Keep only segments with a strictly positive duration and renumber the rows.
test_df = test_df.loc[lambda frame: frame['duration'] > 0].reset_index(drop=True)

In [21]:
# Strip the leading character of the action label and store the remaining
# digits as an integer class id.
test_df["vid_class"] = test_df["action"].str[1:].astype("int64")

In [22]:
# Summary statistics of the processed test table.
test_df.describe()

Unnamed: 0,start_time,end_time,duration,vid_class
count,16691.0,16691.0,16691.0,16691.0
mean,8.369223,20.64666,12.277437,80.084656
std,9.003389,10.158509,9.062844,49.412155
min,0.0,2.5,2.3,0.0
25%,0.0,12.0,5.7,34.0
50%,5.7,19.9,8.2,81.0
75%,14.2,30.0,16.3,121.5
max,53.2,73.0,73.0,156.0


In [23]:
# Persist the per-action test table; index=False leaves the row index out of the file.
test_df.to_csv("../data/test_data2.csv", index = False)