In [0]:
from google.colab import drive
import os
drive.mount('/content/drive/')
os.chdir('/content/drive/My Drive/CA684_Assignment/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [0]:
!pip install pyprind



In [0]:
import pandas as pd

from tensorflow.python.keras import Sequential
from tensorflow.python.keras import layers
from tensorflow.python.keras import regularizers
from tensorflow.python.keras.preprocessing.text import Tokenizer

import numpy as np
from string import punctuation
import pyprind
from collections import Counter
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [0]:
def Get_score(Y_pred,Y_true):
    '''Calculate the Spearmann"s correlation coefficient'''
    Y_pred = np.squeeze(Y_pred)
    Y_true = np.squeeze(Y_true)
    if Y_pred.shape != Y_true.shape:
        print('Input shapes don\'t match!')
    else:
        if len(Y_pred.shape) == 1:
            Res = pd.DataFrame({'Y_true':Y_true,'Y_pred':Y_pred})
            score_mat = Res[['Y_true','Y_pred']].corr(method='spearman',min_periods=1)
            print('The Spearman\'s correlation coefficient is: %.3f' % score_mat.iloc[1][0])
        else:
            for ii in range(Y_pred.shape[1]):
                Get_score(Y_pred[:,ii],Y_true[:,ii])

In [0]:
# for reproducability
from numpy.random import seed
seed(1)
import tensorflow
tensorflow.random.set_seed(1)

# **Define functions**

In [0]:
def read_c3d(fname):
    """Scan vectors from file"""
    with open(fname) as f:
        for line in f:
            C3D =[float(item) for item in line.split()] # convert to float type, using default separator
    return C3D

def read_caps(fname):
    """Load the captions into a dataframe"""
    vn = []
    cap = []
    df = pd.DataFrame();
    with open(fname) as f:
        for line in f:
            pairs = line.split()
            vn.append(pairs[0])
            cap.append(pairs[1])
        df['video']=vn
        df['caption']=cap
    return df

def read_HMP(fname):
    """Scan HMP(Histogram of Motion Patterns) features from file"""
    with open(fname) as f:
        for line in f:
            pairs=line.split()
            HMP_temp = { int(p.split(':')[0]) : float(p.split(':')[1]) for p in pairs}
    # there are 6075 bins, fill zeros
    HMP = np.zeros(6075)
    for idx in HMP_temp.keys():
        HMP[idx-1] = HMP_temp[idx]            
    return HMP
  
def read_ColorHistogram(fname):
    """Scan Color Histogram from file
    Input file contains RGB histogram,
    Return a matrix of (3,256)"""
    RGB_Hist = np.zeros((3,256))
    with open(fname) as f:
        i_l = 0 # line index
        for line in f:
            pairs = line.split()
            hist_dict = {int(p.split(':')[0]):float(p.split(':')[1]) for p in pairs}
            for idx in hist_dict.keys():
                RGB_Hist[i_l,idx] = hist_dict[idx]
            i_l += 1
    return RGB_Hist

# **Load features(C3D) and train the model**

In [0]:
import os
c3d_path = './Dev-set/C3D/'
df_c3d = []
path_list = os.listdir(c3d_path)
path_list.sort(key=lambda x:int(x[5:-4]))
pbar=pyprind.ProgBar(len(path_list),title='Converting C3D')
for name in path_list:
  df_c3d.append(np.array(read_c3d(c3d_path+name)))
  pbar.update()

Converting C3D
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:08


In [0]:
print(len(df_c3d[0]))

101


In [0]:
# load the ground truth values
label_path = './Dev-set/Ground-truth/'
labels=pd.read_csv(label_path+'ground-truth.csv')

In [0]:
# C3D
X = df_c3d
Y = labels[['short-term_memorability','long-term_memorability']].values

In [0]:
# Splitting the dataset into the Training set and Test set
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=42)

In [0]:
from sklearn.ensemble import RandomForestRegressor
rf_regressor = RandomForestRegressor(n_estimators=100,random_state=42,verbose=2)

In [0]:
rf_regressor.fit(X_train,Y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


building tree 1 of 100


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s remaining:    0.0s


building tree 2 of 100
building tree 3 of 100
building tree 4 of 100
building tree 5 of 100
building tree 6 of 100
building tree 7 of 100
building tree 8 of 100
building tree 9 of 100
building tree 10 of 100
building tree 11 of 100
building tree 12 of 100
building tree 13 of 100
building tree 14 of 100
building tree 15 of 100
building tree 16 of 100
building tree 17 of 100
building tree 18 of 100
building tree 19 of 100
building tree 20 of 100
building tree 21 of 100
building tree 22 of 100
building tree 23 of 100
building tree 24 of 100
building tree 25 of 100
building tree 26 of 100
building tree 27 of 100
building tree 28 of 100
building tree 29 of 100
building tree 30 of 100
building tree 31 of 100
building tree 32 of 100
building tree 33 of 100
building tree 34 of 100
building tree 35 of 100
building tree 36 of 100
building tree 37 of 100
building tree 38 of 100
building tree 39 of 100
building tree 40 of 100
building tree 41 of 100
building tree 42 of 100
building tree 43 of 100


[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:   42.2s finished


RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=42, verbose=2, warm_start=False)

In [0]:
rf_pred = rf_regressor.predict(X_test)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.1s finished


In [0]:
Get_score(rf_pred, Y_test)

The Spearman's correlation coefficient is: 0.314
The Spearman's correlation coefficient is: 0.117


# **Load features(Captions) and train the model**

In [0]:
# load labels and captions
def read_caps(fname):
    """Load the captions into a dataframe"""
    vn = []
    cap = []
    df = pd.DataFrame();
    with open(fname) as f:
        for line in f:
            pairs = line.split()
            vn.append(pairs[0])
            cap.append(pairs[1])
        df['video']=vn
        df['caption']=cap
    return df


# load the captions
cap_path = './Dev-set/Captions/dev-set_video-captions.txt'
df_cap=read_caps(cap_path)

In [0]:
counts = Counter()
# setup prograss tracker
pbar = pyprind.ProgBar(len(df_cap['caption']), title='Counting word occurrences')
for i, cap in enumerate(df_cap['caption']):
    # replace punctuations with space
    # convert words to lower case 
    text = ''.join([c if c not in punctuation else ' ' for c in cap]).lower()
    df_cap.loc[i,'caption'] = text
    pbar.update()
    counts.update(text.split())

Counting word occurrences
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:01


In [0]:
# build the word index
len_token = len(counts)
tokenizer = Tokenizer(num_words=len_token)
print(len_token)

5191


In [0]:
tokenizer.fit_on_texts(list(df_cap.caption.values)) #fit a list of captions to the tokenizer
#the tokenizer vectorizes a text corpus, by turning each text into either a sequence of integers 

In [0]:
one_hot_res = tokenizer.texts_to_matrix(list(df_cap.caption.values),mode='binary')
sequences = tokenizer.texts_to_sequences(list(df_cap.caption.values))

In [0]:
# calculating max length
max_len = 50

In [0]:
X_seq = np.zeros((len(sequences),max_len))
for i in range(len(sequences)):
    n = len(sequences[i])
    if n==0:
        print(i)
    else:
        X_seq[i,-n:] = sequences[i]
X_seq.shape

(6000, 50)

In [0]:
print(len(X_seq[0]))

50


In [0]:
X = X_seq
Y = labels[['short-term_memorability','long-term_memorability']].values

In [0]:
print(len(X))
print(len(Y))

6000
6000


In [0]:
# Splitting the dataset into the Training set and Test set
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=40)

In [0]:
from sklearn.ensemble import RandomForestRegressor
rf_regressor = RandomForestRegressor(n_estimators=100,random_state=42,verbose=2)

In [0]:
rf_regressor.fit(X_train,Y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


building tree 1 of 100
building tree 2 of 100
building tree 3 of 100
building tree 4 of 100
building tree 5 of 100
building tree 6 of 100
building tree 7 of 100
building tree 8 of 100
building tree 9 of 100
building tree 10 of 100
building tree 11 of 100
building tree 12 of 100
building tree 13 of 100
building tree 14 of 100
building tree 15 of 100
building tree 16 of 100
building tree 17 of 100
building tree 18 of 100
building tree 19 of 100
building tree 20 of 100
building tree 21 of 100
building tree 22 of 100
building tree 23 of 100
building tree 24 of 100
building tree 25 of 100
building tree 26 of 100
building tree 27 of 100
building tree 28 of 100
building tree 29 of 100
building tree 30 of 100
building tree 31 of 100
building tree 32 of 100
building tree 33 of 100
building tree 34 of 100
building tree 35 of 100
building tree 36 of 100
building tree 37 of 100
building tree 38 of 100
building tree 39 of 100
building tree 40 of 100
building tree 41 of 100
building tree 42 of 100
b

[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    4.7s finished


RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=42, verbose=2, warm_start=False)

In [0]:
rf_pred = rf_regressor.predict(X_test)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished


In [0]:
Get_score(rf_pred, Y_test)

The Spearman's correlation coefficient is: 0.258
The Spearman's correlation coefficient is: 0.130


# **Use (C3D+Captions) train the model**

In [0]:
# C3D + Captions
df_c3d_cap = df_c3d
counter = 0
for item in range(6000):
  df_c3d_cap[counter] = np.append(df_c3d_cap[counter],X_seq[counter],axis=0)
  counter = counter+1

In [0]:
print(len(df_c3d_cap[0]))

151


In [0]:
X = df_c3d_cap
Y = labels[['short-term_memorability','long-term_memorability']].values

In [0]:
# Splitting the dataset into the Training set and Test set
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=42)

In [0]:
from sklearn.ensemble import RandomForestRegressor
rf_regressor = RandomForestRegressor(n_estimators=100,random_state=42,verbose=2)

In [0]:
rf_regressor.fit(X_train,Y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


building tree 1 of 100


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s remaining:    0.0s


building tree 2 of 100
building tree 3 of 100
building tree 4 of 100
building tree 5 of 100
building tree 6 of 100
building tree 7 of 100
building tree 8 of 100
building tree 9 of 100
building tree 10 of 100
building tree 11 of 100
building tree 12 of 100
building tree 13 of 100
building tree 14 of 100
building tree 15 of 100
building tree 16 of 100
building tree 17 of 100
building tree 18 of 100
building tree 19 of 100
building tree 20 of 100
building tree 21 of 100
building tree 22 of 100
building tree 23 of 100
building tree 24 of 100
building tree 25 of 100
building tree 26 of 100
building tree 27 of 100
building tree 28 of 100
building tree 29 of 100
building tree 30 of 100
building tree 31 of 100
building tree 32 of 100
building tree 33 of 100
building tree 34 of 100
building tree 35 of 100
building tree 36 of 100
building tree 37 of 100
building tree 38 of 100
building tree 39 of 100
building tree 40 of 100
building tree 41 of 100
building tree 42 of 100
building tree 43 of 100


[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:   47.9s finished


RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=42, verbose=2, warm_start=False)

In [0]:
rf_pred = rf_regressor.predict(X_test)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished


In [0]:
Get_score(rf_pred, Y_test)

The Spearman's correlation coefficient is: 0.321
The Spearman's correlation coefficient is: 0.111


# **Load features(HMP) and train the model**

In [0]:
import os
hmp_path = './Dev-set/HMP/'
df_hmp = []
path_list = os.listdir(hmp_path)
path_list.sort(key=lambda x:int(x[5:-4]))
pbar=pyprind.ProgBar(len(path_list),title='Converting HMP')
for name in path_list:
  df_hmp.append(np.array(read_HMP(hmp_path+name)))
  pbar.update()

Converting HMP
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:34


In [0]:
print(len(df_hmp[0]))

6075


In [0]:
X = df_hmp
Y = labels[['short-term_memorability','long-term_memorability']].values

In [0]:
# Splitting the dataset into the Training set and Test set
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=42)

In [0]:
from sklearn.ensemble import RandomForestRegressor
rf_regressor = RandomForestRegressor(n_estimators=100,random_state=42,verbose=2)

In [0]:
rf_regressor.fit(X_train,Y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


building tree 1 of 100


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   15.9s remaining:    0.0s


building tree 2 of 100
building tree 3 of 100
building tree 4 of 100
building tree 5 of 100
building tree 6 of 100
building tree 7 of 100
building tree 8 of 100
building tree 9 of 100
building tree 10 of 100
building tree 11 of 100
building tree 12 of 100
building tree 13 of 100
building tree 14 of 100
building tree 15 of 100
building tree 16 of 100
building tree 17 of 100
building tree 18 of 100
building tree 19 of 100
building tree 20 of 100
building tree 21 of 100
building tree 22 of 100
building tree 23 of 100
building tree 24 of 100
building tree 25 of 100
building tree 26 of 100
building tree 27 of 100
building tree 28 of 100
building tree 29 of 100
building tree 30 of 100
building tree 31 of 100
building tree 32 of 100
building tree 33 of 100
building tree 34 of 100
building tree 35 of 100
building tree 36 of 100
building tree 37 of 100
building tree 38 of 100
building tree 39 of 100
building tree 40 of 100
building tree 41 of 100
building tree 42 of 100
building tree 43 of 100


[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed: 27.3min finished


RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=42, verbose=2, warm_start=False)

In [0]:
rf_pred = rf_regressor.predict(X_test)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.1s finished


In [0]:
Get_score(rf_pred, Y_test)

The Spearman's correlation coefficient is: 0.294
The Spearman's correlation coefficient is: 0.097


# **Use (C3D+Captions+HMP) train the model**

In [0]:
# C3D + Captions +HMP
df_c3d_cap_hmp = df_c3d_cap
counter = 0
for item in range(6000):
  df_c3d_cap_hmp[counter] = np.append(df_c3d_cap_hmp[counter],df_hmp[counter])
  counter = counter+1

In [0]:
print(len(df_c3d_cap_hmp[0]))

6226


In [0]:
X = df_c3d_cap_hmp
Y = labels[['short-term_memorability','long-term_memorability']].values

In [0]:
print(len(X))
print(len(Y))

6000
6000


In [0]:
# Splitting the dataset into the Training set and Test set
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=42)

In [0]:
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=100,random_state=42,verbose=2)

In [0]:
regressor.fit(X_train,Y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


building tree 1 of 100


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   16.6s remaining:    0.0s


building tree 2 of 100
building tree 3 of 100
building tree 4 of 100
building tree 5 of 100
building tree 6 of 100
building tree 7 of 100
building tree 8 of 100
building tree 9 of 100
building tree 10 of 100
building tree 11 of 100
building tree 12 of 100
building tree 13 of 100
building tree 14 of 100
building tree 15 of 100
building tree 16 of 100
building tree 17 of 100
building tree 18 of 100
building tree 19 of 100
building tree 20 of 100
building tree 21 of 100
building tree 22 of 100
building tree 23 of 100
building tree 24 of 100
building tree 25 of 100
building tree 26 of 100
building tree 27 of 100
building tree 28 of 100
building tree 29 of 100
building tree 30 of 100
building tree 31 of 100
building tree 32 of 100
building tree 33 of 100
building tree 34 of 100
building tree 35 of 100
building tree 36 of 100
building tree 37 of 100
building tree 38 of 100
building tree 39 of 100
building tree 40 of 100
building tree 41 of 100
building tree 42 of 100
building tree 43 of 100


[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed: 27.0min finished


RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=42, verbose=2, warm_start=False)

In [0]:
rf_pred = regressor.predict(X_test)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.1s finished


In [0]:
Get_score(rf_pred, Y_test)

The Spearman's correlation coefficient is: 0.328
The Spearman's correlation coefficient is: 0.130


In [0]:
#save model
import pickle
with open("/content/drive/My Drive/rf_pred.pkl", "wb") as f:
 pickle.dump(regressor, f)

In [0]:
#reuse model
with open("/content/drive/My Drive/rf_pred.pkl", "rb") as f:
 model = pickle.load(f)

# **Test the model**

In [0]:
# C3D
import os
c3dPath = './Test-set/C3D_test/'
df_c3d_test=[]
path_list_c3d = os.listdir(c3dPath)
path_list_c3d.sort(key=lambda x:int(x[5:-4]))
pbar = pyprind.ProgBar(len(path_list_c3d),title='Converting C3D')
for name in path_list_c3d:
    df_c3d_test.append(np.array(read_c3d(c3dPath+name)))
    pbar.update()

Converting C3D
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:02


In [0]:
print(len(df_c3d_test[0]))

101


In [0]:
#load captions
captions_path ='./Test-set/Captions_test/test-set-1_video-captions.txt'
captions = read_caps(captions_path)

In [0]:
counts = Counter()
# setup prograss tracker
pbar = pyprind.ProgBar(len(df_cap['caption']), title='Counting word occurrences')
for i, cap in enumerate(df_cap['caption']):
    # replace punctuations with space
    # convert words to lower case 
    text = ''.join([c if c not in punctuation else ' ' for c in cap]).lower()
    df_cap.loc[i,'caption'] = text
    pbar.update()
    counts.update(text.split())

Counting word occurrences
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:01


In [0]:
# build the word index
len_token = len(counts)
tokenizer = Tokenizer(num_words=len_token)
print(len_token)

5191


In [0]:
tokenizer.fit_on_texts(list(df_cap.caption.values)) #fit a list of captions to the tokenizer
#the tokenizer vectorizes a text corpus, by turning each text into either a sequence of integers 

In [0]:
one_hot_res = tokenizer.texts_to_matrix(list(df_cap.caption.values),mode='binary')
sequences = tokenizer.texts_to_sequences(list(df_cap.caption.values))

In [0]:
# calculating max length
max_len = 50

In [0]:
X_seq_test = np.zeros((len(sequences),max_len))
for i in range(len(sequences)):
    n = len(sequences[i])
    if n==0:
        print(i)
    else:
        X_seq_test[i,-n:] = sequences[i]
X_seq_test.shape

(6000, 50)

In [0]:
print(len(X_seq_test[0]))

50


In [0]:
#####HMP########
import os
hmpPath = './Test-set/HMP_test/'
df_hmp_test=[]
path_list_hmp = os.listdir(hmpPath)
path_list_hmp.sort(key=lambda x:int(x[5:-4]))
pbar = pyprind.ProgBar(len(path_list_hmp),title='Converting HMP')
for name in path_list_hmp:
    df_hmp_test.append(np.array(read_HMP(hmpPath+name)))
    pbar.update()

Converting HMP
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:11


In [0]:
print(len(df_hmp_test[0]))

6075


In [0]:
#####C3D + Captions########
df_c3d_cap_test = df_c3d_test
counter = 0

for item in range(2000):
    df_c3d_cap_test[counter] = np.append(df_c3d_cap_test[counter],X_seq[counter],axis=0)
    counter = counter+1

In [0]:
print(len(df_c3d_cap_test[0]))

151


In [0]:
#####C3D + Captions +HMP########
df_c3d_cap_hmp_test = df_c3d_cap_test
counter = 0

for item in range(2000):
    df_c3d_cap_hmp_test[counter] = np.append(df_c3d_cap_hmp_test[counter],df_hmp_test[counter],axis=0)
    counter = counter+1

In [0]:
len(df_c3d_cap_hmp_test[0])

6226

In [0]:
#importing test Dataset
csv_path ='./Test-set/Ground-truth_test/ground_truth_template.csv'
test_dataset = pd.read_csv(csv_path)

In [0]:
test_pred = regressor.predict(df_c3d_cap_hmp_test)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.1s finished


In [0]:
test_pred = pd.DataFrame(test_pred)

In [0]:
test_dataset['short-term_memorability'] = test_pred[0]
test_dataset['long-term_memorability'] = test_pred[1]

In [0]:
test_dataset.head(5)

Unnamed: 0,video,short-term_memorability,nb_short-term_annotations,long-term_memorability,nb_long-term_annotations
0,7494,0.857608,33,0.7912,12
1,7495,0.84222,34,0.79876,10
2,7496,0.86233,32,0.77321,13
3,7497,0.893337,33,0.80513,10
4,7498,0.84929,33,0.73414,10


In [0]:
test_dataset.to_csv("/content/drive/My Drive/result.csv",index=False)

# **Load features(ColorHistogram) and use (C3D+HMP+CH) train the model**

In [0]:
def read_ColorHistogram(fname):
    """Scan Color Histogram from file
    Input file contains RGB histogram,
    Return a matrix of (3,256)"""
    RGB_Hist = np.zeros((3,256))
    with open(fname) as f:
        i_l = 0 # line index
        for line in f:
            pairs = line.split()
            hist_dict = {int(p.split(':')[0]):float(p.split(':')[1]) for p in pairs}
            for idx in hist_dict.keys():
                RGB_Hist[i_l,idx] = hist_dict[idx]
            i_l += 1
    return RGB_Hist

In [0]:
import os
ch_path = './Dev-set/ColorHistogram/'
df_ch = []
path_list = os.listdir(ch_path)
path_list.sort(key=lambda x:(x[5:-4],x[7:-4]))
pbar=pyprind.ProgBar(len(path_list),title='Converting Color Histogram')
for name in path_list:
  df_ch.append(np.array(read_ColorHistogram(ch_path+name)))
  pbar.update()

Converting Color Histogram
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 01:35:00


In [0]:
#######C3D +HMP +Color Histograms#######
df_c3d_hmp = df_c3d
counter = 0
for item in range(6000):
  df_c3d_hmp[counter] = np.append(df_c3d_hmp[counter],df_hmp[counter],axis=0)
  counter = counter+1

df_c3d_hmp_ch = df_c3d_hmp
counter = 0
for item in range(6000):
  df_c3d_hmp_ch[counter] = np.append(df_c3d_hmp_ch[counter],df_ch[counter])
  counter = counter+1

In [0]:
X = df_c3d_hmp_ch
Y = labels[['short-term_memorability','long-term_memorability']].values

# Splitting the dataset into the Training set and Test set
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=42)

from sklearn.ensemble import RandomForestRegressor
rf_regressor = RandomForestRegressor(n_estimators=100,random_state=42,verbose=2)

rf_regressor.fit(X_train,Y_train)

rf_pred = rf_regressor.predict(X_test)

Get_score(rf_pred, Y_test)

# **Use (C3D+Captions+HMP+CH) train the model**

In [0]:
#######C3D + Captions +HMP +Color Histograms#######
df_c3d_cap_hmp_ch = df_c3d_cap_hmp
counter = 0
for item in range(6000):
  df_c3d_cap_hmp_ch[counter] = np.append(df_c3d_cap_hmp_ch[counter],df_ch[counter])
  counter = counter+1

In [0]:
X = df_c3d_cap_hmp_ch
Y = labels[['short-term_memorability','long-term_memorability']].values

In [0]:
# Splitting the dataset into the Training set and Test set
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=42)

In [0]:
from sklearn.ensemble import RandomForestRegressor
rf_regressor = RandomForestRegressor(n_estimators=100,random_state=42,verbose=2)

In [0]:
rf_regressor.fit(X_train,Y_train)

In [0]:
rf_pred = rf_regressor.predict(X_test)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.1s finished


In [0]:
Get_score(rf_pred, Y_test)

The Spearman's correlation coefficient is: 0.315
The Spearman's correlation coefficient is: 0.146
