In [1]:
from utils import *
from timelinemodule	import TimelineModel
import argparse
import warnings
warnings.filterwarnings('ignore')

## Documents location

In [2]:

## Notebook-level configuration read by the cells below.
# CoNLL-U style input document; passed to get_structs() and extract_dataframe().
docpath = "../input_data_conllu/toronto_books_00.output"
# GPU index; used to build the "cuda:<n>" device string and passed to ElmoEmbedder.
gpunumber = 0
# Output directory for predictions (not referenced by the cells visible here).
outpath = "../predictions/"

## Store into a dataframe

In [3]:
def _token_ids(token_str):
    """Turn an underscore-joined token string such as "3_4_5" into [3, 4, 5]."""
    return [int(piece) for piece in token_str.split("_")]


## Dependency graph structures for the input document
filename = docpath.rsplit("/", 1)[-1]
structures = get_structs(docpath)
print("\n###########   Parsing Conllu through PredPatt    ###########")

## Per-sentence lookup built from the parsed structures
struct_dict = extract_struct_dicts(structures)

## One row per event pair, built by running the file through PredPatt and
## extracting the root and span of each predicate.
df = extract_dataframe(docpath, structures)

## Re-index the second predicate's tokens/root against the concatenated sentence
df['pred2_token_mod'] = df.apply(correct_pred2_tokens, axis=1, args=(struct_dict,))
df['pred2_root_token_mod'] = df.apply(correct_pred2_root, axis=1, args=(struct_dict,))

## Token strings -> lists of integer token positions
df['pred1_token_span'] = df['pred1_token'].map(_token_ids)
df['pred2_token_span'] = df['pred2_token_mod'].map(_token_ids)

## Model inputs for the pairwise predictions
X = extract_X(df)



###########   Parsing Conllu through PredPatt    ###########
Number of sentences in the document: 10
Number of event pairs considered: 72


## Load and run the pairwise model

In [4]:
## Load the best pairwise (relative timeline) model and run it on X.
##
## The checkpoint file name encodes the model configuration as "_"-separated
## fields; they are parsed below and passed to the TemporalModel constructor.
squashed = True
baseline = False
loss_confidence = True  # not consumed in this cell; kept for any later cell that reads it
cuda_device_num = gpunumber
cuda_device_str = "cuda:" + str(cuda_device_num)
model_path = "../model/"
file_path = "model_param_param_param_1_0_128_128_0_0_0_0_0.0_0.5_relu_1.pth"

## Resolve the device once so every consumer below agrees with the CPU fallback.
use_cuda = torch.cuda.is_available()
device = torch.device(cuda_device_str if use_cuda else "cpu")
# NOTE(review): -1 is the AllenNLP ElmoEmbedder convention for "run on CPU";
# confirm that the ElmoEmbedder exported by utils is AllenNLP's.
elmo_cuda_device = cuda_device_num if use_cuda else -1

## Decode the configuration from the checkpoint file name.
tokens = file_path.split("_")
eventatt = tokens[1]
duratt = tokens[2]
relatt = tokens[3]
concat_fine_to_dur = str2bool(tokens[-8])
concat_dur_to_fine = str2bool(tokens[-7])
fine_2_dur = str2bool(tokens[-6])
dur_2_fine = str2bool(tokens[-5])
weight = float(tokens[-4])
drop = float(tokens[-3])
activ = tokens[-2]
# Last field still carries the ".pth" extension; keep only the part before the dot.
bino_bool = str2bool(tokens[-1].split(".")[0])

print("\n###########   Predicting Relative Timelines    ###########")
print("\nRelative Temporal Model configurations:")
print("Eventatt: {}, Duratt: {}, Relatt: {}, Dropout: {}, Activation: {}, Binomial: {}, concat_fine2dur: {}, concat_dur2fine:{}, fine_to_dur: {}, dur_to_fine: {} \n".format(
    eventatt,
    duratt,
    relatt,
    drop,
    activ,
    bino_bool,
    concat_fine_to_dur,
    concat_dur_to_fine,
    fine_2_dur,
    dur_2_fine))

best_model = TemporalModel(
    embedding_size=1024,
    duration_distr=bino_bool,
    elmo_class=ElmoEmbedder(options_file, weight_file, cuda_device=elmo_cuda_device),
    mlp_dropout=drop,
    mlp_activation=activ,
    tune_embed_size=256,
    event_attention=eventatt,
    dur_attention=duratt,
    rel_attention=relatt,
    concat_fine_to_dur=concat_fine_to_dur,
    concat_dur_to_fine=concat_dur_to_fine,
    fine_to_dur=fine_2_dur,
    dur_to_fine=dur_2_fine,
    # Use the flags declared at the top of the cell instead of repeating literals.
    fine_squash=squashed,
    baseline=baseline,
    dur_MLP_sizes=[128], fine_MLP_sizes=[128],
    dur_output_size=11, fine_output_size=4,
    device=device)

## Map the checkpoint onto the resolved device. The previous hard-coded
## "cuda:<n>" map_location made torch.load fail on machines without CUDA even
## though `device` had already fallen back to CPU.
best_model.load_state_dict(torch.load(model_path + file_path, map_location=device))
best_model.to(device)
## Inference only: switch to eval mode so any dropout layers are disabled.
## Harmless if predict_fine_dur_only already does this.
best_model.eval()

p1_dur_yhat, p2_dur_yhat, fine_yhat, rel_yhat = predict_fine_dur_only(X, best_model)
print("Relative timelines completed!!\n")




###########   Predicting Relative Timelines    ###########

Relative Temporal Model configurations:
Eventatt: param, Duratt: param, Relatt: param, Dropout: 0.5, Activation: relu, Binomial: True, concat_fine2dur: False, concat_dur2fine:False, fine_to_dur: False, dur_to_fine: False 

Relative timelines completed!!



In [5]:
## Store the pairwise model's predictions in the dataframe.
df['pred1_duration'] = p1_dur_yhat.cpu().numpy()
df['pred2_duration'] = p2_dur_yhat.cpu().numpy()

## Move the fine-grained predictions to host memory once (previously done four
## times). Each row is unpacked as (b1, d1, b2, d2); transposing yields one
## array per component and still fails loudly if there are not exactly four.
fine_arr = fine_yhat.cpu().numpy()
b1, d1, b2, d2 = fine_arr.T

## Only the first value and the sum (e = b + d) of each pair are stored; the
## d components are used directly instead of being written as temporary
## columns and dropped again.
df['b1'] = b1
df['e1'] = b1 + d1
df['b2'] = b2
df['e2'] = b2 + d2

## Predicate key: "<sentence id> <root token>".
df['sent_pred_id1'] = df['sentence_id_1'] + " " + df['pred1_root_token'].map(str)
df['sent_pred_id2'] = df['sentence_id_2'] + " " + df['pred2_root_token'].map(str)


## Document Timelines

In [6]:
## Document Timelines
pred_dict, num_preds, local_data = extract_preds(df)

## Build the timeline model for this document's event pairs (constructed on CPU).
model = TimelineModel(
    data=local_data,
    num_preds=num_preds,
    device=torch.device("cpu"),
)

print("###########   Creating document timelines    ###########")
pred_b1, pred_e1, pred_b2, pred_e2, pred_timeline = model.fit(local_data, epochs=5000)

## Distinct predicate keys across both sides of every pair.
## np.unique flattens its input and returns the unique values sorted.
preds_arr = local_data[['sent_pred_id1', 'sent_pred_id2']].values
uniq_preds = np.unique(preds_arr)

preds_text = extract_pred_text(uniq_preds, local_data)

## One row per predicate: fitted timeline values plus the predicate key and text.
## NOTE(review): assumes pred_timeline rows follow the order of uniq_preds - confirm.
ans_df = pd.DataFrame(pred_timeline, columns=['start_pt', 'duration'])
ans_df['sent_pred_id'] = uniq_preds
ans_df['pred_text'] = preds_text



  0%|          | 0/5000 [00:00<?, ?it/s]

###########   Creating document timelines    ###########
Epoch: 1, Loss: 3.379361152648926


 26%|██▋       | 1320/5000 [00:04<00:13, 272.09it/s]


Epoch: 1344, Converging-Loss: 0.9290573000907898


In [7]:
## Second fit() call on the same TimelineModel and data as the previous cell.
## The recorded logs show this run starting at roughly the loss where the first
## run stopped, so it appears to continue from the already-fitted parameters.
## NOTE(review): confirm fit() does not re-initialise; if the extra pass is
## required, prefer one longer run so Restart & Run All reproduces the result.
pred_b1, pred_e1, pred_b2, pred_e2, pred_timeline  = model.fit(local_data, epochs=5000)

  1%|          | 27/5000 [00:00<00:18, 265.92it/s]

Epoch: 1, Loss: 0.9278492331504822


 28%|██▊       | 1376/5000 [00:05<00:12, 279.54it/s]

Epoch: 1386, Converging-Loss: 0.5157613754272461





In [8]:
## Display the torch device selected for the pairwise model (the TimelineModel
## above was constructed with torch.device("cpu") instead).
device

device(type='cuda', index=0)

In [9]:
## Inspect the timeline model's `pred_tensor` parameter after fitting.
## NOTE(review): presumably one row per predicate backing pred_timeline - confirm
## in timelinemodule.
model.pred_tensor

Parameter containing:
tensor([[-1.5365e+00,  5.5718e-02],
        [-1.5372e+00, -9.2672e-02],
        [-6.1619e-01,  8.8976e-02],
        [ 6.1581e-01, -1.0106e-01],
        [ 6.0650e-01,  3.1116e-01],
        [ 1.5392e+00, -7.6710e-02],
        [-1.5393e+00,  1.1233e-01],
        [ 1.5384e+00,  6.5114e-02],
        [-1.5383e+00,  6.8459e-02],
        [ 5.6968e-01, -9.1468e-02],
        [ 5.6911e-01, -4.9486e-02],
        [-5.6890e-01,  4.2131e-02],
        [ 5.6595e-01,  6.6302e-02],
        [-9.3672e-01, -3.1461e-04],
        [ 7.8927e-01,  5.1339e-01],
        [-7.8446e-01,  9.6793e-02],
        [ 7.8430e-01,  9.4901e-02],
        [ 7.8440e-01,  9.2749e-02],
        [ 7.8448e-01,  8.0203e-02],
        [ 6.3518e-01, -4.6026e-01],
        [ 7.8437e-01,  1.0608e-01],
        [-7.8389e-01, -3.6686e-01],
        [ 6.2765e-01,  9.7026e-02],
        [ 6.2440e-01,  1.1565e-01],
        [-6.2552e-01, -1.1014e-01],
        [-6.2517e-01,  1.1213e-01],
        [-6.2636e-01, -1.0781e-01],
      