# Predicting the Output of the Given Test Cases

### Objectives

#### 1. Loading the Trained Model
#### 2. Calculating Joint Probability
#### 3. Making Predictions for the Given Test Cases
#### 4. Storing the Predictions in Files

## Notebook Imports

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
from sklearn.metrics import confusion_matrix

%matplotlib inline

## File Paths


In [23]:
#------ Manually Added Files

# CSV with the raw test statements. It is read back with pd.read_csv when the
# predicted class is attached to each statement.
TESTING_DATA_PATH = 'Testing_Data/Testing_Data.csv'

#------ Files Generated in Previous Steps

# Space-delimited numeric text files, all loaded with np.loadtxt.
# One word-probability file per class; each supplies the P(Word | Class) term
# of the score for that class.
PREAMBLE_WORD_PROB = 'Trained_Model/preamble_word_prob.txt'
LENDER_DEFAULTING_WORD_PROB = 'Trained_Model/lender_defaulting_word_prob.txt' 
GOVERNING_LAW_WORD_PROB = 'Trained_Model/governing_law_word_prob.txt' 
INDEMNIFICATION_WORD_PROB = 'Trained_Model/indemnification_word_prob.txt'
OTHER_WORD_PROB = 'Trained_Model/other_word_prob.txt'
# Word probabilities that are not class-specific; supplies the P(Word) term.
WORD_PROB = 'Trained_Model/word_prob.txt'
# Class probabilities, indexed with the *_classification_id constants.
PROB_CLASS_PATH = 'Trained_Model/prob_class.txt'
# Numeric matrix for the test statements; it is multiplied against the
# per-word log-probability vectors.
# NOTE(review): presumably one row per statement and one column per
# vocabulary word - confirm against the step that generates this file.
TESTING_DATA_FULL_MATRIX = 'Testing_Data/testing_full_matrix.txt'

#------ Files Generated in Current Step

# Destinations for the statements together with their predicted class.
TESTING_PREDICTION_PATH_CSV = 'Testing_Data/Testing_Prediction.csv'
TESTING_PREDICTION_PATH_JSON = 'Testing_Data/Testing_Prediction.json'

## Constants

In [4]:
# Integer id of every clause class, numbered consecutively from zero.
# The ids are used to index prob_class and double as positions in LABEL_NAMES.
(
    Preamble_classification_id,
    Lender_defaulting_classification_id,
    Governing_law_classification_id,
    Indemnification_classification_id,
    Other_classification_id,
) = range(5)

# Human-readable name of each class, listed in id order.
LABEL_NAMES = [
    'Preamble',
    'Lender Defaulting',
    'Governing Law',
    'Indemnification',
    'Other',
]

## Load the Data

In [5]:
def _read_space_delimited(path):
    """Load a space-delimited numeric text file into a NumPy array."""
    return np.loadtxt(path, delimiter=' ')


# Numeric matrix describing the test statements.
testing_data = _read_space_delimited(TESTING_DATA_FULL_MATRIX)

# Word probabilities for each class.
prob_word_preamble = _read_space_delimited(PREAMBLE_WORD_PROB)
prob_word_lender_defaulting = _read_space_delimited(LENDER_DEFAULTING_WORD_PROB)
prob_word_governing_law = _read_space_delimited(GOVERNING_LAW_WORD_PROB)
prob_word_indemnification = _read_space_delimited(INDEMNIFICATION_WORD_PROB)
prob_word_other = _read_space_delimited(OTHER_WORD_PROB)

# Class-independent word probabilities and the class probabilities.
prob_word = _read_space_delimited(WORD_PROB)
prob_class = _read_space_delimited(PROB_CLASS_PATH)

## Calculating the Joint Probability
$$P(Class \, | \, Word) = \frac{P(Word \, | \, Class) \, P(Class)} {P(Word)}$$

In [6]:
# Log-space score for the Preamble class:
#   testing_data . (log P(word | class) - log P(word)) + log P(class)
preamble_log_ratio = np.log(prob_word_preamble) - np.log(prob_word)
preamble_log_prior = np.log(prob_class[Preamble_classification_id])
joint_log_preamble = testing_data.dot(preamble_log_ratio) + preamble_log_prior

# Same scores, in the same order, collected into a Python list.
joint_log_preamble_list = list(joint_log_preamble)

joint_log_preamble_list

[-5.18962375956347,
 -14.081503263715542,
 -7.905845974665435,
 -10.358854616335925,
 -0.8830777808552808,
 -9.134549030138684,
 -6.321725870215625,
 -4.709459929176728,
 -3.071355831727611,
 -13.030365503344088]

In [7]:
# Log-space score for the Lender Defaulting class:
#   testing_data . (log P(word | class) - log P(word)) + log P(class)
lender_defaulting_log_ratio = np.log(prob_word_lender_defaulting) - np.log(prob_word)
lender_defaulting_log_prior = np.log(prob_class[Lender_defaulting_classification_id])
joint_log_lender_defaulting = (
    testing_data.dot(lender_defaulting_log_ratio) + lender_defaulting_log_prior
)

# Same scores, in the same order, collected into a Python list.
joint_log_lender_defaulting_list = list(joint_log_lender_defaulting)

joint_log_lender_defaulting_list

[-10.09694588629352,
 -12.862245464293633,
 -5.736675025390013,
 -9.036785922790337,
 10.582295393398686,
 -7.766904945465448,
 -5.708748318855492,
 -2.388498501928927,
 -7.871113667726584,
 -4.034815488696304]

In [8]:
# Log-space score for the Governing Law class:
#   testing_data . (log P(word | class) - log P(word)) + log P(class)
governing_law_log_ratio = np.log(prob_word_governing_law) - np.log(prob_word)
governing_law_log_prior = np.log(prob_class[Governing_law_classification_id])
joint_log_governing_law = (
    testing_data.dot(governing_law_log_ratio) + governing_law_log_prior
)

# Same scores, in the same order, collected into a Python list.
joint_log_governing_law_list = list(joint_log_governing_law)

joint_log_governing_law_list

[-25.300105996575486,
 7.222208336977501,
 1.4022850664279771,
 -9.636105029447492,
 -9.19650691496987,
 -0.3399573798826119,
 -10.706639403525722,
 -11.65333565016799,
 -8.341236514540453,
 -11.043596150555022]

In [9]:
# Log-space score for the Indemnification class:
#   testing_data . (log P(word | class) - log P(word)) + log P(class)
indemnification_log_ratio = np.log(prob_word_indemnification) - np.log(prob_word)
indemnification_log_prior = np.log(prob_class[Indemnification_classification_id])
joint_log_indemnification = (
    testing_data.dot(indemnification_log_ratio) + indemnification_log_prior
)

# Same scores, in the same order, collected into a Python list.
joint_log_indemnification_list = list(joint_log_indemnification)

joint_log_indemnification_list

[-13.58147792430564,
 -15.204477022454405,
 -5.682551845482808,
 3.92318778941495,
 2.1279500558105706,
 -9.141331219529885,
 -2.8412144221882487,
 -7.871076337628969,
 -11.121809596722578,
 -13.508316283318669]

In [10]:
# Log-space score for the Other class:
#   testing_data . (log P(word | class) - log P(word)) + log P(class)
other_log_ratio = np.log(prob_word_other) - np.log(prob_word)
other_log_prior = np.log(prob_class[Other_classification_id])
joint_log_other = testing_data.dot(other_log_ratio) + other_log_prior

# Same scores, in the same order, collected into a Python list.
joint_log_other_list = list(joint_log_other)

joint_log_other_list

[-26.644808323083428,
 -12.606520045157708,
 -6.276567127015984,
 -10.378230647526987,
 -7.886403554894414,
 -7.271237265716858,
 -8.563648033474065,
 -10.076553344373904,
 -14.891732430791517,
 -17.172878601729785]

In [11]:
# Combine the five per-class score lists into one 2-D array and transpose it,
# so that each row holds all class scores for one test statement. The column
# order follows the *_classification_id constants (Preamble, Lender Defaulting,
# Governing Law, Indemnification, Other), which is also the LABEL_NAMES order.
#
# The array is built as floats rather than dtype=object: the scores are plain
# numbers, and a float array makes NumPy raise immediately if the lists do not
# all have the same length, instead of silently producing an array of lists
# that only fails later when argmax is called along axis 1.
join_prob = np.array(
    [
        joint_log_preamble_list,
        joint_log_lender_defaulting_list,
        joint_log_governing_law_list,
        joint_log_indemnification_list,
        joint_log_other_list,
    ],
    dtype=float,
).transpose()
join_prob

array([[-5.18962375956347, -10.09694588629352, -25.300105996575486,
        -13.58147792430564, -26.644808323083428],
       [-14.081503263715542, -12.862245464293633, 7.222208336977501,
        -15.204477022454405, -12.606520045157708],
       [-7.905845974665435, -5.736675025390013, 1.4022850664279771,
        -5.682551845482808, -6.276567127015984],
       [-10.358854616335925, -9.036785922790337, -9.636105029447492,
        3.92318778941495, -10.378230647526987],
       [-0.8830777808552808, 10.582295393398686, -9.19650691496987,
        2.1279500558105706, -7.886403554894414],
       [-9.134549030138684, -7.766904945465448, -0.3399573798826119,
        -9.141331219529885, -7.271237265716858],
       [-6.321725870215625, -5.708748318855492, -10.706639403525722,
        -2.8412144221882487, -8.563648033474065],
       [-4.709459929176728, -2.388498501928927, -11.65333565016799,
        -7.871076337628969, -10.076553344373904],
       [-3.071355831727611, -7.871113667726584, -8.34123

## Making Predictions for Testing Data

In [18]:
# For every row of join_prob, take the column index of the largest score.
# The result is already a 1-D array, so no transpose is needed (transposing
# a 1-D array leaves it unchanged).
prediction = join_prob.argmax(axis=1)

# Column indices line up with LABEL_NAMES, so each index maps directly to the
# name of the predicted class.
prediction_label_list = [LABEL_NAMES[class_id] for class_id in prediction]
prediction_label_list


['Preamble',
 'Governing Law',
 'Governing Law',
 'Indemnification',
 'Lender Defaulting',
 'Governing Law',
 'Indemnification',
 'Lender Defaulting',
 'Preamble',
 'Lender Defaulting']

## Storing the Predicted Data in the form of a DataFrame

In [22]:
# Read the raw test statements and attach the predicted label of each one as
# a 'PREDICTED CLASS' column; the labels are matched to the rows by position.
testing_statement = pd.read_csv(TESTING_DATA_PATH).assign(
    **{'PREDICTED CLASS': prediction_label_list}
)
testing_statement

Unnamed: 0,STATEMENT,PREDICTED CLASS
0,"Amendment No. 1 dated as of February 4, 2016 (...",Preamble
1,"Subject to the Legal Reservations, (a) the cho...",Governing Law
2,"This Agreement, and any non-contractual obliga...",Governing Law
3,Each Lender agrees to indemnify the Administra...,Indemnification
4,Any Lender that (a) has failed to (i) fund all...,Lender Defaulting
5,This Assignment and Assumption shall be govern...,Governing Law
6,As soon as practicable after any payment of In...,Indemnification
7,"Non-Defaulting Lender means, at any time, each...",Lender Defaulting
8,Fundco Facility Agreement means the facility a...,Preamble
9,Notwithstanding any provision of this Agreemen...,Lender Defaulting


## Saving the Predictions in CSV and JSON Files

In [25]:
# Persist the statements with their predicted class in both formats.
# Both writers are called with their default options.
testing_statement.to_csv(path_or_buf=TESTING_PREDICTION_PATH_CSV)
testing_statement.to_json(path_or_buf=TESTING_PREDICTION_PATH_JSON)