# Classify Selected ClaimReview Claims

### Imports

In [1]:
import os
import datetime
import h5py
import logging
import argparse
import pickle
import pandas as pd
import keras
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Input
from tensorflow.keras.layers import BatchNormalization, LayerNormalization
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn import metrics
from matplotlib import pyplot as plt
import seaborn as sns
from joblib import dump, load
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
import config
from utils.file import directory_check

### Functions

In [2]:
# Map a model output (row of class scores) to its relation label
def prediction_value(pred):
    """Return the relation label of the highest-scoring class in ``pred[0]``.

    Parameters
    ----------
    pred : array-like, shape (n_rows, n_classes)
        Class scores as returned by ``MODEL.predict``. Only the first row is
        inspected; any further rows are ignored.

    Returns
    -------
    str
        One of 'dob', 'education', 'institution', 'pob', 'pod'.

    Raises
    ------
    KeyError
        If the winning index has no entry in the label table (i.e. the row
        holds more than five scores).
    """
    value_dict = {0: 'dob', 1: 'education', 2: 'institution', 3: 'pob', 4: 'pod'}

    # Rank within row 0 only: taking the maximum across every row could pick
    # a value that never occurs in row 0 (or fail outright on a multi-row
    # NumPy array, whose rows cannot be compared with the builtin max).
    scores = np.asarray(pred)[0]

    # argmax returns the first maximum, so search the reversed row to keep
    # exact ties resolving to the highest class index.
    index = len(scores) - 1 - int(np.argmax(scores[::-1]))
    return value_dict[index]

# Predict the relation label for a single dataframe row (for use with df.apply)
def predict_df(X):
    """Predict the relation label for one dataframe row.

    Reads the row's ``Short_Path`` vector, turns it into a single-sample
    batch, runs the module-level ``MODEL`` on it and converts the scores to a
    label string with ``prediction_value``.

    Parameters
    ----------
    X : row-like
        Must provide a ``"Short_Path"`` entry exposing ``.shape``.

    Returns
    -------
    str
        The label returned by ``prediction_value``.
    """
    features = X["Short_Path"]
    n_features = features.shape[0]
    # Column of shape (n, 1), transposed into a (1, n) batch with one sample.
    batch = np.reshape(features, (n_features, 1)).transpose()
    return prediction_value(MODEL.predict(batch))

# Report whether a row's predicted label matches its actual relation ("Yes"/"No")
def predict_success(X):
    """Tell whether a row's predicted relation equals its actual relation.

    Parameters
    ----------
    X : row-like
        Must provide ``'Prediction'`` and ``'Relation'`` entries.

    Returns
    -------
    str
        "Yes" when the two entries compare equal, otherwise "No".
    """
    matched = X['Prediction'] == X['Relation']
    return "Yes" if matched else "No"

### Load trained model and dataframe of shortest paths

In [3]:
# Artefact file names; each is appended to a directory taken from config.
sp_file = '/sp_df-cr-d256-wl50-nw200-win15-p2-q3-201118.pkl'
model_f = '/sp_model-dnn_wide-dnn_wide-cr-d256-wl50-nw200-win15-p2-q3-201119.h5'

# Restore the trained Keras model. MODEL is read as a global by predict_df.
MODEL = tf.keras.models.load_model(
    config.MODEL_DIR + model_f
)

### Modify Dataframe for Prediction task

In [4]:
# NOTE: unpickling can execute arbitrary code; only load files you trust.
df = pd.read_pickle(config.SP_DIR + sp_file)

# Remove all non-ClaimReview claims. The explicit .copy() makes the result an
# independent frame, so the column assignments below do not write into a
# slice of the loaded frame (which raises SettingWithCopyWarning).
df = df[df['Relation']=='cr'].copy()

# Sort entries by the numeric suffix of the UID (e.g. 'pob_cr12' -> 12)
df['ID'] = df['UID'].apply(lambda x: x.split('_')[-1].split('r')[-1]).astype(int)
df = df.set_index('ID').sort_index()

# Replace the 'cr' marker with the relation named in the UID prefix
# (e.g. 'pob_cr12' -> 'pob')
df['Relation'] = df['UID'].apply(lambda x: x.split('_')[0])

# Add two placeholder columns that the prediction step fills in
df['Prediction'] = 'None'
df['Successful?'] = 'No'
df.head()

Unnamed: 0_level_0,UID,Subject,Object,Relation,Maj_Vote,Short_Path,Prediction,Successful?
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,pob_cr1,Penny Wong,Malaysia,pob,yes,"[-0.12060831, -0.004389023, 0.016019436, 0.095...",,No
2,pob_cr2,Barack Obama,Kenya,pob,yes,"[-0.11935643, 0.0055734552, 0.032190174, -0.01...",,No
3,pob_cr3,Barack Obama,United States,pob,yes,"[-0.022086723, 0.010196335, 0.033510454, 0.000...",,No
4,pob_cr4,Fred Trump,Germany,pob,yes,"[-0.08399602, -0.037151054, 0.017798765, 0.057...",,No
5,dob_cr5,Barron Trump,2006,dob,yes,"[-0.14999631, -0.032707285, -0.07424754, 0.104...",,No


### Read in encoder and prep data

In [5]:
# Rebuild the label encoder from the saved class array instead of refitting it.
encoder = LabelEncoder()
encoder_file = config.SP_SPLITS_DIR + '/class_encoder.npy'
# allow_pickle=True lets np.load unpickle object arrays; only use trusted files.
encoder.classes_ = np.load(encoder_file, allow_pickle=True)

In [6]:
# Expand each Short_Path vector into one column per component.
X = df["Short_Path"].apply(pd.Series)

# Pass the labels as a 1-D Series: LabelEncoder expects shape (n_samples,),
# and a one-column DataFrame makes sklearn emit a column-vector warning.
y = encoder.transform(df['Relation'])

# Fix the one-hot width to the encoder's class count. Otherwise the width is
# inferred from the largest label present, and y would come out narrower
# whenever the last class is absent from these claims.
# (Assumes the model emits one score per encoder class - confirm.)
y = to_categorical(y, num_classes=len(encoder.classes_))

  return f(**kwargs)


### Make Predictions

In [7]:
# Score every claim in a single batched call. Running the model once per row
# through df.apply repeats the per-call overhead for each claim and then
# scores the same data again for the AUC.
predictions = MODEL.predict(X)

# X was built from df, so prediction i belongs to the i-th row of df.
# prediction_value expects a 2-D array and reads its first row, hence the
# one-row slices.
df["Prediction"] = [
    prediction_value(predictions[i:i + 1]) for i in range(len(predictions))
]
df["Successful?"] = df.apply(predict_success, axis=1)
print(f"AUC: {round(metrics.roc_auc_score(y, predictions), 3)}")

# Share of rows whose predicted label matches the actual relation.
acc = float((df['Successful?'] == 'Yes').mean())
print(f"Accuracy: {round(acc*100,2)}%")
df

AUC: 0.958
Accuracy: 71.43%


Unnamed: 0_level_0,UID,Subject,Object,Relation,Maj_Vote,Short_Path,Prediction,Successful?
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,pob_cr1,Penny Wong,Malaysia,pob,yes,"[-0.12060831, -0.004389023, 0.016019436, 0.095...",dob,No
2,pob_cr2,Barack Obama,Kenya,pob,yes,"[-0.11935643, 0.0055734552, 0.032190174, -0.01...",institution,No
3,pob_cr3,Barack Obama,United States,pob,yes,"[-0.022086723, 0.010196335, 0.033510454, 0.000...",pob,Yes
4,pob_cr4,Fred Trump,Germany,pob,yes,"[-0.08399602, -0.037151054, 0.017798765, 0.057...",pob,Yes
5,dob_cr5,Barron Trump,2006,dob,yes,"[-0.14999631, -0.032707285, -0.07424754, 0.104...",pob,No
6,dob_cr6,Isabelle Duterte,"January 26, 2002",dob,yes,"[-0.04258087, -0.14339234, 0.0050823386, 0.008...",dob,Yes
7,education_cr7,Tej Pratap Yadav,Doctorate Degree,education,yes,"[-0.09566146, -0.04633151, -0.0026847243, 0.02...",education,Yes
8,education_cr8,Smriti Irani,MA Degree,education,yes,"[-0.073904075, -0.0076472485, 0.050191842, 0.0...",institution,No
9,education_cr9,Melania Trump,Bachelor's Degree,education,yes,"[-0.10019517, -0.05538463, -0.023950523, 0.043...",institution,No
10,education_cr10,Michelle Obama,Juris Doctor,education,yes,"[-0.07963335, 0.0015545988, 0.04472027, 0.0461...",education,Yes


### Save Predictions

In [9]:
# Prepare the output directory before writing. directory_check comes from
# utils.file; presumably it creates the directory when missing - confirm.
directory_check(config.CLAIM_DIR)
# Persist the frame, including the Prediction and Successful? columns, as a
# pickle at the path given by config.CLAIM_DF.
df.to_pickle(config.CLAIM_DF)