# Texas - SmoothGrad

In [None]:
from keras.layers import Dense, Dropout, Input, Conv2D, MaxPooling2D, Flatten
from keras.models import Sequential, Model
import keras
import numpy as np
from keras import optimizers
from sklearn.model_selection import train_test_split
from keras.callbacks import CSVLogger
import tensorflow as tf
import innvestigate
import innvestigate.utils as iutils
import pandas as pd
import matplotlib.pyplot as plt
import os
import time

In [None]:
import setGPU

In [None]:
# Module-level TensorFlow interactive session. evaluate_model() enters it with
# `sess.as_default()` to evaluate the per-sample loss tensor.
sess = tf.InteractiveSession()

In [None]:
# Seed for the initial subsampling of the dataset and for the default output
# path. The experiment loop further down rebinds this name to 0..9.
random_state = 42

### 1. Loading the Texas dataset

In [None]:
# Read the feature table and the label column from ../data/texas/100.
# Neither file carries a header row.
data = pd.read_csv(r'' + '../data' + '/texas/100/feats', header=None)
labels = pd.read_csv(r'' + '../data' + '/texas/100/labels', header=None)

# Work on plain ndarrays from here on. Labels are shifted down by one
# (presumably they are 1-based in the file -- verify against the data).
X = data.values
y = labels.values - 1

In [None]:
# Reproducibly draw 80000 row indices out of the loaded dataset.
# NOTE(review): np.random.choice samples WITH replacement unless told
# otherwise, so the same row can be drawn more than once and may later land
# in both a train and a test slice. Confirm this is intended (switching to
# replace=False requires len(X) >= 80000).
np.random.seed(random_state)
main_indicies = np.random.choice(len(X), size=80000, replace=True)

In [None]:
# Materialise the sampled rows; the experiment loop reshuffles these arrays.
X_temp = X[main_indicies]
y_temp = y[main_indicies]

In [None]:
# One-hot encode the integer labels into 100 classes.
y_temp = keras.utils.to_categorical(y_temp, num_classes=100)

### Training the models

In [None]:
# --- Experiment identity and output location -------------------------------
experiment_name = "texas-smoothgrad"
path = "ThresholdExperiments/{}/{}/".format(experiment_name, random_state)

# --- Optimiser settings (passed to Adagrad in the experiment loop) ---------
lr, decay = 0.01, 1e-7

# --- Training schedule ------------------------------------------------------
epochs = 50                 # epochs per model
points_per_model = 10000    # size of each train slice and of each test slice
verbose = 0                 # Keras fit() verbosity

In [None]:
def create_texas_base():
    """Build the (uncompiled) fully connected classifier used for Texas.

    The network is a stack of tanh ``Dense`` layers with 2048, 1024, 512 and
    256 units followed by a 100-way softmax output layer. Every layer draws
    its kernel from N(0, 0.01) and starts with zero biases.

    Notes carried over from the original comments: the reference settings
    were batch_size=512, epochs=50, learning_rate=0.01, lr_decay=1e-7, and an
    earlier, smaller variant (a single 20-unit tanh layer with a no-op
    dropout before the softmax) is no longer used.

    Returns:
        A ``keras.models.Sequential`` model; the caller compiles it.
    """
    hidden_widths = (2048, 1024, 512, 256)
    n_classes = 100

    def _dense(units, activation):
        # A fresh initializer instance per layer, exactly as if written out.
        return Dense(
            units,
            activation=activation,
            kernel_initializer=keras.initializers.RandomNormal(
                mean=0.0, stddev=0.01, seed=None),
            bias_initializer='zeros',
        )

    model = Sequential()
    for width in hidden_widths:
        model.add(_dense(width, 'tanh'))
    model.add(_dense(n_classes, 'softmax'))
    return model

In [None]:
def evaluate_model(model,X,y,explanation="gradient",batch_size=256):
    """Compute per-sample loss, prediction and explanation statistics.

    Args:
        model: Keras model; ``model.predict(X)`` supplies the predictions.
        X: Input array whose rows are samples. The ``axis=1`` reductions
            below assume it is 2-D.
        y: Targets used as ``y_true`` for categorical cross-entropy, with the
            same shape as the model output (one-hot in this notebook).
        explanation: Analyzer name handed to ``innvestigate.create_analyzer``.
        batch_size: Number of rows passed to the analyzer per call.

    Returns:
        Tuple ``(loss, prediction_var, analysis_var, analysis_1, analysis_2)``
        of per-sample arrays: categorical cross-entropy, variance of the
        prediction vector, variance of the explanation vector, and the L1 and
        L2 norms of the explanation vector.
    """
    prediction = model.predict(X)
    # Keras' signature is categorical_crossentropy(y_true, y_pred). Passing
    # the prediction first would take the log of the one-hot targets instead
    # of the predicted probabilities.
    loss = keras.losses.categorical_crossentropy(
        tf.convert_to_tensor(y), tf.convert_to_tensor(prediction))
    with sess.as_default():
        loss = loss.eval()
    try:
        # noinspection PyUnresolvedReferences
        model_wo_softmax = iutils.keras.graph.model_wo_softmax(model)
    except Exception:
        # Best effort: if the softmax cannot be stripped (presumably because
        # the model does not end in one), analyze the model as given.
        model_wo_softmax = model
    analyzer = innvestigate.create_analyzer(explanation, model_wo_softmax)
    # Run the analyzer in chunks and write each chunk into a preallocated
    # array of the same shape as X.
    analysis = np.zeros(X.shape)
    for begin in range(0, len(X), batch_size):
        end = begin + batch_size
        analysis[begin:end] = analyzer.analyze(X[begin:end])
    prediction_var = np.var(prediction,axis=1)
    analysis_var = np.var(analysis,axis=1)
    analysis_1 = np.linalg.norm(analysis,axis=1,ord=1)
    analysis_2 = np.linalg.norm(analysis,axis=1,ord=2)
    return loss, prediction_var, analysis_var, analysis_1, analysis_2

In [None]:
# --- Experiment identity, explanation method and output location -----------
experiment_name = "texas-smoothgrad"
explanation = "smoothgrad"   # analyzer name forwarded to evaluate_model()
path = "ThresholdExperiments/{}/{}/".format(experiment_name, random_state)

# --- Optimiser settings (passed to Adagrad in the experiment loop) ---------
lr, decay = 0.01, 1e-7

# --- Training schedule ------------------------------------------------------
epochs = 50                 # epochs per model
points_per_model = 10000    # size of each train slice and of each test slice
verbose = 0                 # Keras fit() verbosity

In [None]:
# Run the experiment for ten seeds. For each seed the sampled data is
# reshuffled and cut into consecutive (train, test) pairs of
# `points_per_model` rows each; one model is trained per pair and its
# per-sample statistics on both halves are written to
# ThresholdExperiments/<experiment_name>/<seed>/<model_number>.csv.
metric_names = ("loss", "prediction_var", "analysis_var", "analysis_1", "analysis_2")
for random_state in range(10):
    print(random_state)
    print("######")
    path="ThresholdExperiments/{}/{}/".format(experiment_name,random_state)
    # exist_ok: re-running the cell (e.g. after an interrupted run) must not
    # abort with FileExistsError; existing CSVs are simply overwritten.
    os.makedirs(path, exist_ok=True)
    np.random.seed(random_state)
    # A permutation of the sampled rows (no replacement), specific to this seed.
    main_indicies = np.random.choice(len(X_temp),len(X_temp),replace=False)
    X,y = X_temp[main_indicies], y_temp[main_indicies]
    start = time.time()
    # Each model consumes one train slice plus one test slice.
    n_models = len(X) // (2 * points_per_model)
    for model_number in range(n_models):
        print(model_number, end=" ")
        start_train = 2*model_number*points_per_model
        start_test = start_train+points_per_model
        end_test = start_test+points_per_model
        x_train, x_test =  X[start_train:start_test], X[start_test:end_test]
        y_train, y_test =  y[start_train:start_test], y[start_test:end_test]
        optimizer = optimizers.Adagrad(lr=lr, decay=decay)
        model = create_texas_base()
        model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
        model.fit(x_train, y_train, epochs=epochs,
                  validation_data=(x_test, y_test), verbose=verbose)
        # Columns are added in the order train_* then test_*, each following
        # `metric_names`, so the CSV layout is train_loss, ..., test_analysis_2.
        df = pd.DataFrame()
        for split, inputs, targets in (("train", x_train, y_train),
                                       ("test", x_test, y_test)):
            metrics = evaluate_model(model, inputs, targets, explanation=explanation)
            for metric_name, values in zip(metric_names, metrics):
                df["{}_{}".format(split, metric_name)] = values
        df.to_csv(os.path.join(path, "{}.csv".format(model_number)))
        # Seconds elapsed since this seed's run started.
        print(time.time() - start)