# FMTC2019 - Neural Networks - Variable Annuities

#### paper : https://arxiv.org/pdf/1606.07831

### Importing Packages 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras 
import tensorflow
import theano
import sys

from sklearn.model_selection import train_test_split,cross_val_score,GridSearchCV
from sklearn.preprocessing import StandardScaler,LabelEncoder,OneHotEncoder
from sklearn.metrics import confusion_matrix
from keras.models import Sequential
from keras.layers import Dense,Flatten
from keras.wrappers.scikit_learn import KerasRegressor
from keras.layers import Dropout
import itertools

Using TensorFlow backend.


### Importing Data

In [2]:
# We will use two databases selected by K-means.
# Dimension of sample1.csv = 340 observations (rep_contracts).
# Dimension of sample2.csv = 680 observations (train_contracts).
# sample2 will be our data that will be splitted up into train, test and validation data.
# sample1 will be our representative contracts.

# Importing:
rep_contracts = pd.read_csv("sample1.csv")
train_contracts = pd.read_csv("sample2.csv")

# Cleaning
rep_contracts = rep_contracts.iloc[:,2:]
train_contracts = train_contracts.iloc[:,2:]

### Data 

In [3]:
# Function that cleans Data:

def cleaningData(x):
    
    # Building Account Value (AV) variable 
    x['AV'] = x[['FundValue' + str(i) for i in np.arange(1, 11, 1)]].sum(axis=1)

    
    # Filtering only importants variables
    x = x[['gender','productType','ttm','age', 'AV','gbAmt','withdrawal','wbWithdrawalRate','fmv']]
    
    # Selecting only two categories

    # TRASH:
    x.loc[:, 'productType'] = ["trash" if value in ['ABRP','ABRU','ABSU','IBRP','IBRU','IBSU','MBRP','MBRU','MBSU','DBAB','DBIB','DBMB','DBWB'] else value for value in list(x.productType)]

    # GMDB:
    x.loc[:, 'productType'] = ["GMDB" if value in ['DBRP','DBRU','DBSU'] else value for value in list(x.productType)]

    # GMWB:
    x.loc[:, 'productType'] = ["GMWB" if value in ['WBRP','WBRU','WBSU'] else value for value in list(x.productType)]
    
    # Building variables:
    x.withdrawal = x.withdrawal/x.AV
    x.gbAmt = x.gbAmt/x.AV
    
    # Categories into numbers:
    auxiliar_data = pd.DataFrame(x[::])
    auxiliar_data['male'] = [1 if value == 'M' else 0 for value in auxiliar_data.gender]
    auxiliar_data['GMDB'] = [1 if value == 'GMDB' else 0 for value in auxiliar_data.productType]
    auxiliar_data['GMWB'] = [1 if value == 'GMWB' else 0 for value in auxiliar_data.productType]

    # Dropping old category variables:
    auxiliar_data = auxiliar_data.drop(['gender', 'productType'], axis=1)

    # Dropping Acount Value == 0:
    auxiliar_data = auxiliar_data[auxiliar_data.AV != 0]
    
    y = pd.DataFrame(auxiliar_data['fmv'])
    x = auxiliar_data.drop(['fmv'], axis = 1)
    
    return [x,y]

In [4]:
# Cleaning Data:
rep = cleaningData(rep_contracts)
train = cleaningData(train_contracts)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


In [5]:
# Building continuous transformation funtion:
# def contTransform(representative, trainning):
#     Rt = np.array([max(max(rep[0].loc[:, value]),max(train[0].loc[:, value])) for value in rep[0].columns[0:6]])
    
#     fmenos = trainning - representative
#     fmenos = np.array([max(value, 0) for value in fmenos])
#     fmenos = fmenos/Rt

#     fmais = representative - trainning
#     fmais = np.array([max(value, 0) for value in fmais])
#     fmais = fmais/Rt

#     return list(fmenos) + list(fmais)

In [6]:
# Building function in page 8.
# There are two types of vaiables: Categorical and Continuous.
# This function will be the inputs of our Neural Networks.

def distance(t, r):
    
    # Building continuous transformation funtion:
    def contTransform(representative, trainning):
        Rt = np.array([max(max(rep[0].loc[:, value]),max(train[0].loc[:, value])) for value in rep[0].columns[0:6]])

        fmenos = trainning - representative
        fmenos = np.array([max(value, 0) for value in fmenos])
        fmenos = fmenos/Rt

        fmais = representative - trainning
        fmais = np.array([max(value, 0) for value in fmais])
        fmais = fmais/Rt

        return list(fmenos) + list(fmais)
    
    # Continuous Variables Transformation
    F_MenosMais = contTransform(representative = r[['ttm','age', 'AV', "gbAmt", 'withdrawal', 'wbWithdrawalRate']],
                 trainning = t[['ttm','age', 'AV', "gbAmt", 'withdrawal', 'wbWithdrawalRate']])
    
    # Categorical Variables Transformation
    Fc = t[['male', 'GMDB', 'GMWB']] == r[['male', 'GMDB', 'GMWB']]
    Fc = list(Fc.apply(lambda x:1 if x==False else 0))
    
    # Appending Results
    f = Fc + F_MenosMais
    return(f)
    

### Splitting Data

In [7]:
x_train, x_test, y_train, y_test = train_test_split(train[0],train[1],test_size=0.20,random_state=0)

In [8]:
# Building inputs
measures = [[]]*(x_train.shape[0]*rep[0].shape[0])
k = 0
for i in range(x_train.shape[0]):
    for j in range(rep[0].shape[0]):
        measures[k] = distance(t = x_train.iloc[i, 0:], r = rep[0].iloc[j, 0:])
        k = k+1

KeyboardInterrupt: 

In [126]:
merged = list(itertools.chain(*measures))
base = np.array([[merged[i] for i in range(15*338)+j] for j in np.arange(0, len(merged), 15*338)])

### Evaluating the ANN

In [147]:
def build_regressor():
    regressor = Sequential()
    regressor.add(Dense(output_dim=338,init='normal',activation ='linear',input_dim=15*338))
    regressor.add(Dense(output_dim=338,init='normal',activation ='exponential'))
    regressor.add(Dense(output_dim=15*338,init='normal',activation ='softmax'))
#     regressor.dot(inputs = rep[1], axes, normalize=False)
    regressor.compile(optimizer = 'adam',loss='mean_squared_error', metrics=['mse'])
    return regressor

In [148]:
regressor = KerasRegressor(build_fn = build_regressor,batch_size=1000,epochs=2)
accuracies = cross_val_score(estimator = regressor, X = base, y = y_train,cv=10)

  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.
  """


Epoch 1/2


ResourceExhaustedError: OOM when allocating tensor with shape[5070,5070] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator mklcpu
	 [[{{node training_10/Adam/mul_12}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


In [491]:
mean = accuracies.mean()
variance = accuracies.std()
print("Mean : " + str("{0:.2f}".format(mean*100))+"%")
print("Variance : " + str("{0:.2f}".format(variance*100))+"%")

Mean : -0.40%
Variance : 0.72%
