In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.python.framework import ops
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [2]:
# loading data
data = pd.read_csv(
    "/Users/yamadaikuya/Desktop/Research/Codes/Ethiopia/csv/interactions.csv",
    )

#data = pd.read_csv(
#    "/Users/yamadaikuya/Desktop/Research/Codes/Ethiopia/csv/pruned.csv",
#    )

In [3]:
data = data.dropna(subset=['Age', 'female', 'Years_of_Schooling', 'Math_Score',
       'parents_are_farmers', 'born_in_this_village', 'Risk_averse',
       'Competitive', 'Absolute_Overconfidence', 'Relative_Overconfidence', "Cut_Flower",
                           'ln_workers_income_cu', 'save_rate'])

In [4]:
data = data.drop(['type_', 'ln_workers_income_bf'], axis=1)

In [5]:
# dataframe -> numpy.array
Y = data.loc[:, ["Cut_Flower"]].values
X = data.drop(["Cut_Flower", 'ln_workers_income_cu', 'save_rate'], axis=1).values
print(X.shape, Y.shape)

(261, 55) (261, 1)


In [6]:
# normalization
def normalize(x):
    xmean = x.mean(axis=0, keepdims=True)
    xstd = np.std(x, axis=0, keepdims=True)
    zscore = (x - xmean) / xstd
    return zscore

In [7]:
X = normalize(X)

In [8]:
def sigmoid(z):
    x = tf.placeholder(tf.float32, name="x")
    sigmoid = tf.sigmoid(x)
    sess = tf.Session()
    result = sess.run(sigmoid, feed_dict={x: z})
    return result

In [9]:
# n_x, n_y = nbr of variables
def create_placeholders(n_x, n_y):
    X = tf.placeholder(dtype=tf.float32, shape=[n_x, None])
    Y = tf.placeholder(dtype=tf.float32, shape=[n_y, None])
    return X, Y

In [10]:
# Initializing the parameters 
# Determin the shape of hidden layers
def initialize_parameters():
    
    tf.set_random_seed(198)
    
    W1 = tf.get_variable("W1", [4, 55], initializer=tf.contrib.layers.xavier_initializer(seed=198))
    b1 = tf.get_variable("b1", [4, 1], initializer=tf.zeros_initializer())
    W2 = tf.get_variable("W2", [1, 4], initializer=tf.contrib.layers.xavier_initializer(seed=198))
    b2 = tf.get_variable("b2", [1, 1], initializer=tf.zeros_initializer())
    
    parameters={
        "W1": W1,
        "b1": b1,
        "W2": W2,
        "b2": b2
    }
    
    return parameters

In [11]:
def forward_propagation(X, parameters):
    # retrieve parameters
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    
    Z1 = tf.add(tf.matmul(W1, X), b1)
    A1 = tf.nn.relu(Z1)
    Z2 = tf.add(tf.matmul(W2, A1), b2)
    
    return Z2

In [12]:
def compute_cost(Z2, Y):
    logits = tf.transpose(Z2)
    labels = tf.transpose(Y)
    cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
    return cost

In [13]:
# check the behavior
tf.reset_default_graph()
sess = tf.Session()
a, b = create_placeholders(55, 1)
parameters = initialize_parameters()
Z3 = forward_propagation(a, parameters)
cost = compute_cost(Z3, b)
print("Z2 = {}".format(str(Z3)))
print("cost = {}".format(str(cost)))


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Colocations handled automatically by placer.
Z2 = Tensor("Add_1:0", shape=(1, ?), dtype=float32)
cost = Tensor("Mean:0", shape=(), dtype=float32)


In [14]:
# Divide dataset 
def dividing(X, Y, seed=198):
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size = 0.2, random_state=seed
        )
    return X_train.T, Y_train.T, X_test.T, Y_test.T
X_train, Y_train, X_test, Y_test = dividing(X, Y)

In [15]:
# sampling
def sampling(X_train, Y_train):
    _ = len(X_train)
    a = np.random.choice(_, int(_*0.9), replace=False)
    return X_train[:, a], Y_train[:, a]

In [18]:
# model_normal
def model(X_train, Y_train, num_epochs, learning_rate = 0.001,
         minibatch_size = 32, print_cost = True):
    
    ops.reset_default_graph() # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)
    (n_x, m) = X.shape
    n_y = Y.shape[0]
    costs = []
    test_cost = []
    
    Xph, Yph = create_placeholders(n_x, n_y)
    
    parameters = initialize_parameters()
    
    Z3 = forward_propagation(Xph, parameters)
    
    cost = compute_cost(Z3, Yph)
    
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    
    init = tf.global_variables_initializer()
    
    sess = tf.Session()
    sess.run(init)
    
    for epoch in range(num_epochs+1):
         
        _, c = sess.run([optimizer, cost], feed_dict={Xph: X, Yph: Y})
        
        epoch_cost = c
        
        if epoch % 5000 == 0 and print_cost==True:
            print("Cost after epoch %i: %f"%(epoch, epoch_cost))

        if epoch % 10 == 0:
            costs.append(epoch_cost)
    
    # plot the cost
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per tens)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()
    
    # lets save the parameters in a variable
    print(type(parameters))
    parameters = sess.run(parameters)
    print ("Parameters have been trained!")

    # Calculate the correct predictions
    correct_prediction = tf.equal(tf.round(tf.sigmoid(Z3)), Yph)

    # Calculate accuracy on the test set
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    
    # Get coefficients of the output layer
    output = tf.sigmoid(Z3)
    output = output.eval({Xph: X_train}, session=sess)
    
    print ("Train Accuracy:", accuracy.eval({Xph: X_train, Yph: Y_train}, session=sess))
    #print ("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}, session=sess))

    return parameters, output

In [19]:
X = X.T
Y = Y.T
parameters, output = model(X_train=X, Y_train=Y, num_epochs=10000, learning_rate=0.001)

ValueError: Dimensions must be equal, but are 55 and 261 for 'MatMul' (op: 'MatMul') with input shapes: [4,55], [261,?].

In [119]:
tf.train.batch(X, 32)

TypeError: values must be a list.