In [2]:
import tensorflow as tf
from tensorflow import feature_column
from tensorflow.keras import layers
#tf.enable_eager_execution()
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import numpy as np
import pickle
import numpy as np

In [3]:
# Load the per-state data used by every later cell into the global `data`.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# open pickles that come from a trusted source.
with open('encodedStates.pickle', mode='rb') as pickle_file:
    data = pickle.load(pickle_file)

In [4]:
def df_to_np(dataframe,shuffle=True,batch_size=32,target="HHVEHCNT"):
    """Split a DataFrame into a feature array and a label array.

    Args:
        dataframe: pandas DataFrame containing the ``target`` column. It is
            copied first, so the caller's frame is left unmodified.
        shuffle: accepted for interface compatibility; not used.
        batch_size: accepted for interface compatibility; not used.
        target: name of the label column to separate from the features.

    Returns:
        ``(features, labels)``: ``.values`` of the remaining columns and
        ``.values`` of the ``target`` column.
    """
    features_frame = dataframe.copy()
    # pop() removes the label column from the copy and hands it back
    label_values = features_frame.pop(target).values
    return features_frame.values, label_values

In [10]:
class TransferLearn:
    """Measure how much pre-training on one state helps a model for another state.

    A source ("from") model is trained on one state's data; the weights of its
    first ``layers_to_transfer`` Dense layers are copied into a new model,
    which is then trained on the target ("to") state's data. ``compare``
    reports how the final validation accuracy of the transferred models
    differs from a benchmark model trained on the target data alone.

    Every model built here shares one architecture: ``NUM_WIDE_LAYERS`` Dense
    layers of 128 units (the first with ReLU, the others without an explicit
    activation), a 64-unit ReLU layer and a 4-unit softmax output.
    NOTE(review): the 128-unit layers after the first have no activation and
    are therefore linear -- confirm this is intended.
    """

    # Number of leading 128-unit Dense layers in every model; this is also the
    # maximum number of layers that can be transferred.
    NUM_WIDE_LAYERS = 4

    def __init__(self, fromState, toState, layers_to_transfer, verbose):
        """Store the configuration; no data is loaded and no model is trained yet.

        Args:
            fromState: key of the source state in the notebook-level ``data`` mapping.
            toState: key of the target state (may be assigned later through
                the ``toState`` attribute).
            layers_to_transfer: number of leading 128-unit layers to copy,
                between 1 and ``NUM_WIDE_LAYERS`` inclusive.
            verbose: forwarded to ``model.fit(verbose=...)``.

        Raises:
            ValueError: if ``layers_to_transfer`` is outside 1..NUM_WIDE_LAYERS.
        """
        # Validate first: 0 used to fail later on weights[0], and values above
        # NUM_WIDE_LAYERS would try to load the 64-unit/softmax layers'
        # weights into 128-unit layers.
        if not 1 <= layers_to_transfer <= self.NUM_WIDE_LAYERS:
            raise ValueError(
                "layers_to_transfer must be between 1 and %d, got %r"
                % (self.NUM_WIDE_LAYERS, layers_to_transfer)
            )
        self.data = data  # notebook-level mapping of state key -> DataFrame
        self.fromState = fromState
        self.toState = toState
        self.layers_to_transfer = layers_to_transfer
        self.verbose = verbose

        # Populated by initFromStateData / initToStateData / getFromModelWeights
        self.train_X_from, self.train_y_from = None, None
        self.train_X_to, self.train_y_to = None, None
        self.fromModelWeights = None

    # ------------------------------------------------------------------ helpers

    def _loadState(self, state, sample_size):
        """Return ``df_to_np`` of ``self.data[state]``, optionally on a random sample of rows."""
        frame = self.data[state]
        if sample_size:
            frame = frame.sample(sample_size)
        return df_to_np(frame)

    @staticmethod
    def _requireLoaded(features, labels, loader_name):
        """Raise a clear error when a training method is called before its data was loaded."""
        if features is None or labels is None:
            raise RuntimeError("No training data loaded; call %s() first" % loader_name)

    def _numFreshWideLayers(self):
        """Return how many 128-unit layers are not transferred and start from fresh weights."""
        return self.NUM_WIDE_LAYERS - self.layers_to_transfer

    @staticmethod
    def _compile(model):
        """Compile ``model`` with the optimizer/loss/metric shared by all models here."""
        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        return model

    def _newUntrainedModel(self):
        """Build and compile the shared architecture with freshly initialised weights."""
        dense = tf.keras.layers.Dense
        model = tf.keras.Sequential(
            [dense(128, activation="relu")]
            + [dense(128) for _ in range(self.NUM_WIDE_LAYERS - 1)]
            + [dense(64, activation="relu"), dense(4, activation="softmax")]
        )
        return self._compile(model)

    # --------------------------------------------------------------- public API

    def initFromStateData(self, sample_size = None):
        """Load the source-state arrays (all rows, or ``sample_size`` random rows).

        Returns:
            ``(train_X_from, train_y_from)``, also stored on the instance.
        """
        self.train_X_from, self.train_y_from = self._loadState(self.fromState, sample_size)
        return self.train_X_from, self.train_y_from

    def initToStateData(self, sample_size = None):
        """Load the target-state arrays (all rows, or ``sample_size`` random rows).

        Returns:
            ``(train_X_to, train_y_to)``, also stored on the instance.
        """
        self.train_X_to, self.train_y_to = self._loadState(self.toState, sample_size)
        return self.train_X_to, self.train_y_to

    def getFromModelWeights(self, batch_size=100, validation_split=0.2, epochs=50):
        """Train a model on the source-state data and cache its leading layers' weights.

        Returns:
            A list of length ``layers_to_transfer``; item ``i`` is
            ``model.layers[i].get_weights()``. Also stored in
            ``self.fromModelWeights``.

        Raises:
            RuntimeError: if ``initFromStateData`` has not been called.
        """
        self._requireLoaded(self.train_X_from, self.train_y_from, "initFromStateData")
        model = self._newUntrainedModel()
        model.fit(x=self.train_X_from, y=self.train_y_from, epochs=epochs,
                  batch_size=batch_size, validation_split=validation_split,
                  shuffle=True, verbose=self.verbose)

        self.fromModelWeights = [
            model.layers[i].get_weights() for i in range(self.layers_to_transfer)
        ]
        return self.fromModelWeights

    def transfer(self, trainable=True, batch_size=100, validation_split=0.2, epochs=50):
        """Train a target-state model whose leading layers start from the source model's weights.

        Args:
            trainable: if False the transferred layers are frozen; if True
                they are fine-tuned together with the fresh layers.
            batch_size, validation_split, epochs: forwarded to ``model.fit``.

        Returns:
            ``(model, val_acc)`` where ``val_acc`` is the validation accuracy
            of the last epoch.

        Raises:
            RuntimeError: if the required training data has not been loaded.
        """
        self._requireLoaded(self.train_X_to, self.train_y_to, "initToStateData")
        weights = self.fromModelWeights
        # Retrain the source model when nothing is cached, or when the cache
        # holds fewer layers than are now requested.
        if weights is None or len(weights) < self.layers_to_transfer:
            weights = self.getFromModelWeights()
        print("*****weights obtained from fromStateData*****")

        print("Transferring")
        dense = tf.keras.layers.Dense
        model = tf.keras.Sequential()
        model.add(dense(128, activation="relu", trainable=trainable))
        for _ in range(1, self.layers_to_transfer):
            model.add(dense(128, trainable=trainable))
        # Top the stack up to NUM_WIDE_LAYERS wide layers so the architecture
        # matches the source and benchmark models. (The previous
        # range(4 - k, 4) added k fresh layers instead of 4 - k.)
        for _ in range(self._numFreshWideLayers()):
            model.add(dense(128))
        model.add(dense(64, activation="relu"))
        model.add(dense(4, activation="softmax"))

        # Layers own no variables until built. The input width is read from
        # the first transferred kernel, whose shape is (input_dim, 128).
        # set_weights after build replaces the legacy `weights=` constructor
        # argument, which newer Keras releases do not accept.
        input_dim = weights[0][0].shape[0]
        model.build(input_shape=(None, input_dim))
        for layer, layer_weights in zip(model.layers, weights[:self.layers_to_transfer]):
            layer.set_weights(layer_weights)

        self._compile(model)
        history = model.fit(x=self.train_X_to, y=self.train_y_to, epochs=epochs,
                            batch_size=batch_size, validation_split=validation_split,
                            shuffle=True, verbose=self.verbose)
        val_acc = history.history["val_accuracy"][-1]
        print("*****Transferring done*****", "Trainable:", trainable)
        return model, val_acc

    def benchmark(self, batch_size=100, validation_split=0.2, epochs=50):
        """Train the shared architecture from scratch on the target-state data.

        Returns:
            ``(model, val_acc)`` where ``val_acc`` is the validation accuracy
            of the last epoch.

        Raises:
            RuntimeError: if ``initToStateData`` has not been called.
        """
        self._requireLoaded(self.train_X_to, self.train_y_to, "initToStateData")
        print("*****Training Benchmark Model*****")
        model = self._newUntrainedModel()
        history = model.fit(x=self.train_X_to, y=self.train_y_to, epochs=epochs,
                            batch_size=batch_size, validation_split=validation_split,
                            shuffle=True, verbose=self.verbose)
        val_acc = history.history["val_accuracy"][-1]
        print("*****Training Benchmark Model Done*****")
        return model, val_acc

    def compare(self):
        """Train benchmark, frozen-transfer and unfrozen-transfer models on the target data.

        Returns:
            ``(benchmark_acc - frozen_acc, benchmark_acc - unfrozen_acc)``.
            A positive value means the benchmark scored higher than the
            transferred model.
        """
        benchmark_model, benchmark_acc = self.benchmark()
        frozenTransfer_model, frozenTransfer_acc = self.transfer(trainable=False)
        unfrozenTransfer_model, unfrozenTransfer_acc = self.transfer(trainable=True)

        return (benchmark_acc-frozenTransfer_acc, benchmark_acc-unfrozenTransfer_acc)
        
    

In [15]:
# Single-pair demo: source model trained on every MA row, first 2 layers
# transferred to a model trained on a random sample of 500 NY rows.
MA_to_NY = TransferLearn(fromState = "MA", toState = "NY", layers_to_transfer=2, verbose = 1)
MA_to_NY.initFromStateData()
MA_to_NY.initToStateData(500)
# compare() returns (benchmark_acc - frozen_acc, benchmark_acc - unfrozen_acc)
compareMAandNY = MA_to_NY.compare()

*****Training Benchmark Model*****
Train on 400 samples, validate on 100 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
*****Training Benchmark Model Done*****
Train on 2568 samples, validate on 643 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50


Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
*****weights obtained from fromStateData*****
Transferring
Train on 400 samples, validate on 100 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50


Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
*****Transferring done***** Trainable: False
*****weights obtained from fromStateData*****
Transferring
Train on 400 samples, validate on 100 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50


Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
*****Transferring done***** Trainable: True


In [16]:
# Display the (frozen, unfrozen) accuracy differences for MA -> NY.
compareMAandNY

(-0.00999999, -0.00999999)

In [17]:
# List the state keys available in the loaded data.
print(data.keys())

dict_keys(['NC', 'WI', 'NY', 'MD', 'PA', 'CA', 'TX', 'AZ', 'WA', 'IL', 'KY', 'MT', 'IA', 'GA', 'ME', 'VA', 'SC', 'WV', 'FL', 'NH', 'MN', 'NE', 'AR', 'NJ', 'SD', 'NM', 'OK', 'MI', 'VT', 'ID', 'DE', 'MA', 'WY', 'CO', 'IN', 'AL', 'TN', 'HI', 'AK', 'OH', 'RI', 'LA', 'OR', 'KS', 'UT', 'MO', 'DC', 'NV', 'ND', 'MS', 'CT'])


In [18]:
# States compared pairwise; in the result matrices the row is the source
# state and the column is the target state.
states = ["MA", "NY", "AK", "CO"]
# Matrix dimensions: one row and one column per state.
I,J = len(states), len(states)
# Filled by the sweep cell below with one 2 x I x J array per transferred-layer count.
similarity_list = []

In [19]:
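# NOTE: superseded by the active sweep cell further down; kept for reference only.
# It calls TransferLearn(fromStateData=..., toStateData=...), which the current
# constructor (fromState, toState, layers_to_transfer, verbose) does not accept.
# layers_to_transfer = 4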
# similarity_list.append(np.zeros((2,I,J)))
# #We can only transfer at most 4 layers for now
# for n in range(1,layers_to_transfer + 1):
#     #n of IxJ matrices
#     similarity = np.zeros((2,I,J))
#     for r in range(I):
#         fromState = states[r]
#         for c in range(J):
#             toState = states[c]
#             print("Processing", fromState, toState, "layers transferred:", n)
#             if fromState != toState:
#                 transferLearner = TransferLearn(fromStateData = data[fromState], toStateData = data[toState], layers_to_transfer=n,verbose=0)
#                 frozen_diff, unfrozen_diff = transferLearner.compare()
#             else:
#                 frozen_diff, unfrozen_diff = 0, 0
#             similarity[0][r][c] = frozen_diff
#             similarity[1][r][c] = unfrozen_diff
#     print(similarity)
#     similarity_list.append(similarity)
    

In [20]:
# for i in range(len(similarity_list)):
#     print("Transferred",i,"layers:")
#     print(similarity_list[i])

In [21]:
layers_to_transfer = 3
# similarity_list[n] holds the 2 x I x J result for n transferred layers
# (axis 0: index 0 = frozen transfer, index 1 = unfrozen transfer).
# Index 0 is an all-zero placeholder for "no layers transferred".
# The list is rebuilt here instead of appended to, so re-running this cell
# cannot add duplicate entries and shift the index <-> layer-count mapping.
similarity_list = [np.zeros((2,I,J))]
#We can only transfer at most 4 layers for now
for n in range(1,layers_to_transfer + 1):
    # one pair of IxJ matrices per transferred-layer count
    similarity = np.zeros((2,I,J))
    for r, fromState in enumerate(states):
        # Train the source model once per (fromState, n); transfer() reuses
        # the cached weights for every target state in the inner loop.
        transferLearner = TransferLearn(fromState = fromState, toState = None, layers_to_transfer=n,verbose=0)
        transferLearner.initFromStateData()
        transferLearner.getFromModelWeights()
        for c, toState in enumerate(states):
            print("Processing", fromState, toState, "layers transferred:", n)
            if fromState != toState:
                transferLearner.toState = toState
                transferLearner.initToStateData(sample_size=500)
                frozen_diff, unfrozen_diff = transferLearner.compare()
            else:
                # A state is not transferred onto itself; the diagonal stays 0.
                frozen_diff, unfrozen_diff = 0, 0
            similarity[0][r][c] = frozen_diff
            similarity[1][r][c] = unfrozen_diff
    print(similarity)
    similarity_list.append(similarity)

Processing MA MA layers transferred: 1
Processing MA NY layers transferred: 1
*****Training Benchmark Model*****
*****Training Benchmark Model Done*****
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: False
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: True
Processing MA AK layers transferred: 1
*****Training Benchmark Model*****
*****Training Benchmark Model Done*****
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: False
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: True
Processing MA CO layers transferred: 1
*****Training Benchmark Model*****
*****Training Benchmark Model Done*****
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: False
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: Tru

*****Transferring done***** Trainable: False
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: True
Processing CO CO layers transferred: 2
[[[ 0.          0.04000002  0.02000004 -0.00999999]
  [ 0.03999996  0.          0.00999999  0.01000005]
  [ 0.04000002  0.02999997  0.         -0.00999999]
  [-0.01999998  0.          0.04000002  0.        ]]

 [[ 0.          0.09000003  0.         -0.05000001]
  [ 0.02999997  0.          0.05000001  0.04000002]
  [ 0.04000002  0.06999996  0.         -0.01999998]
  [-0.02999997  0.00999999  0.          0.        ]]]
Processing MA MA layers transferred: 3
Processing MA NY layers transferred: 3
*****Training Benchmark Model*****
*****Training Benchmark Model Done*****
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: False
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: True
Processing MA AK layers transferr

In [26]:
#run 1
# Print the (frozen, unfrozen) difference matrices; the list index is the
# number of layers that were transferred.
for layer_count, layer_matrices in enumerate(similarity_list):
    print("Transferred", layer_count, "layers:")
    print(layer_matrices)

Transferred 0 layers:
[[[0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]]

 [[0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]]]
Transferred 1 layers:
[[[ 0.          0.02000004  0.00999999  0.        ]
  [-0.01999998  0.          0.         -0.00999999]
  [-0.01000005  0.          0.          0.00999999]
  [-0.03000003 -0.01999998  0.05000001  0.        ]]

 [[ 0.          0.01000005 -0.03000003 -0.01000005]
  [-0.00999999  0.          0.00999999 -0.00999999]
  [-0.02000004  0.01999998  0.         -0.00999999]
  [-0.04000002 -0.01999998  0.05000001  0.        ]]]
Transferred 2 layers:
[[[ 0.          0.04000002  0.02000004 -0.00999999]
  [ 0.03999996  0.          0.00999999  0.01000005]
  [ 0.04000002  0.02999997  0.         -0.00999999]
  [-0.01999998  0.          0.04000002  0.        ]]

 [[ 0.          0.09000003  0.         -0.05000001]
  [ 0.02999997  0.          0.05000001  0.04000002]
  [ 0.04000002  0.06999996  0.         -0.01999998]
  [-0.02999997  

In [24]:
# Split every 2 x I x J result into its frozen (index 0) and unfrozen
# (index 1) I x J matrix, keeping one list entry per transferred-layer count.
frozen_similarity_list = [m[0] for m in similarity_list]
unfrozen_similarity_list = [m[1] for m in similarity_list]

In [25]:
# Display the frozen-transfer matrices (index = layers transferred).
frozen_similarity_list

[array([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]),
 array([[ 0.        ,  0.02000004,  0.00999999,  0.        ],
        [-0.01999998,  0.        ,  0.        , -0.00999999],
        [-0.01000005,  0.        ,  0.        ,  0.00999999],
        [-0.03000003, -0.01999998,  0.05000001,  0.        ]]),
 array([[ 0.        ,  0.04000002,  0.02000004, -0.00999999],
        [ 0.03999996,  0.        ,  0.00999999,  0.01000005],
        [ 0.04000002,  0.02999997,  0.        , -0.00999999],
        [-0.01999998,  0.        ,  0.04000002,  0.        ]]),
 array([[ 0.        , -0.00999999, -0.01999998, -0.01999998],
        [-0.01999998,  0.        ,  0.        , -0.06      ],
        [-0.01000005, -0.01999998,  0.        , -0.06      ],
        [ 0.01000005, -0.00999999, -0.06999999,  0.        ]])]

In [27]:
# Display the unfrozen-transfer matrices (index = layers transferred).
unfrozen_similarity_list

[array([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]),
 array([[ 0.        ,  0.01000005, -0.03000003, -0.01000005],
        [-0.00999999,  0.        ,  0.00999999, -0.00999999],
        [-0.02000004,  0.01999998,  0.        , -0.00999999],
        [-0.04000002, -0.01999998,  0.05000001,  0.        ]]),
 array([[ 0.        ,  0.09000003,  0.        , -0.05000001],
        [ 0.02999997,  0.        ,  0.05000001,  0.04000002],
        [ 0.04000002,  0.06999996,  0.        , -0.01999998],
        [-0.02999997,  0.00999999,  0.        ,  0.        ]]),
 array([[ 0.        ,  0.07000002,  0.00999999,  0.03000003],
        [-0.01999998,  0.        , -0.03000003, -0.04000002],
        [-0.05000001,  0.05000001,  0.        , -0.03999996],
        [-0.03999996,  0.00999999, -0.06999999,  0.        ]])]

In [30]:
# Element-wise maximum across all transferred-layer counts. The list includes
# the all-zero placeholder at index 0, so no entry of the result can be
# negative. NOTE(review): confirm that flooring at 0 is intended.
frozen_max_similarity_matrix = np.maximum.reduce(frozen_similarity_list)

In [31]:
# Display the per-pair maximum for frozen transfer (rows: source, columns: target).
frozen_max_similarity_matrix

array([[0.        , 0.04000002, 0.02000004, 0.        ],
       [0.03999996, 0.        , 0.00999999, 0.01000005],
       [0.04000002, 0.02999997, 0.        , 0.00999999],
       [0.01000005, 0.        , 0.05000001, 0.        ]])

In [32]:
# Element-wise maximum across all transferred-layer counts. The list includes
# the all-zero placeholder at index 0, so no entry of the result can be
# negative. NOTE(review): confirm that flooring at 0 is intended.
unfrozen_max_similarity_matrix = np.maximum.reduce(unfrozen_similarity_list)

In [33]:
# Display the per-pair maximum for unfrozen transfer (rows: source, columns: target).
unfrozen_max_similarity_matrix

array([[0.        , 0.09000003, 0.00999999, 0.03000003],
       [0.02999997, 0.        , 0.05000001, 0.04000002],
       [0.04000002, 0.06999996, 0.        , 0.        ],
       [0.        , 0.00999999, 0.05000001, 0.        ]])