In [1]:
import tensorflow as tf
from tensorflow import feature_column
from tensorflow.keras import layers
#tf.enable_eager_execution()
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import numpy as np
import pickle
import numpy as np

In [2]:
# Load the pre-encoded data set. Later cells index `data` by two-letter
# state code (e.g. data["MA"]) and pass each entry to df_to_np, so each value
# is presumably a pandas DataFrame -- TODO confirm against the encoding step.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load 'encodedStates.pickle' if it comes from a trusted source.
with open('encodedStates.pickle', 'rb') as handle:
    data = pickle.load(handle)

In [3]:
def df_to_np(dataframe,shuffle=True,batch_size=32,target="HHVEHCNT"):
    """Split a DataFrame into a feature array and a label array.

    The caller's frame is left untouched: the split is done on a copy.

    Args:
        dataframe: pandas DataFrame containing the `target` column.
        shuffle: Unused; accepted for call compatibility.
        batch_size: Unused; accepted for call compatibility.
        target: Name of the label column to separate from the features.

    Returns:
        Tuple `(features, labels)`: `features` is the `.values` array of all
        columns except `target`, `labels` is the `.values` array of `target`.

    Raises:
        KeyError: If `target` is not a column of `dataframe`.
    """
    feature_frame = dataframe.copy()
    # pop() removes the label column from the copy and hands it back, so
    # whatever remains in `feature_frame` is the feature matrix.
    label_series = feature_frame.pop(target)
    features = feature_frame.values
    labels = label_series.values
    return features, labels

In [4]:
class TransferLearn:
    """Measure how well hidden layers learned on one data set transfer to another.

    A "source" network is trained on `fromStateData`; the weights of its
    first `layers_to_transfer` Dense layers are then used to initialise a
    network trained on `toStateData`. The result is compared with a
    benchmark network trained on `toStateData` from scratch.

    Every network shares one architecture: four Dense(128) layers (only the
    first uses ReLU), a Dense(64, relu) layer and a Dense(4, softmax) output,
    compiled with Adam and sparse categorical cross-entropy.

    Both data sets must have the same number of feature columns, otherwise
    the first transferred kernel will not fit the target network.
    """

    # Number of 128-unit Dense layers in every network built here; it is also
    # the largest number of layers that can be transferred.
    _NUM_WIDE_LAYERS = 4

    def __init__(self, fromStateData, toStateData, layers_to_transfer, verbose):
        """Store the configuration and split both data sets into X / y arrays.

        Args:
            fromStateData: DataFrame for the source data set (weights are
                learned here). Must contain df_to_np's default target column.
            toStateData: DataFrame for the target data set (weights are
                transferred to a model trained here).
            layers_to_transfer: How many leading Dense(128) layers to copy,
                expected to be between 1 and 4 inclusive.
            verbose: Verbosity value forwarded to every `model.fit` call.
        """
        self.fromState = fromStateData
        self.toState = toStateData
        self.layers_to_transfer = layers_to_transfer
        self.verbose = verbose

        # Filled lazily by getFromModelWeights() and reused by later
        # transfer() calls so the source model is trained only once.
        self.fromModelWeights = None

        self.train_X_from, self.train_y_from = df_to_np(fromStateData)
        self.train_X_to, self.train_y_to = df_to_np(toStateData)

    def _build_model(self, transferred_weights=(), trainable=True):
        """Build and compile the shared architecture.

        The first `len(transferred_weights)` wide layers are initialised from
        the given weights and get `trainable` applied; the remaining wide
        layers and the two head layers are freshly initialised.
        """
        model = tf.keras.Sequential()
        for index in range(self._NUM_WIDE_LAYERS):
            layer_kwargs = {}
            if index == 0:
                # Only the first wide layer has a non-linearity.
                layer_kwargs["activation"] = "relu"
            if index < len(transferred_weights):
                # NOTE(review): the `weights=` constructor argument is a
                # legacy tf.keras feature; newer Keras releases may require
                # building the model and calling layer.set_weights() instead.
                layer_kwargs["weights"] = transferred_weights[index]
                layer_kwargs["trainable"] = trainable
            model.add(tf.keras.layers.Dense(128, **layer_kwargs))
        model.add(tf.keras.layers.Dense(64,activation="relu"))
        model.add(tf.keras.layers.Dense(4,activation="softmax"))

        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        return model

    def _fit(self, model, features, labels, batch_size, validation_split, epochs):
        """Fit `model` with the settings shared by every experiment; return the History."""
        return model.fit(x=features, y=labels, epochs=epochs,
                         batch_size=batch_size,
                         validation_split=validation_split,
                         shuffle=True, verbose=self.verbose)

    # Get the weights from a model built on fromStateData
    def getFromModelWeights(self, batch_size=100, validation_split=0.2, epochs=5):
        """Train the source model and cache the weights of its leading layers.

        Args:
            batch_size: Mini-batch size for training.
            validation_split: Fraction of the source data held out for validation.
            epochs: Number of training epochs.

        Returns:
            A list with one entry per transferred layer, each entry being
            that layer's `get_weights()` result. The list is also stored in
            `self.fromModelWeights`.
        """
        model = self._build_model()
        self._fit(model, self.train_X_from, self.train_y_from,
                  batch_size, validation_split, epochs)

        self.fromModelWeights = [
            model.layers[i].get_weights()
            for i in range(self.layers_to_transfer)
        ]
        return self.fromModelWeights

    def transfer(self, trainable=True, batch_size=100, validation_split=0.2, epochs=5):
        """Train a target model whose leading layers start from source weights.

        Args:
            trainable: If False, the transferred layers are frozen; if True
                they are fine-tuned together with the rest of the network.
            batch_size: Mini-batch size for training on the target data.
            validation_split: Fraction of the target data held out for validation.
            epochs: Number of training epochs on the target data.

        Returns:
            Tuple `(model, val_acc)` where `val_acc` is the validation
            accuracy recorded after the last epoch.

        Raises:
            ValueError: If `layers_to_transfer` is outside 1..4.
        """
        # Fail before any training: only the Dense(128) layers are
        # shape-compatible with the slots they are copied into.
        if not 1 <= self.layers_to_transfer <= self._NUM_WIDE_LAYERS:
            raise ValueError(
                "layers_to_transfer must be between 1 and %d, got %r"
                % (self._NUM_WIDE_LAYERS, self.layers_to_transfer)
            )

        # Compare with None explicitly: an empty cached list is still a cache hit.
        weights = self.fromModelWeights
        if weights is None:
            weights = self.getFromModelWeights()
        print("*****weights obtained from fromStateData*****")

        print("Transferring")
        # The builder tops the network up to four wide layers in total, so
        # the transferred model has the same depth as the benchmark.
        model = self._build_model(
            transferred_weights=weights[:self.layers_to_transfer],
            trainable=trainable,
        )
        history = self._fit(model, self.train_X_to, self.train_y_to,
                            batch_size, validation_split, epochs)
        val_acc = history.history["val_accuracy"][-1]
        print("*****Transferring done*****", "Trainable:", trainable)
        return model, val_acc

    def benchmark(self, batch_size=100, validation_split=0.2, epochs=5):
        """Train the architecture from scratch on the target data.

        Args:
            batch_size: Mini-batch size for training.
            validation_split: Fraction of the target data held out for validation.
            epochs: Number of training epochs.

        Returns:
            Tuple `(model, val_acc)` where `val_acc` is the validation
            accuracy recorded after the last epoch.
        """
        print("*****Training Benchmark Model*****")
        model = self._build_model()
        history = self._fit(model, self.train_X_to, self.train_y_to,
                            batch_size, validation_split, epochs)
        val_acc = history.history["val_accuracy"][-1]
        print("*****Training Benchmark Model Done*****")
        return model, val_acc

    def compare(self):
        """Run the benchmark, frozen-transfer and fine-tuned-transfer experiments.

        Returns:
            Tuple `(benchmark_acc - frozen_acc, benchmark_acc - unfrozen_acc)`
            of final validation accuracies. A negative value means the
            transferred model scored higher than the benchmark.
        """
        benchmark_model, benchmark_acc = self.benchmark()
        frozenTransfer_model, frozenTransfer_acc = self.transfer(trainable=False)
        unfrozenTransfer_model, unfrozenTransfer_acc = self.transfer(trainable=True)

        return (benchmark_acc-frozenTransfer_acc, benchmark_acc-unfrozenTransfer_acc)
        
    

In [5]:
# Single worked example: learn on Massachusetts, transfer the first two
# layers to New York, with per-epoch training output enabled (verbose=1).
MA_to_NY = TransferLearn(fromStateData = data["MA"], toStateData = data["NY"], layers_to_transfer=2, verbose = 1)
# compare() returns (benchmark - frozen transfer, benchmark - unfrozen transfer)
# final validation accuracies.
compareMAandNY = MA_to_NY.compare()

*****Training Benchmark Model*****
Train on 81690 samples, validate on 20423 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
*****Training Benchmark Model Done*****
Train on 2568 samples, validate on 643 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
*****weights obtained from fromStateData*****
Transferring
Train on 81690 samples, validate on 20423 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
*****Transferring done***** Trainable: False
*****weights obtained from fromStateData*****
Transferring
Train on 81690 samples, validate on 20423 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
*****Transferring done***** Trainable: True


In [6]:
# Show the (frozen, unfrozen) validation-accuracy gaps for MA -> NY; positive
# values mean the from-scratch benchmark scored higher than the transfer model.
compareMAandNY

(0.002644062, 0.0076384544)

In [7]:
# List the state codes available in the loaded data set.
print(data.keys())

dict_keys(['NC', 'WI', 'NY', 'MD', 'PA', 'CA', 'TX', 'AZ', 'WA', 'IL', 'KY', 'MT', 'IA', 'GA', 'ME', 'VA', 'SC', 'WV', 'FL', 'NH', 'MN', 'NE', 'AR', 'NJ', 'SD', 'NM', 'OK', 'MI', 'VT', 'ID', 'DE', 'MA', 'WY', 'CO', 'IN', 'AL', 'TN', 'HI', 'AK', 'OH', 'RI', 'LA', 'OR', 'KS', 'UT', 'MO', 'DC', 'NV', 'ND', 'MS', 'CT'])


In [8]:
# States to compare pairwise; each one is used both as transfer source (row)
# and as transfer target (column) in the similarity matrices.
states = ["MA", "CA", "NY"]
# I = number of source states (rows), J = number of target states (columns).
I,J = len(states), len(states)
# Collects one (2, I, J) result array per number of transferred layers.
similarity_list = []

In [9]:
# We can only transfer at most 4 layers for now: the networks have exactly
# four Dense(128) layers.
layers_to_transfer = 4

# Rebuild the list inside this cell so that re-running it does not keep
# appending to results from an earlier run. Entry 0 is an all-zero placeholder
# for "0 layers transferred", which makes similarity_list[n] the result for
# n transferred layers.
similarity_list = [np.zeros((2,I,J))]

for n in range(1,layers_to_transfer + 1):
    # similarity[0] holds the frozen-transfer gaps, similarity[1] the
    # unfrozen (fine-tuned) gaps; rows are source states, columns are targets.
    similarity = np.zeros((2,I,J))
    for r, fromState in enumerate(states):
        for c, toState in enumerate(states):
            print("Processing", fromState, toState, "layers transferred:", n)
            if fromState != toState:
                transferLearner = TransferLearn(fromStateData = data[fromState], toStateData = data[toState], layers_to_transfer=n,verbose=0)
                frozen_diff, unfrozen_diff = transferLearner.compare()
            else:
                # Transferring a state onto itself is not run; the diagonal stays 0.
                frozen_diff, unfrozen_diff = 0, 0
            similarity[0, r, c] = frozen_diff
            similarity[1, r, c] = unfrozen_diff
    print(similarity)
    similarity_list.append(similarity)
    

Processing MA MA layers transferred: 1
Processing MA CA layers transferred: 1
*****Training Benchmark Model*****
*****Training Benchmark Model Done*****
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: False
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: True
Processing MA NY layers transferred: 1
*****Training Benchmark Model*****
*****Training Benchmark Model Done*****
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: False
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: True
Processing CA MA layers transferred: 1
*****Training Benchmark Model*****
*****Training Benchmark Model Done*****
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: False
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: Tru

*****Training Benchmark Model Done*****
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: False
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: True
Processing NY CA layers transferred: 4
*****Training Benchmark Model*****
*****Training Benchmark Model Done*****
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: False
*****weights obtained from fromStateData*****
Transferring
*****Transferring done***** Trainable: True
Processing NY NY layers transferred: 4
[[[ 0.00000000e+00 -8.01265240e-04  4.60267067e-03]
  [-6.53188229e-02  0.00000000e+00 -3.08471918e-03]
  [-1.86625123e-02  9.93609428e-04  0.00000000e+00]]

 [[ 0.00000000e+00  3.23730707e-03 -8.03011656e-03]
  [-2.48833895e-02  0.00000000e+00 -2.00748444e-03]
  [ 1.71073079e-02  9.61422920e-05  0.00000000e+00]]]


In [11]:
# Print every result array, labelled with its position in the list, which is
# the number of transferred layers it was produced with.
for i, layer_result in enumerate(similarity_list):
    print("Transferred",i,"layers:")
    print(layer_result)

Transferred 0 layers:
[[[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]]
Transferred 1 layers:
[[[ 0.          0.00743616 -0.00063652]
  [ 0.01244169  0.         -0.0251677 ]
  [-0.01088649  0.00227571  0.        ]]

 [[ 0.          0.00278854  0.00088137]
  [ 0.01244169  0.         -0.02536356]
  [-0.01088649 -0.00192314  0.        ]]]
Transferred 2 layers:
[[[ 0.          0.0048399   0.00665915]
  [-0.0279938   0.         -0.00225234]
  [ 0.02021772 -0.01631463  0.        ]]

 [[ 0.          0.00608993  0.00014687]
  [ 0.01866251  0.          0.00053859]
  [ 0.0155521  -0.00842977  0.        ]]]
Transferred 3 layers:
[[[ 0.         -0.00522453  0.0012241 ]
  [ 0.06220835  0.          0.00254619]
  [-0.00622082 -0.00480783  0.        ]]

 [[ 0.          0.00012821  0.00014687]
  [ 0.10575426  0.         -0.00509226]
  [-0.00933129 -0.01230806  0.        ]]]
Transferred 4 layers:
[[[ 0.00000000e+00 -8.01265240e-04  4.60267067e-03]
  [-6.53188229e-02  0.00