In [None]:
# Colab-only: mount Google Drive under /content/gdrive, the root that the
# dataset path used later in this notebook points into.
from google.colab import drive
# force_remount=True asks Colab to mount again even if a mount already exists
# (presumably so re-running this cell is safe — confirm against the Colab docs).
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [None]:
import bz2
from collections import Counter
import re
import nltk
import numpy as np
nltk.download('punkt')
!pip install bcolz
import bcolz
import pickle
import torch
from torch import nn, optim
from torch.autograd import Variable
import pandas as pd
import matplotlib.pyplot as plt # we can also use seaborn for prettier graphs or bokeh for interactive graphs 
import statistics as stat


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
Collecting bcolz
[?25l  Downloading https://files.pythonhosted.org/packages/5c/4e/23942de9d5c0fb16f10335fa83e52b431bcb8c0d4a8419c9ac206268c279/bcolz-1.2.1.tar.gz (1.5MB)
[K     |████████████████████████████████| 1.5MB 17.8MB/s 
Building wheels for collected packages: bcolz
  Building wheel for bcolz (setup.py) ... [?25l[?25hdone
  Created wheel for bcolz: filename=bcolz-1.2.1-cp37-cp37m-linux_x86_64.whl size=2651466 sha256=df6b057ba54f5d4eb44027a20f6d0899c5f4ceecdcbb95acf86d988a49e251ec
  Stored in directory: /root/.cache/pip/wheels/9f/78/26/fb8c0acb91a100dc8914bf236c4eaa4b207cb876893c40b745
Successfully built bcolz
Installing collected packages: bcolz
Successfully installed bcolz-1.2.1


In [None]:
# Location of the dataset workbook on the mounted Google Drive.
DATASET_PATH = '/content/gdrive/Shareddrives/CS506_Team/Data/Blake_RPD_Dataset-2.xlsx'
# Load the sheet called "data" into a DataFrame. `df` is read as a
# notebook-level global by helper functions defined in later cells.
df = pd.read_excel(DATASET_PATH, "data") # data is the sheet name
# Column names for rounds 1..10 of each condition (tft / coop / def), together
# with the matching "_rt" columns (presumably response times — confirm against
# the dataset codebook).
tft = [f'tft{round_no}' for round_no in range(1, 11)]
tft_rt = [f'tft_rt{round_no}' for round_no in range(1, 11)]
coop = [f'coop{round_no}' for round_no in range(1, 11)]
coop_rt = [f'coop_rt{round_no}' for round_no in range(1, 11)]
defs = [f'def{round_no}' for round_no in range(1, 11)]
def_rt = [f'def_rt{round_no}' for round_no in range(1, 11)]
# Every decision column, ordered tft -> coop -> def (30 names in total).
decisions = [*tft, *coop, *defs]

# All column groups collected in one list.
features = [tft, tft_rt, coop, coop_rt, defs, def_rt, decisions]

In [None]:
import numpy as np

"""
Converts an array of aggression scores into an array of
0 and 1's. 0 meaning score below median, and 1 meaning score
above median.
We are converting the scores like this because some score groups
are so underpopulated such that they only have 1 member """
def convert_by_median(aggr):
  """
  Binarize aggression scores around their median.

  :param aggr: array-like (any shape) of a particular aggression score
  :return: a NEW array with the same shape and dtype as ``aggr`` holding 0
    where the score is strictly below the median and 1 otherwise (scores equal
    to the median map to 1). The input array is left unmodified.
  """
  scores = np.asarray(aggr)
  if scores.size == 0:
    # Nothing to binarize; also avoids numpy's "mean of empty slice" warning.
    return scores.copy()
  med = np.median(scores)
  # Vectorized comparison instead of an element-by-element Python loop; casting
  # back keeps the caller's dtype (e.g. int scores stay int).
  return np.where(scores < med, 0, 1).astype(scores.dtype)

"""
A helper function for preparing an array of aggression scores
before training the Logictic Regression Model.
We convert the datafram column to numpy array, finds the nan
indices, drop the nan's, and convert the array into 0 and 1's
by its median.
Returns the processed array and nan indices. """
def get_aggr(aggr_label):
  """
  Extract one aggression-score column from the global ``df`` and binarize it.

  :param aggr_label: name of the aggression-score column in ``df``
  :return: ``(aggr, aggr_na)`` where ``aggr`` is the integer 0/1 array that
    ``convert_by_median`` produces from the non-missing scores, and ``aggr_na``
    is the ``np.argwhere`` result (shape ``(n_missing, 1)``) holding the row
    positions whose score was NaN.
  """
  # Read as float first. Casting to int up front either fails on NaN or turns
  # it into a garbage integer, so the NaN check below could never find a
  # missing row and nothing was ever dropped.
  raw = df[aggr_label].to_numpy(dtype=float)
  missing = np.isnan(raw)
  aggr_na = np.argwhere(missing)
  # Only the observed scores take part in the median split.
  aggr = convert_by_median(raw[~missing].astype(int))
  return aggr, aggr_na

"""
Returns a matrix 20 x 3number_of_kids, with the first 10 columns being the
partner's deicisions and the latter 10 being the kids' decisions."""
def prepare_data(test_percent,data,seed=None,verbose=True):
  """
  Build shuffled (partner, kid) decision sequences and split them into
  train and test sets.

  Every kid contributes three samples, one per partner type (tft, coop, def).
  Each returned sample is a 2 x 10 matrix: row 0 holds the partner's 10
  decisions and row 1 holds the kid's 10 decisions.

  :param test_percent: fraction (0..1) of all samples placed in the test set
  :param data: an n_kids x 30 numpy matrix whose columns are the kid's tft,
    coop and def decisions (10 columns each, in that order)
  :param seed: optional seed for the shuffle; ``None`` (default) draws from
    numpy's global random state, as before
  :param verbose: when True (default) print the first 3 samples and the shape
    before and after reshaping
  :return: ``(train_set, test_set)``, each of shape (n_samples, 2, 10)
  :raises ValueError: if ``data`` is not 2-D or has fewer than 30 columns
  """
  data = np.asarray(data)
  if data.ndim != 2 or data.shape[1] < 30:
    raise ValueError(
        "data must be a 2-D array with at least 30 decision columns, got shape {}".format(data.shape))

  n_kids = data.shape[0]
  n_samples = 3*n_kids
  tft_dec, coop_dec, def_dec = data[:,0:10], data[:,10:20], data[:,20:30]

  # Tit-for-tat partner: plays 1 in the first round, then repeats the kid's
  # previous decision in every later round.
  tft_partner = np.ones((n_kids,10))
  tft_partner[:,1:] = tft_dec[:,:-1]
  # The coop and def partners follow fixed, scripted sequences.
  coop_partner = np.tile([1,1,0,1,1,1,0,1,1,1],(n_kids,1))
  def_partner = np.tile([0,0,1,0,0,0,1,0,0,0],(n_kids,1))

  # Rows: all tft samples, then all coop samples, then all def samples.
  # Columns: partner decisions (0-9) followed by kid decisions (10-19).
  tot_mat = np.vstack((
      np.hstack((tft_partner,tft_dec)),
      np.hstack((coop_partner,coop_dec)),
      np.hstack((def_partner,def_dec)),
  )).astype(float)

  if seed is None:
    shuffindex = np.random.permutation(n_samples)
  else:
    shuffindex = np.random.default_rng(seed).permutation(n_samples)
  tot_mat = tot_mat[shuffindex]
  if verbose:
    print(tot_mat[:3])
    print(tot_mat.shape)

  # Split each 20-long row into (partner row, kid row).
  tot_mat = tot_mat.reshape((n_samples,2,10))
  if verbose:
    print(tot_mat[:3])
    print(tot_mat.shape)

  # The first n shuffled samples form the test set, the rest the training set.
  n = int(n_samples * test_percent)
  train_set,test_set = tot_mat[n:],tot_mat[:n]
  return train_set,test_set

def toLoader(train_kids, test_kids, train_labels=None, test_labels=None, batch_size=50):
  """
  Wrap train/test arrays and their labels in shuffling PyTorch DataLoaders.

  :param train_kids: numpy array of training inputs
  :param test_kids: numpy array of test inputs
  :param train_labels: labels for ``train_kids`` (same length). When omitted,
    falls back to the notebook-level global ``train_aggr`` that the original
    implementation read implicitly.
  :param test_labels: labels for ``test_kids`` (same length). When omitted,
    falls back to the notebook-level global ``test_aggr``.
  :param batch_size: batch size of both loaders (default 50, as before)
  :return: ``(train_loader, test_loader)``
  :raises NameError: if labels are omitted and the fallback global is undefined
  """
  import torch
  from torch.utils.data import TensorDataset, DataLoader

  # Legacy fallback: keep working for callers that rely on the globals, while
  # letting new callers pass the labels explicitly.
  if train_labels is None:
    train_labels = train_aggr
  if test_labels is None:
    test_labels = test_aggr

  train_data = TensorDataset(torch.as_tensor(train_kids), torch.as_tensor(train_labels))
  test_data = TensorDataset(torch.as_tensor(test_kids), torch.as_tensor(test_labels))

  train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
  test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)

  return train_loader, test_loader

In [None]:
def do_lstm(test_percent,data):
  """
  Run the whole pipeline: split the data, train an LSTM and score it.

  :param test_percent: forwarded to ``prepare_data`` as the test-set share
  :param data: decision matrix forwarded to ``prepare_data``
  :return: whatever ``test_model`` returns for the trained model on the test loader
  """
  # Split the decisions, then wrap both parts in data loaders.
  splits = prepare_data(test_percent, data)
  train_loader, test_loader = toLoader(*splits)

  # Positional arguments are (in_dim, hidden_dim, out_dim, layer_num).
  net = lstmModel(2, 10, 2, 2)
  print(net)
  net.to(device)

  # Fit on the training loader, then evaluate on the test loader.
  trained = train_model(net, train_loader)
  return test_model(trained, test_loader)

In [None]:
#not used
def test_model(model,test_loader,batch_size=50):
  num_correct = 0

  model.eval()
  for inputs, labels in test_loader:
      inputs, labels = inputs.to(device), labels.to(device)
      output = model(inputs)
      #pred = torch.round(output.squeeze()) #rounds the output to 0/1
      #print(pred)
      correct_tensor = pred.eq(labels.float().view_as(pred))
      correct = np.squeeze(correct_tensor.cpu().numpy())
      num_correct += np.sum(correct)
  test_acc = num_correct/len(test_loader.dataset)
  return test_acc

#not used
def train_model(model,train_loader,epochs=10,lr=1e-2,clip=5.0):
  """
  Train ``model`` with Adam on an MSE loss and return it.

  :param model: torch module to train (modified in place)
  :param train_loader: iterable yielding ``(inputs, labels)`` batches
  :param epochs: number of passes over ``train_loader`` (default 10, as before)
  :param lr: Adam learning rate (default 1e-2, as before)
  :param clip: maximum gradient norm passed to ``clip_grad_norm_``. The
    original body referenced an undefined name ``clip``; 5.0 is a chosen
    default — adjust if a different value was intended.
  :return: the same ``model`` object, left in training mode
  """
  # Train on the device the model already lives on instead of depending on a
  # notebook-level global.
  first_param = next(model.parameters(), None)
  run_device = first_param.device if first_param is not None else torch.device('cpu')

  model.train()
  criterion = nn.MSELoss()
  optimizer = optim.Adam(model.parameters(), lr = lr)

  for _ in range(epochs):
    for inputs, labels in train_loader:
      inputs, labels = inputs.to(run_device), labels.to(run_device)
      model.zero_grad()
      output = model(inputs)
      loss = criterion(output.squeeze(), labels.float())
      loss.backward()
      # Cap the gradient norm before the optimizer step.
      nn.utils.clip_grad_norm_(model.parameters(), clip)
      optimizer.step()
  return model


class lstmModel(nn.Module):
    """
    Stacked LSTM followed by ReLU, a linear layer and a softmax over the
    output dimension, applied at every time step.

    Input:  tensor of shape (batch, seq_len, in_dim)
    Output: tensor of shape (batch, seq_len, out_dim); the last axis sums to 1
    """
    def __init__(self,in_dim,hidden_dim,out_dim,layer_num):
        """
        :param in_dim: number of features per time step
        :param hidden_dim: LSTM hidden size
        :param out_dim: number of output classes per time step
        :param layer_num: number of stacked LSTM layers
        """
        super().__init__()
        # batch_first=True makes dim 0 the batch and dim 1 the time axis.
        # Without it nn.LSTM reads dim 0 as time, so the recurrence would run
        # across the samples of a batch instead of across each sample's steps.
        self.lstmLayer=nn.LSTM(in_dim,hidden_dim,layer_num,batch_first=True)
        self.relu=nn.ReLU()
        self.fcLayer=nn.Linear(hidden_dim,out_dim)
        # Built once here rather than re-created on every forward pass.
        self.softmax=nn.Softmax(dim=-1)
        # Stored but not applied to any weights inside this class.
        self.weightInit = (np.sqrt(1.0/hidden_dim))

    def forward(self, x):
        """
        :param x: tensor of shape (batch, seq_len, in_dim)
        :return: per-step probabilities of shape (batch, seq_len, out_dim)
        """
        out,_=self.lstmLayer(x)
        out=self.relu(out)
        out=self.fcLayer(out)
        out=self.softmax(out)
        return out

In [None]:
# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on CPU-only runtimes instead of failing on `.to(device)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# For each aggression column: the processed label array and the NaN indices
# returned by get_aggr.
(proactive_aggr, proactive_na), (reactive_aggr, reactive_na), (tot_aggr, tot_na) = [
    get_aggr(column)
    for column in ('P-Proactive_aggr', 'P-Reactive_aggr', 'P-Aggression_Total')
]

# All 10 Rounds decisions
kids_tft, kids_def, kids_coop = (
    df[columns].to_numpy() for columns in (tft, defs, coop)
)

In [None]:
# Decisions of every kid, minus the rows whose proactive score was missing.
all_dec = df[decisions].to_numpy()
all_dec_above_med_pa = np.delete(all_dec, proactive_na,axis=0)
# Collect the row positions whose proactive label is 0, then drop them so that
# only rows with a non-zero label remain.
to_delete = [
    row for row in range(len(all_dec_above_med_pa)) if proactive_aggr[row]==0
]
all_dec_above_med_pa = np.delete(all_dec_above_med_pa, to_delete,axis=0)

In [None]:
# prepare_data shuffles with numpy's global random state; seeding it here makes
# the train/test split identical on every run of this cell.
np.random.seed(0)
# Hold out 20% of the samples as the test set.
train_set,test_set = prepare_data(0.2, all_dec_above_med_pa)

[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 1.]
 [1. 1. 0. 1. 0. 1. 1. 0. 0. 1. 1. 0. 1. 0. 1. 1. 0. 0. 1. 0.]]
(474, 20)
[[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]

 [[0. 0. 1. 0. 0. 0. 1. 0. 0. 0.]
  [1. 0. 0. 1. 0. 0. 1. 0. 0. 1.]]

 [[1. 1. 0. 1. 0. 1. 1. 0. 0. 1.]
  [1. 0. 1. 0. 1. 1. 0. 0. 1. 0.]]]
(474, 2, 10)


In [None]:
# Train the LSTM to predict, at every step, the value `lag` steps ahead.
lag = 1
n_nodes,n_layers = 10, 2
lstm = lstmModel(2, n_nodes, 2, n_layers)
lstm.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(lstm.parameters(), lr = 1e-2)
n_epochs, window, batch_size = 10, 10, 100
# Integer number of mini-batches (ceil division); the last one may be smaller.
n_batches = -(-train_set.shape[0] // batch_size)
loss_set = []
for ep in range(n_epochs):
    for bc in range(n_batches):
        batch = train_set[bc*batch_size:(bc+1)*batch_size]
        # (batch, 2, 10) -> (batch, 10, 2): one row per round, two channels.
        # NOTE(review): assumes lstmModel treats dim 1 as the time axis — confirm.
        inputs = torch.from_numpy(batch).transpose(1,2).float().to(device)
        # The target is the input sequence itself, compared below at a shift of `lag`.
        target = inputs
        output = lstm(inputs)
        # No squeeze(): it would drop the batch axis of a size-1 batch and
        # break the 3-D indexing below.
        # NOTE(review): channel 0 is the partner's row of each sample as laid
        # out by prepare_data; confirm that predicting the partner rather than
        # the kid (channel 1) is intended.
        loss = criterion(output[:,:-lag,0], target[:,lag:,0])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print_loss = loss.item()
        loss_set.append(print_loss)
        # Log every `window`-th batch of each epoch.
        if bc % window == 0 :
            print('Epoch[{}/{}], Batch[{}/{}], Loss: {:.5f}'.format(ep+1, n_epochs, bc+1, n_batches, print_loss))
lstm = lstm.eval()

(2, 10)
Epoch[1/10], Batch[1.0/3.8], Loss: 0.26508
(2, 10)
(2, 10)
(2, 10)
(2, 10)
Epoch[2/10], Batch[1.0/3.8], Loss: 0.25563
(2, 10)
(2, 10)
(2, 10)
(2, 10)
Epoch[3/10], Batch[1.0/3.8], Loss: 0.25004
(2, 10)
(2, 10)
(2, 10)
(2, 10)
Epoch[4/10], Batch[1.0/3.8], Loss: 0.25241
(2, 10)
(2, 10)
(2, 10)
(2, 10)
Epoch[5/10], Batch[1.0/3.8], Loss: 0.25499
(2, 10)
(2, 10)
(2, 10)
(2, 10)
Epoch[6/10], Batch[1.0/3.8], Loss: 0.25336
(2, 10)
(2, 10)
(2, 10)
(2, 10)
Epoch[7/10], Batch[1.0/3.8], Loss: 0.25089
(2, 10)
(2, 10)
(2, 10)
(2, 10)
Epoch[8/10], Batch[1.0/3.8], Loss: 0.24968
(2, 10)
(2, 10)
(2, 10)
(2, 10)
Epoch[9/10], Batch[1.0/3.8], Loss: 0.24929
(2, 10)
(2, 10)
(2, 10)
(2, 10)
Epoch[10/10], Batch[1.0/3.8], Loss: 0.24917
(2, 10)
(2, 10)
(2, 10)
