In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import TensorDataset, DataLoader
from torch.optim.lr_scheduler import _LRScheduler
from sklearn.preprocessing import MinMaxScaler

  from .autonotebook import tqdm as notebook_tqdm


In [837]:
def generate_message_activity(hw, target=1, max_interval=10):
    noise_strength = np.random.randint(4, size=1)
    x = np.array(list(range(0, hw)))
    y = np.cos(x)  + 1 + np.random.randint(4, size=len(x))#*noise_strength
    step = np.random.choice([0, 0.1, 0.15, 0.25, 0.3, 0.4, 0.5])
    sv = int(step * hw)
    k = int(hw / 32)
    k = k if k else 1
    y[sv:] += x[sv:]/1.8/k + abs(y[sv])
    y *= max_interval / 10
    if target == 0:
        return y[::-1] / 3
    return y / 3


def generate_message_activity_plane(hw, target=1, max_interval=10):
    noise_strength = 0.5
    x = np.array(list(range(0, hw)))
    h = 1 + np.random.randint(1, size=1) if target == 0 else 5+np.random.randint(4, size=1)
    t = 0.4 if target == 0 else 1+np.random.randint(2, size=1)
    y = t * np.cos(x) + h + np.random.randint(3, size=len(x))*noise_strength
    y *= max_interval / 10
    return y


def generate_by(hw, plane, pos, max_interval):
    if plane:
        return generate_message_activity_plane(hw, pos, max_interval)
    else:
        return generate_message_activity(hw, pos, max_interval)


def gen_historic_data(hw = 40, c = 7, pos_size = 0, last_pos= 0, max_interval=13):
    hist = []
    last = None
    target = None
    
    ps = int(np.round(c * pos_size))
    '''
    for _ in range(ps):
        hist.append(generate_by(hw, np.random.choice([0, 1]), 1, max_interval))
    for _ in range(c - ps):
        hist.append(generate_by(hw, np.random.choice([0, 1]), 0, max_interval))
#    hist = np.array(hist).mean(axis=0)
    '''
    if pos_size:
        if last_pos:
            last = generate_by(365-hw, np.random.choice([0, 1]), 1, max_interval)
            target = 1
        else:
            last = generate_by(365-hw, np.random.choice([0, 1]), 0, max_interval)
            target = 0
    else:
        plane = np.random.choice([0, 1])
        if last_pos:
            last = generate_by(365-hw, plane, 1, max_interval)
            target = 1
        else:
            last = generate_by(365-hw, plane, 0, max_interval)
            if not plane:
                target = 1
            else:
                target = 0
    activity_series = generate_by(hw, np.random.choice([0, 1]), pos_size, max_interval)
    
    
    # padding
    activity_series = np.pad(activity_series, (365 - len(activity_series)%365, 0))
    last = np.pad(last, (365 - len(last)%365, 0))
    
    return activity_series, last, target


def gen_dataset(hw, c, size, max_interval):
    activity = []
    target = []
    last_a = []
    
    for _ in range(size):
        pos_size = np.random.choice([0, 1])
        last_pos = np.random.choice([0, 1])
        hw = np.random.randint(10, 320)
        act, last, targ = gen_historic_data(hw=hw, c=c, pos_size=pos_size, last_pos=last_pos, max_interval=max_interval)
        activity.append(np.round(act))
        last_a.append(np.round(last))
        target.append(targ)
        
    return activity, last_a, target
        

def gen_worker_df(max_range):
    df_dict = {}
    for k in max_range:
        pos_size = np.random.choice([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
        last_pos = np.random.choice([0, 1])
        df_dict[k] = gen_historic_data(hw=40, c=11, pos_size=pos_size, last_pos=last_pos, max_interval=max_range[k])[0].astype(int)
    return df_dict

In [838]:
max_range = {"sent_messages_count": 10,
"received_messages_count": 10, 
"recipient_counts": 10, 
"bcc_count": 10, 
"cc_count": 10, 
"read_messages_later_than": 10, 
"days_between_received_and_read":5, 
"replied_messages_count": 10, 
"sent_characters_count": 5400, 
"messages_outside_working_hours": 10, 
#"received_to_sent_ratio": 1, 
"messages_with_question_and_no_reply": 5}

In [839]:
type_up = {"sent_messages_count": 1,
"received_messages_count": 1, 
"recipient_counts": 1, 
"bcc_count": 1, 
"cc_count": 1, 
"read_messages_later_than": 0, 
"days_between_received_and_read":0, 
"replied_messages_count": 1, 
"sent_characters_count": 1, 
"messages_outside_working_hours": 0, 
#"received_to_sent_ratio": 1, 
"messages_with_question_and_no_reply": 0}

In [840]:
activities, last, targets = gen_dataset(40, 11, 5000, 10)
v_activities, v_last, v_targets = gen_dataset(40, 11, 1000, 5400)

scaler = MinMaxScaler(feature_range=(0, 1))
activities_scaled = scaler.fit_transform(activities)
v_activities_scaled = scaler.transform(v_activities)

In [841]:
def scale(sample_mat, interval_max):
    scaled_mat = sample_mat / interval_max
    return scaled_mat

In [842]:
def scale_by_max(worker, max_range):
    wk_dict = worker.to_dict(orient="list")
    scaled_worker = {}
    for k in max_range:
        scaled_worker[k] = scale(np.array(wk_dict[k]), max_range[k])
    return scaled_worker

In [843]:
def predict_by_max(model, scaled_worker, type_up):
    proba = 0
    for k in type_up:
        x = torch.tensor(scaled_worker[k]).unsqueeze(0).unsqueeze(2).float()
        pred = model(x).item()
        if not type_up[k]:
            pred = 1 - pred
        proba += pred
    return 1 - proba  / len(type_up)

In [844]:
sum(targets) / len(targets)

0.6254

In [845]:
sum(v_targets) / len(v_targets)

0.631

In [846]:
activities_scaled = scale(np.array(activities), 10)
v_activities_scaled = scale(np.array(v_activities), 5400)

In [847]:
last_scaled = scale(np.array(last), 10)
v_last_scaled = scale(np.array(v_last), 5400)

import joblib
scaler_filename = "scaler.save"
joblib.dump(scaler, scaler_filename)
# And now to load...
scaler = joblib.load(scaler_filename)

In [848]:
activities_t = torch.tensor(activities_scaled).float()
v_activities_t = torch.tensor(v_activities_scaled).float()

last_t = torch.tensor(last_scaled).float()
v_last_t = torch.tensor(v_last_scaled).float()

targets_t = torch.tensor(targets).float()
v_targets_t = torch.tensor(v_targets).float()

In [849]:
train_dataset = TensorDataset(activities_t, last_t, targets_t)
train_dl = DataLoader(train_dataset, 32, shuffle=True, num_workers=10)

In [850]:
val_dataset = TensorDataset(v_activities_t, v_last_t, v_targets_t)
val_dl = DataLoader(val_dataset, 32, shuffle=True, num_workers=10)

In [851]:
def accuracy(output, target):
    return (output.argmax(dim=1) == target).float().mean().item()

In [852]:
class CyclicLR(_LRScheduler):
    
    def __init__(self, optimizer, schedule, last_epoch=-1):
        assert callable(schedule)
        self.schedule = schedule
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        return [self.schedule(self.last_epoch, lr) for lr in self.base_lrs]

In [853]:
def cosine(t_max, eta_min=0):
    
    def scheduler(epoch, base_lr):
        t = epoch % t_max
        return eta_min + (base_lr - eta_min)*(1 + np.cos(np.pi*t/t_max))/2
    
    return scheduler

In [854]:
class LSTMClassifier(nn.Module):
    """Very simple implementation of LSTM-based time-series classifier."""
    
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn_1 = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.rnn_2 = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim*2, output_dim)
        self.batch_size = None
        self.hidden = None
    
    def forward(self, x1, x2):
        h0, c0 = self.init_hidden(x1)
        out1, (hn, cn) = self.rnn_1(x1, (h0, c0))
        h0, c0 = self.init_hidden(x2)
        out2, (hn, cn) = self.rnn_2(x2, (h0, c0))
        out = torch.cat([out1[:, -1, :], out2[:, -1, :]], axis=1)
        out = torch.sigmoid(self.fc(out))
        return out
    
    def init_hidden(self, x):
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        return [t.to("cuda:0") for t in (h0, c0)]

In [855]:
from torchmetrics.classification import BinaryF1Score

metric = BinaryF1Score().to("cuda:0")

In [856]:
input_dim = 1    
hidden_dim = 256
layer_dim = 3
output_dim = 1
seq_dim = 72

lr = 0.0001
n_epochs = 1000
iterations_per_epoch = len(train_dl)
best_acc = 0
patience, trials = 10, 0

model = LSTMClassifier(input_dim, hidden_dim, layer_dim, output_dim)
model = model.to("cuda:0")#.cpu()#.cuda()
criterion = nn.BCELoss()
opt = torch.optim.RMSprop(model.parameters(), lr=lr)
sched = CyclicLR(opt, cosine(t_max=iterations_per_epoch * 2, eta_min=lr/100))

print('Start model training')

for epoch in range(1, n_epochs + 1):
    
    for i, (a_batch, l_batch, y_batch) in enumerate(train_dl):

        model.train()
        a_batch = a_batch.unsqueeze(2).to("cuda:0")#.cpu()
        l_batch = l_batch.unsqueeze(2).to("cuda:0")#.cpu()#.cuda()
        y_batch = y_batch.unsqueeze(1).to("cuda:0")#.cpu()#.cuda()
        sched.step()
        opt.zero_grad()
        out = model(a_batch, l_batch)
        loss = criterion(out, y_batch)
        loss.backward()
        opt.step()
    
    model.eval()
    correct, total = 0, 0
    f1_tmp = []
    for a_val, l_val, y_val in val_dl: #val_dl
        a_val, l_val, y_val = [t.to("cuda:0") for t in (a_val, l_val, y_val)]
        out = model(a_val.unsqueeze(2), l_val.unsqueeze(2))
        preds = out #F.log_softmax(out, dim=1).argmax(dim=1)
        preds[preds < 0.5] = 0
        preds[preds >= 0.5] = 1
        total += y_val.size(0)
        correct += (preds == y_val.unsqueeze(1)).sum().item()
        f1_tmp.append(metric(preds, y_val.unsqueeze(1)).item())
    
    f1 = sum(f1_tmp) / len(f1_tmp)
    acc = correct / total

    print(f'Epoch: {epoch:3d}. Loss: {loss.item():.6f}. Acc.: {acc:2.2%}. F1: {f1:.6f}')

    if acc > best_acc:
        trials = 0
        best_acc = acc
        torch.save(model.state_dict(), 'lstm_best.pth')
        print(f'Epoch {epoch} best model saved with accuracy: {best_acc:2.2%}')
    else:
        trials += 1
        if trials >= patience:
            print(f'Early stopping on epoch {epoch}')
            break

Start model training
Epoch:   1. Loss: 0.125941. Acc.: 85.90%. F1: 0.900224
Epoch 1 best model saved with accuracy: 85.90%
Epoch:   2. Loss: 0.270351. Acc.: 85.90%. F1: 0.899900
Epoch:   3. Loss: 0.361747. Acc.: 86.20%. F1: 0.871252
Epoch 3 best model saved with accuracy: 86.20%
Epoch:   4. Loss: 0.186422. Acc.: 86.50%. F1: 0.888471
Epoch 4 best model saved with accuracy: 86.50%
Epoch:   5. Loss: 0.109723. Acc.: 86.20%. F1: 0.885560
Epoch:   6. Loss: 0.206575. Acc.: 85.40%. F1: 0.874124
Epoch:   7. Loss: 0.098551. Acc.: 86.30%. F1: 0.881045
Epoch:   8. Loss: 0.192709. Acc.: 85.90%. F1: 0.896401
Epoch:   9. Loss: 0.002972. Acc.: 85.90%. F1: 0.896780
Epoch:  10. Loss: 0.159146. Acc.: 86.20%. F1: 0.872643
Epoch:  11. Loss: 0.286916. Acc.: 85.90%. F1: 0.897032
Epoch:  12. Loss: 0.083817. Acc.: 86.00%. F1: 0.873105
Epoch:  13. Loss: 0.002491. Acc.: 85.10%. F1: 0.862581
Epoch:  14. Loss: 0.096939. Acc.: 85.90%. F1: 0.900860
Early stopping on epoch 14


In [619]:
torch.save(model.state_dict(), 'lstm_siam_v4.pth')

In [623]:
sum(targets) / len(targets)

0.6318

In [857]:
class GRUClassifier(nn.Module):
    """Very simple implementation of LSTM-based time-series classifier."""
    
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn_1 = nn.GRU(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.rnn_2 = nn.GRU(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim*2, output_dim)
        self.batch_size = None
        self.hidden = None
    
    def forward(self, x1, x2):
        h0 = self.init_hidden(x1)
        out1, hn = self.rnn_1(x1, h0)
        h0 = self.init_hidden(x2)
        out2, hn = self.rnn_2(x2, h0)
        out = torch.cat([out1[:, -1, :], out2[:, -1, :]], axis=1)
        out = torch.sigmoid(self.fc(out))
        return out
    
    def init_hidden(self, x):
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        return torch.stack([t.to("cuda:0") for t in h0])

In [859]:
input_dim = 1    
hidden_dim = 256
layer_dim = 3
output_dim = 1
seq_dim = 72

lr = 0.0001
n_epochs = 1000
iterations_per_epoch = len(train_dl)
best_acc = 0
patience, trials = 10, 0

model = GRUClassifier(input_dim, hidden_dim, layer_dim, output_dim)
model = model.to("cuda:0")#.cuda()
criterion = nn.BCELoss()
opt = torch.optim.RMSprop(model.parameters(), lr=lr)
sched = CyclicLR(opt, cosine(t_max=iterations_per_epoch * 2, eta_min=lr/100))

print('Start model training')

for epoch in range(1, n_epochs + 1):
    
    for i, (a_batch, l_batch, y_batch) in enumerate(train_dl):

        model.train()
        a_batch = a_batch.unsqueeze(2).to("cuda:0")
        l_batch = l_batch.unsqueeze(2).to("cuda:0")#.cuda()
        y_batch = y_batch.unsqueeze(1).to("cuda:0")#.cuda()
        sched.step()
        opt.zero_grad()
        out = model(a_batch, l_batch)
        loss = criterion(out, y_batch)
        loss.backward()
        opt.step()
    
    model.eval()
    correct, total = 0, 0
    f1_tmp = []
    for a_val, l_val, y_val in val_dl: #val_dl
        a_val, l_val, y_val = [t.to("cuda:0") for t in (a_val, l_val, y_val)]
        out = model(a_val.unsqueeze(2), l_val.unsqueeze(2))
        preds = out #F.log_softmax(out, dim=1).argmax(dim=1)
        preds[preds < 0.5] = 0
        preds[preds >= 0.5] = 1
        total += y_val.size(0)
        correct += (preds == y_val.unsqueeze(1)).sum().item()
        f1_tmp.append(metric(preds, y_val.unsqueeze(1)).item())
    
    f1 = sum(f1_tmp) / len(f1_tmp)
    acc = correct / total

    print(f'Epoch: {epoch:3d}. Loss: {loss.item():.6f}. Acc.: {acc:2.2%}. F1: {f1:.6f}')

    if acc > best_acc:
        trials = 0
        best_acc = acc
        torch.save(model.state_dict(), 'gru_siam.pth')
        print(f'Epoch {epoch} best model saved with accuracy: {best_acc:2.2%}')
    else:
        trials += 1
        if trials >= patience:
            print(f'Early stopping on epoch {epoch}')
            break

Start model training
Epoch:   1. Loss: 0.430290. Acc.: 87.20%. F1: 0.895476
Epoch 1 best model saved with accuracy: 87.20%
Epoch:   2. Loss: 0.451126. Acc.: 86.30%. F1: 0.875967
Epoch:   3. Loss: 0.274705. Acc.: 85.50%. F1: 0.868307
Epoch:   4. Loss: 0.187914. Acc.: 85.90%. F1: 0.896842
Epoch:   5. Loss: 0.252547. Acc.: 85.90%. F1: 0.872287
Epoch:   6. Loss: 0.272890. Acc.: 85.90%. F1: 0.901061
Epoch:   7. Loss: 0.323231. Acc.: 85.90%. F1: 0.891283
Epoch:   8. Loss: 0.332880. Acc.: 86.40%. F1: 0.887938
Epoch:   9. Loss: 0.104859. Acc.: 85.70%. F1: 0.878138
Epoch:  10. Loss: 0.299773. Acc.: 86.30%. F1: 0.870340
Epoch:  11. Loss: 0.256945. Acc.: 85.90%. F1: 0.897588
Early stopping on epoch 11


In [860]:
class RNNClassifier(nn.Module):
    """Very simple implementation of LSTM-based time-series classifier."""
    
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn_1 = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.rnn_2 = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim*2, output_dim)
        self.batch_size = None
        self.hidden = None
    
    def forward(self, x1, x2):
        h0 = self.init_hidden(x1)
        out1, hn = self.rnn_1(x1, h0)
        h0 = self.init_hidden(x2)
        out2, hn = self.rnn_2(x2, h0)
        out = torch.cat([out1[:, -1, :], out2[:, -1, :]], axis=1)
        out = torch.sigmoid(self.fc(out))
        return out
    
    def init_hidden(self, x):
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        return torch.stack([t.to("cuda:0") for t in h0])

In [861]:
input_dim = 1    
hidden_dim = 256
layer_dim = 3
output_dim = 1
seq_dim = 72

lr = 0.0001
n_epochs = 1000
iterations_per_epoch = len(train_dl)
best_acc = 0
patience, trials = 10, 0

model = RNNClassifier(input_dim, hidden_dim, layer_dim, output_dim)
model = model.to("cuda:0")#.cuda()
criterion = nn.BCELoss()
opt = torch.optim.RMSprop(model.parameters(), lr=lr)
sched = CyclicLR(opt, cosine(t_max=iterations_per_epoch * 2, eta_min=lr/100))

print('Start model training')

for epoch in range(1, n_epochs + 1):
    
    for i, (a_batch, l_batch, y_batch) in enumerate(train_dl):

        model.train()
        a_batch = a_batch.unsqueeze(2).to("cuda:0")
        l_batch = l_batch.unsqueeze(2).to("cuda:0")#.cuda()
        y_batch = y_batch.unsqueeze(1).to("cuda:0")#.cuda()
        sched.step()
        opt.zero_grad()
        out = model(a_batch, l_batch)
        loss = criterion(out, y_batch)
        loss.backward()
        opt.step()
    
    model.eval()
    correct, total = 0, 0
    f1_tmp = []
    for a_val, l_val, y_val in val_dl: #val_dl
        a_val, l_val, y_val = [t.to("cuda:0") for t in (a_val, l_val, y_val)]
        out = model(a_val.unsqueeze(2), l_val.unsqueeze(2))
        preds = out #F.log_softmax(out, dim=1).argmax(dim=1)
        preds[preds < 0.5] = 0
        preds[preds >= 0.5] = 1
        total += y_val.size(0)
        correct += (preds == y_val.unsqueeze(1)).sum().item()
        f1_tmp.append(metric(preds, y_val.unsqueeze(1)).item())
    
    f1 = sum(f1_tmp) / len(f1_tmp)
    acc = correct / total

    print(f'Epoch: {epoch:3d}. Loss: {loss.item():.6f}. Acc.: {acc:2.2%}. F1: {f1:.6f}')

    if acc > best_acc:
        trials = 0
        best_acc = acc
        torch.save(model.state_dict(), 'rnn_siam.pth')
        print(f'Epoch {epoch} best model saved with accuracy: {best_acc:2.2%}')
    else:
        trials += 1
        if trials >= patience:
            print(f'Early stopping on epoch {epoch}')
            break

Start model training
Epoch:   1. Loss: 0.409851. Acc.: 85.80%. F1: 0.879077
Epoch 1 best model saved with accuracy: 85.80%
Epoch:   2. Loss: 0.232657. Acc.: 84.70%. F1: 0.858451
Epoch:   3. Loss: 0.919998. Acc.: 86.00%. F1: 0.865398
Epoch 3 best model saved with accuracy: 86.00%
Epoch:   4. Loss: 0.282409. Acc.: 86.10%. F1: 0.872080
Epoch 4 best model saved with accuracy: 86.10%
Epoch:   5. Loss: 0.179584. Acc.: 86.20%. F1: 0.867958
Epoch 5 best model saved with accuracy: 86.20%
Epoch:   6. Loss: 0.143323. Acc.: 86.10%. F1: 0.864766
Epoch:   7. Loss: 0.090099. Acc.: 86.00%. F1: 0.864713
Epoch:   8. Loss: 0.107419. Acc.: 86.10%. F1: 0.872636
Epoch:   9. Loss: 0.310652. Acc.: 86.30%. F1: 0.862340
Epoch 9 best model saved with accuracy: 86.30%
Epoch:  10. Loss: 0.414545. Acc.: 86.30%. F1: 0.880339
Epoch:  11. Loss: 0.280629. Acc.: 86.10%. F1: 0.870752
Epoch:  12. Loss: 0.104920. Acc.: 85.70%. F1: 0.870960
Epoch:  13. Loss: 0.281201. Acc.: 86.30%. F1: 0.875091
Epoch:  14. Loss: 0.495537. A

Exception ignored in: <function _releaseLock at 0x7f504e45d940>
Traceback (most recent call last):
  File "/home/user/conda/envs/dragon/lib/python3.8/logging/__init__.py", line 227, in _releaseLock
    def _releaseLock():
KeyboardInterrupt: 


RuntimeError: DataLoader worker (pid(s) 3811060, 3811172, 3811284, 3811396, 3811508, 3811620, 3811732) exited unexpectedly

In [240]:
@dataclass()
class UserStatisticItem:
    sent_messages_count: int
    received_messages_count: int 
    recipient_counts: int
    bcc_count: int 
    cc_count: int 
    days_between_received_and_read: []
    replied_messages_count: int 
    sent_characters_count: int 
    messages_outside_working_hours: int 
    received_to_sent_ratio: float
    bytesReceivedToSentRatio: float
    messages_with_question_and_no_reply: int
    read_messages_later_than: int
    count_events: int

        

    def dict(self):
        return {k: v for k, v in asdict(self).items()}

In [236]:
import pandas as pd

worker = pd.DataFrame(gen_worker_df(max_range))

In [237]:
from dataclasses import dataclass, asdict

In [241]:
form = []
for r in worker.values:
    form.append(UserStatisticItem(sent_messages_count=r[0],
            received_messages_count=r[1], 
            recipient_counts=r[2],
            bcc_count=r[3],
            cc_count=r[4], 
            days_between_received_and_read= r[5],
            replied_messages_count=r[6],
            sent_characters_count=r[7], 
            messages_outside_working_hours=r[8], 
            received_to_sent_ratio=r[9],
            bytesReceivedToSentRatio=r[10],
            messages_with_question_and_no_reply=r[0],
            read_messages_later_than=r[1],
            count_events=r[2]))

In [250]:
def to_worker_format(form):
    res = {}
    for k in form[0].dict():
        res[k] = []
    for row in form:
        row_d = row.dict()
        for k in row_d:
            res[k].append(row_d[k])
    return pd.DataFrame(res)

In [251]:
to_worker_format(form)

Unnamed: 0,sent_messages_count,received_messages_count,recipient_counts,bcc_count,cc_count,days_between_received_and_read,replied_messages_count,sent_characters_count,messages_outside_working_hours,received_to_sent_ratio,bytesReceivedToSentRatio,messages_with_question_and_no_reply,read_messages_later_than,count_events
0,6,3,5,2,5,3,2,5,1725,3,2,6,3,5
1,7,3,5,2,6,3,2,6,2013,4,2,7,3,5
2,6,4,4,2,6,3,2,6,1937,3,2,6,4,4
3,6,3,4,2,5,3,2,5,1764,3,2,6,3,4
4,5,2,4,2,4,2,2,4,1352,3,2,5,2,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,4,6,5,0,5,1,2,5,167,5,0,4,6,5
76,5,5,6,1,4,1,2,5,289,5,0,5,5,6
77,5,5,5,0,4,2,2,5,392,6,0,5,5,5
78,5,8,7,0,5,2,3,5,518,6,0,5,8,7


In [387]:
scale(a, 10)

tensor([[[0.2000],
         [0.2000],
         [0.2000],
         [0.2000],
         [0.2000],
         [0.2000],
         [0.2000],
         [0.2000],
         [0.2000],
         [0.2000],
         [0.2000],
         [0.2000],
         [0.2000],
         [0.2000],
         [0.2000],
         [0.2000],
         [0.2000],
         [0.2000],
         [0.2000],
         [0.2000]]])

In [780]:
model.load_state_dict(torch.load("lstm_siam_v4.pth"))

<All keys matched successfully>

In [781]:
model.eval()

LSTMClassifier(
  (rnn_1): LSTM(1, 256, num_layers=3, batch_first=True)
  (rnn_2): LSTM(1, 256, num_layers=3, batch_first=True)
  (fc): Linear(in_features=512, out_features=1, bias=True)
)

In [798]:
a = torch.tensor(np.pad([7 for x in range(40)], (365 - 40 % 365, 0)))#.unsqueeze(0).unsqueeze(2)
b = torch.tensor(np.pad([9 for x in range(20)], (365 - 20 % 365, 0)))#.unsqueeze(0).unsqueeze(2)

In [799]:
a, b

(tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0

In [800]:
a = scale(a.unsqueeze(0).unsqueeze(2), 10)
b = scale(b.unsqueeze(0).unsqueeze(2), 10)

In [801]:
1 - model(a, b)

tensor([[0.0025]], grad_fn=<RsubBackward1>)

In [242]:
form[0].dict()

{'sent_messages_count': 6,
 'received_messages_count': 3,
 'recipient_counts': 5,
 'bcc_count': 2,
 'cc_count': 5,
 'days_between_received_and_read': 3,
 'replied_messages_count': 2,
 'sent_characters_count': 5,
 'messages_outside_working_hours': 1725,
 'received_to_sent_ratio': 3,
 'bytesReceivedToSentRatio': 2,
 'messages_with_question_and_no_reply': 6,
 'read_messages_later_than': 3,
 'count_events': 5}

In [116]:
print(f"Proba is: {predict_by_max(model, scale_by_max(worker, max_range), type_up)}")

Proba is: 0.7206324149261821


In [89]:
model.eval()

LSTMClassifier(
  (rnn): LSTM(1, 256, num_layers=3, batch_first=True)
  (fc): Linear(in_features=256, out_features=1, bias=True)
)

In [None]:
model()