In [None]:
!pip install ydata_profiling==4.5.1
!pip install matplotlib==3.7.3
!pip install pandas
!pip install numpy
!pip install sklearn
!pip install category_encoders
!pip install imblearn
!pip install tabulate
!pip install seaborn
!pip install torch

In [97]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler, MinMaxScaler, StandardScaler
from imblearn.combine import SMOTEENN
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
import warnings
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression, Lasso, Ridge
from ydata_profiling import ProfileReport
from sklearn.metrics import accuracy_score, f1_score, mean_squared_error, mean_absolute_error

import torch
from torch import nn
import torch.optim as optim # for optimizer
from torch.utils.tensorboard import SummaryWriter #for Tensorboard
from torch.nn import MSELoss, L1Loss
from torch.optim import SGD, Adam
import torch.nn.functional as F

%matplotlib inline

# Отключить все предупреждения
warnings.filterwarnings('ignore')

# Task 1.1: Multi-task deep learning

Read data

In [98]:
df = pd.read_json('lateness_data.json')

Make a report. Didn't find some outliers. Not have significant imbalance

In [3]:
from ydata_profiling import ProfileReport
profile = ProfileReport(df, title="Profiling Report")
profile.to_file("lateness_data.html")

Summarize dataset: 100%|█████████████| 86/86 [00:07<00:00, 11.23it/s, Completed]
Generate report structure: 100%|██████████████████| 1/1 [00:01<00:00,  1.70s/it]
Render HTML: 100%|████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s]
Export report to file: 100%|█████████████████████| 1/1 [00:00<00:00, 116.59it/s]


### Preprocessing data

Encode categorical columns. 

In [99]:
label_encoder = LabelEncoder()
df['direct_delivery'] = label_encoder.fit_transform(df['direct_delivery'])
df['batched_pickup'] = label_encoder.fit_transform(df['batched_pickup'])
df['status'] = label_encoder.fit_transform(df['status'])

custom_mapping = {
    'automobile': 4,
    'bicycle': 3,
    'scooter': 2,
    'foot': 1,
}
df['transport_type'] = df['transport_type'].map(custom_mapping)

Make new feature instead two datetimes features. Retrieve day, month and hour of order status

In [100]:
df['order_time'] = pd.to_datetime(df['order_time'])

df['order_day'] = df['order_time'].dt.day
df['order_month'] = df['order_time'].dt.month
df['order_hour'] = df['order_time'].dt.hour

#sort by time like time series and delete this feature (not meaningfull)
df.sort_values(by=['order_time'], inplace=True)
df.drop(['order_time'], axis = 1, inplace=True)

### Split Data

In [101]:
train, test = train_test_split(df, test_size=0.2, shuffle = False)

### Scaling

It scales and transforms features (variables) in a dataset so that they fall within a specific range, StandardScaler: between -1 and 1. Deep learning models often include batch normalization layers, which can adapt to different scales during training. However, it's still a good practice to standardize your input data. 

In [102]:
columns_to_scale = ['delivery_distance', 'order_preparation_time', 'responsible_id', 'store_latitude', 'store_longitude', 'client_latitude', 'client_longitude', 'order_day', 'order_month', 'order_hour']

scaler = StandardScaler()
scaler.fit(train[columns_to_scale])
train[columns_to_scale] = scaler.transform(train[columns_to_scale])
test[columns_to_scale] = scaler.transform(test[columns_to_scale])

### Feature Selecting
Lasso (Least Absolute Shrinkage and Selection Operator) is a regularization technique used in linear regression and other machine learning models to select important features and prevent overfitting. 

In [103]:
lasso = Lasso(alpha=0.01)
# Fit the Lasso model on the training data for REGRESSION task
X_train = train.drop(['status_time', 'status'], axis = 1)
lasso.fit(X_train, train['status_time'])
# Get the feature importances or coefficients
feature_importances = lasso.coef_
importance_dict = dict(zip(X_train.columns, feature_importances))
sorted_importance = sorted(importance_dict.items(), key=lambda x: abs(x[1]), reverse=True)
print(sorted_importance)

[('direct_delivery', 1.7046368152150357), ('order_preparation_time', 0.7658858162604002), ('batched_pickup', 0.6962759992259239), ('order_month', 0.46963854823090223), ('order_day', 0.18190807784478655), ('store_longitude', 0.10569262556105394), ('order_hour', -0.09607567851672215), ('responsible_id', 0.072734150790699), ('delivery_distance', 0.07162370604466285), ('transport_type', -0.0), ('store_latitude', 0.0), ('client_latitude', 0.0), ('client_longitude', 0.0)]


In [104]:
lasso = Lasso(alpha=0.01)
# Fit the Lasso model on the training data for CLASSIFICATION task
lasso.fit(X_train, train['status'])
# Get the feature importances or coefficients
feature_importances = lasso.coef_
importance_dict = dict(zip(X_train.columns, feature_importances))
sorted_importance = sorted(importance_dict.items(), key=lambda x: abs(x[1]), reverse=True)
print(sorted_importance)

[('direct_delivery', -0.24282146206339947), ('order_preparation_time', -0.08341745349942899), ('batched_pickup', -0.023131345996951994), ('delivery_distance', 0.01871273858638263), ('order_month', -0.01847353742267497), ('client_latitude', 0.017151125508079717), ('responsible_id', -0.010556054423950844), ('store_longitude', -0.003971829480059397), ('order_hour', 0.00015904912685032446), ('transport_type', 0.0), ('store_latitude', 0.0), ('client_longitude', -0.0), ('order_day', -0.0)]


For both task we don't need columns (has zeroes coeficient): store_latitude, client_longitude, responsible_id, transport_type

In [105]:
train.drop(['responsible_id', 'transport_type', 'store_latitude', 'client_latitude'], axis = 1, inplace=True) 
test.drop(['responsible_id', 'transport_type', 'store_latitude', 'client_latitude'], axis = 1, inplace=True) 

### Make Dataloader

In [155]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        sample = self.dataframe.iloc[idx]
        inputs = torch.Tensor(sample[features_columns])  # Extract input features from DataFrame
        classification_label = torch.tensor([sample[classification_column]], dtype=torch.int64)  # Extract classification label from DataFrame
        regression_label = torch.Tensor([sample[regression_column]])  # Extract regression label from DataFrame

        return inputs, classification_label, regression_label


# Names of DataFrame columns
features_columns = [name for name in train.columns if name not in ['status','status_time']]
classification_column = 'status' 
regression_column = 'status_time'  

# Create a CustomDataset instance
custom_dataset_train = CustomDataset(train)
custom_dataset_test = CustomDataset(test)

# Create a dataloader
batch_size = 64  # Specify the batch size - CHANGE
train_dataloader = DataLoader(custom_dataset_train, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(custom_dataset_test, batch_size=batch_size, shuffle=False)

### Multi-Task Model


Input we had 9 columns. Model has 5 layers. In output layer we have 4 neuros, 3 for classification task (probability for each class) and 1 for regression task. Loss will be the sum of losses for regression and classification tasks.

In [156]:
class MyMultiTaskNet1(nn.Module):
    def __init__(self, input_size):
        super(MyMultiTaskNet1, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 32)
        self.fc5 = nn.Linear(32, 4)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = torch.relu(self.fc4(x))
        x = self.fc5(x)
        classification_output, regression_output = x.split([3, 1], dim=1)
        classification_output = torch.nn.functional.log_softmax(classification_output, dim=1)
        return classification_output, regression_output

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model_nn = MyMultiTaskNet1(input_size = len(train.columns) - 2).to(device)
number_of_model = 3

In [157]:
def trainModel(model, device, train_loader, optimizer, epoch, log_interval=700, alpha=0.2, beta=0.8):
    train_loss_class = []
    train_loss_reg = []
    train_loss_total = []
    all_labels = []
    all_predictions = []
    mae_reg_error = []
    model.train()
    for batch_idx, (data, class_target, reg_target) in enumerate(train_loader):
        data, class_target, reg_target = data.to(device), class_target.to(device), reg_target.to(device)
        optimizer.zero_grad()
        class_output, reg_output = model(data)

        class_target = class_target.reshape(1, -1).squeeze()
        class_loss = F.nll_loss(class_output, class_target)
        reg_loss = criterion_regression(reg_output, reg_target)

        loss = alpha * class_loss + beta * reg_loss
        loss.backward()
        optimizer.step()

        mae_reg_error.append(mean_absolute_error(reg_target, reg_output.detach().numpy()))
        
        train_loss_class.append(class_loss.item())
        train_loss_reg.append(reg_loss.item())
        train_loss_total.append(loss.item())
        
        _, predictions = torch.max(class_output, 1)
        all_labels.extend(class_target.cpu().numpy())
        all_predictions.extend(predictions.cpu().numpy())
        
        if batch_idx % log_interval == 0:
            print(f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader)}%)]\tLoss: {loss.item()}, Class Loss: {class_loss.item()}, Regression Loss: {reg_loss.item()}")

    MAE = sum(mae_reg_error) / len(mae_reg_error)
    train_loss_class = sum(train_loss_class) / len(train_loss_class)
    train_loss_reg = sum(train_loss_reg) / len(train_loss_reg)
    train_loss_total = sum(train_loss_total) / len(train_loss_total)
    accuracy = accuracy_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions, average='weighted')
    print(f"TRAIN: MAE = {MAE}   Accuracy = {accuracy}    F1 = {f1}\n\n")

    writer.add_scalar(f'Training Class Loss (MyMultiTaskNet{number_of_model})', train_loss_class, epoch)
    writer.add_scalar(f'Training Reg Loss (MyMultiTaskNet{number_of_model})', train_loss_reg, epoch)
    writer.add_scalar(f'Training Total Loss (MyMultiTaskNet{number_of_model})', train_loss_total, epoch)
    writer.add_scalar(f'Training MAE (MyMultiTaskNet{number_of_model})', MAE, epoch)
    writer.add_scalar(f'Training Accuracy (MyMultiTaskNet{number_of_model})', accuracy, epoch)
    writer.add_scalar(f'Training F1 Score (MyMultiTaskNet{number_of_model})', f1, epoch)
    
def testModel(model, device, test_loader):
    model.eval()
    test_loss_class = []
    test_loss_reg = []
    all_labels = []
    all_predictions = []
    mae_reg_error = []
    with torch.no_grad():
        for batch_idx, (data, class_target, reg_target) in enumerate(test_dataloader):
            data, class_target, reg_target = data.to(device), class_target.to(device), reg_target.to(device)
            class_output, reg_output = model(data)

            class_target = class_target.reshape(1, -1).squeeze()
            class_loss = torch.nn.functional.nll_loss(class_output, class_target, reduction='sum').item()  # sum up batch loss
            reg_loss = criterion_regression(reg_output, reg_target)

            test_loss_class.append(class_loss)
            test_loss_reg.append(reg_loss.item())
        
            _, predictions = torch.max(class_output, 1)
            all_labels.extend(class_target.cpu().numpy())
            all_predictions.extend(predictions.cpu().numpy())
            
            mae_reg_error.append(mean_absolute_error(reg_target, reg_output))

    MAE = sum(mae_reg_error) / len(mae_reg_error)
    test_loss_class = sum(test_loss_class) / len(test_loss_class)
    test_loss_reg = sum(test_loss_reg) / len(test_loss_reg)
    accuracy = accuracy_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions, average='weighted')
    print(f"TEST: MAE = {MAE}   Accuracy = {accuracy}    F1 = {f1}\n\n")

    writer.add_scalar(f'Testing Class Loss (MyMultiTaskNet{number_of_model})', test_loss_class, epoch)
    writer.add_scalar(f'Testing Reg Loss (MyMultiTaskNet{number_of_model})', test_loss_reg, epoch)
    writer.add_scalar(f'Testing MAE (MyMultiTaskNet{number_of_model})', MAE, epoch)
    writer.add_scalar(f'Testing Accuracy (MyMultiTaskNet{number_of_model})', accuracy, epoch)
    writer.add_scalar(f'Testing F1 Score (MyMultiTaskNet{number_of_model})', f1, epoch)

In [158]:
#alpha=0.2 batch_size = 64
epochs = 5
lr = 0.01
momentum = 0.9
log_interval = 200
criterion_regression = nn.MSELoss()
#optimizer = optim.SGD(model_nn.parameters(), lr=lr, momentum=momentum)
optimizer = optim.Adam(model_nn.parameters(), lr=0.001)
writer = SummaryWriter("/Users/ninelco/Documents/Innopolis/F23/ML/Assignment2")

for epoch in range(1, epochs + 1):
    trainModel(model_nn, device, train_dataloader, optimizer, epoch, log_interval, alpha=0.5, beta=0.5)
    testModel(model_nn, device, test_dataloader)

writer.close()

TRAIN: MAE = 6.164964958907034   Accuracy = 0.3753978106308543    F1 = 0.3698561615191156


TEST: MAE = 6.573940608274696   Accuracy = 0.4393084458086941    F1 = 0.3499492502266904


TRAIN: MAE = 6.123937360818636   Accuracy = 0.42351879020426586    F1 = 0.41948805661089594


TEST: MAE = 6.509818052352333   Accuracy = 0.4726222182097233    F1 = 0.4536517296642787


TRAIN: MAE = 6.1083176855576164   Accuracy = 0.45288342173569573    F1 = 0.44182111198254415


TEST: MAE = 6.478793904142353   Accuracy = 0.4765494515415519    F1 = 0.4559696772765077


TRAIN: MAE = 6.098667332411673   Accuracy = 0.4604107888500169    F1 = 0.4491056611242377


TEST: MAE = 6.462976311400576   Accuracy = 0.47334446801787566    F1 = 0.44845008280833704


TRAIN: MAE = 6.091481416406184   Accuracy = 0.46179889403001917    F1 = 0.45392903491544195


TEST: MAE = 6.452321639322067   Accuracy = 0.4717194059495328    F1 = 0.4364094009555219




# Task 1.2: Cascade deep learning

At first linear model will predict the status of order. Then our X with predict status will be input for linear model for regression model. Both models have own loss and optimizer.

In [131]:
class ClassClassificationModel(nn.Module):
    def __init__(self):
        super(ClassClassificationModel, self).__init__()
        self.fc1 = nn.Linear(len(train.columns) - 2, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 3)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        class_output = self.fc4(x)
        return torch.nn.functional.log_softmax(class_output, dim=1)

class TimePredictionModel(nn.Module):
    def __init__(self):
        super(TimePredictionModel, self).__init__()
        self.fc1 = nn.Linear(len(train.columns) - 1, 512)
        self.drop_1 = nn.Dropout2d(p=0.4)
        self.fc2 = nn.Linear(512, 256)
        self.drop_2 = nn.Dropout2d(p=0.2)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 64)
        self.fc5 = nn.Linear(64, 32)
        self.fc6 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.drop_1(self.fc1(x)))
        x = torch.tanh(self.drop_2(self.fc2(x)))
        x = torch.relu(self.fc3(x))
        x = torch.tanh(self.fc4(x))
        x = torch.relu(self.fc5(x))
        time_output = self.fc6(x)
        return time_output

model_classs = ClassClassificationModel()
model_reg = TimePredictionModel()
number_of_model = 3

In [125]:
def trainCascade(model_class, model_reg, device, train_loader, optimizer_class, optimizer_reg, epoch, log_interval=700, alpha=0.2, beta=0.8):
    model_class.train()
    model_reg.train()
    train_loss_class = []
    train_loss_reg = []
    all_labels = []
    all_predictions = []
    mae_reg_error = []
    
    for batch_idx, (data, class_target, reg_target) in enumerate(train_loader):
        data, class_target, reg_target = data.to(device), class_target.to(device), reg_target.to(device)
        optimizer_class.zero_grad()
        optimizer_reg.zero_grad()
        
        class_output = model_class(data)
        class_target = class_target.reshape(1, -1).squeeze()
        class_loss = F.nll_loss(class_output, class_target)

        _,pred_t = torch.max(class_output, dim=1)
        reg_output = model_reg(torch.cat((data, pred_t.unsqueeze(1)), dim=1))
        reg_loss = criterion_regression(reg_output, reg_target)

        class_loss.backward()
        reg_loss.backward()
        
        optimizer_class.step()
        optimizer_reg.step()

        train_loss_class.append(class_loss.item())
        train_loss_reg.append(reg_loss.item())
        
        all_labels.extend(class_target.cpu().numpy())
        all_predictions.extend(pred_t.cpu().numpy())

        mae_reg_error.append(mean_absolute_error(reg_target, reg_output.detach().numpy()))
        
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss Class: {:.6f} \tLoss Reg: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                       100. * batch_idx / len(train_loader), class_loss.item(), reg_loss.item()))
            
    MAE = sum(mae_reg_error) / len(mae_reg_error)
    train_loss_class = sum(train_loss_class) / len(train_loss_class)
    train_loss_reg = sum(train_loss_reg) / len(train_loss_reg)
    accuracy = accuracy_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions, average='weighted')
    print(f"TRAIN: MAE = {MAE}   Accuracy = {accuracy}    F1 = {f1}\n\n")
    
    writer.add_scalar(f'Training Class Loss (Cascade{number_of_model})', train_loss_class, epoch)
    writer.add_scalar(f'Training Reg Loss (Cascade1{number_of_model})', train_loss_reg, epoch)
    writer.add_scalar(f'Training MAE (Cascade{number_of_model})', MAE, epoch)
    writer.add_scalar(f'Training Accuracy (Cascade{number_of_model})', accuracy, epoch)
    writer.add_scalar(f'Training F1 Score (Cascade{number_of_model})', f1, epoch)

def testCascade(model_classs, model_reg, device, test_loader):
    model_classs.eval()
    model_reg.eval()
    test_loss_class = []
    test_loss_reg = []
    all_labels = []
    all_predictions = []
    mae_reg_error = []
    
    with torch.no_grad():
        for batch_idx, (data, class_target, reg_target) in enumerate(test_dataloader):
            data, class_target, reg_target = data.to(device), class_target.to(device), reg_target.to(device)
            
            class_output = model_classs(data)
            _,pred_t = torch.max(class_output, dim=1)
            reg_output = model_reg(torch.cat((data, pred_t.unsqueeze(1)), dim=1))
            
            class_target = class_target.reshape(1, -1).squeeze()
            class_loss = torch.nn.functional.nll_loss(class_output, class_target, reduction='sum').item()  # sum up batch loss
            reg_loss = criterion_regression(reg_output, reg_target)

            test_loss_class.append(class_loss)
            test_loss_reg.append(reg_loss.item())
        
            _, predictions = torch.max(class_output, 1)
            all_labels.extend(class_target.cpu().numpy())
            all_predictions.extend(predictions.cpu().numpy())
            
            mae_reg_error.append(mean_absolute_error(reg_target, reg_output))
    
    MAE = sum(mae_reg_error) / len(mae_reg_error)
    test_loss_class = sum(test_loss_class) / len(test_loss_class)
    test_loss_reg = sum(test_loss_reg) / len(test_loss_reg)
    accuracy = accuracy_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions, average='weighted')
    print(f"TEST: MAE = {MAE}   Accuracy = {accuracy}    F1 = {f1}\n\n")
    writer.add_scalar(f'Testing Class Loss (MyMultiTaskNet{number_of_model})', test_loss_class, epoch)
    writer.add_scalar(f'Testing Reg Loss (MyMultiTaskNet{number_of_model})', test_loss_reg, epoch)
    writer.add_scalar(f'Testing MAE (MyMultiTaskNet{number_of_model})', MAE, epoch)
    writer.add_scalar(f'Testing Accuracy (MyMultiTaskNet{number_of_model})', accuracy, epoch)
    writer.add_scalar(f'Testing F1 Score (MyMultiTaskNet{number_of_model})', f1, epoch)

In [132]:
#batch_size = 64
epochs = 5
log_interval = 200
criterion_regression = nn.MSELoss()
optimizer_class = optim.Adam(model_classs.parameters(), lr=0.001)
optimizer_reg = optim.Adam(model_reg.parameters(), lr=0.001)

writer = SummaryWriter("/Users/ninelco/Documents/Innopolis/F23/ML/Assignment2")

for epoch in range(1, epochs + 1):
    trainCascade(model_classs, model_reg, device, train_dataloader, optimizer_class, optimizer_reg, epoch, log_interval)
    testCascade(model_classs, model_reg, device, test_dataloader)

writer.close()

TRAIN: MAE = 6.3166157386793556   Accuracy = 0.4581649926644848    F1 = 0.4462272542836215


TEST: MAE = 6.694173560018842   Accuracy = 0.4678824538437232    F1 = 0.4503486529077839


TRAIN: MAE = 6.308258836450129   Accuracy = 0.4647556709175037    F1 = 0.4484847694280902


TEST: MAE = 6.679978230501114   Accuracy = 0.4783099354489234    F1 = 0.45954005542823456


TRAIN: MAE = 6.3069045325048565   Accuracy = 0.4673964563818982    F1 = 0.45291108336670494


TEST: MAE = 6.676577662871963   Accuracy = 0.4768654358326186    F1 = 0.4591973653924061


TRAIN: MAE = 6.305986437470474   Accuracy = 0.46933754655230786    F1 = 0.4564791730740656


TEST: MAE = 6.680291362729471   Accuracy = 0.48431363697919017    F1 = 0.46241445371736584


TRAIN: MAE = 6.305791368381211   Accuracy = 0.46948425685588535    F1 = 0.45209465330589393


TEST: MAE = 6.681216336121133   Accuracy = 0.4784904979009615    F1 = 0.45737774311772583




# Conclusion

Was made two new models (multitask and cascade) Was calculated loss, mae (error in minutes), accuracy and f1.

batch_size = 64 Adam(lr=0.001) (model 1)

*MuktiTask*

TEST: MAE = 6.470548434628533   Accuracy = 0.46300726763869454    F1 = 0.4368899685484079 (evaluate stopped - local minimum)

*Cascade*

TEST: MAE = 6.47104716644507   Accuracy = 0.4819663251026949    F1 = 0.45667140737774714 (evaluate stopped - local minimum)

----------
batch_size = 32 Adam(lr=0.001) (model 2) - results are similar, calculate more time, better use 64

*MuktiTask*

TEST: MAE = 6.467062409305985   Accuracy = 0.46887554732993275    F1 = 0.4518291777498136

*Cascade*

TEST: MAE = 6.487257389795213   Accuracy = 0.4859838396605426    F1 = 0.45839697858374007
________
batch_size = 64 Adam(lr=0.01) (model 3)

*MuktiTask*

TEST: MAE = 6.402300580434222   Accuracy = 0.4539791450367896    F1 = 0.4397245795172217

*Cascade*

TEST: MAE = 6.681216336121133   Accuracy = 0.4784904979009615    F1 = 0.45737774311772583

### Another models
For upgrade results was changed: optimizers, layers, batch_sizes, epochs, weights of losses - but there are no significant changes. Model find local minimum and can't go out from it. 
Also try change time_status: if early then to minutes add minus, but it was worse for results. In SGD some times was gradient vanishing.