# Кейс ЦБ РФ

## Подключение нужных библиотек

In [58]:
import numpy as np
import torch
from tqdm import tqdm

## Создаем Dataset для дальнейшей работы

In [45]:
class CbrDataset(torch.utils.data.Dataset):
    def __init__(self, inp_file, out_file):
        self.x_data = np.load(inp_file)
        self.y_data = np.load(out_file)
        
    def __len__(self):
        return len(self.x_data)
        
    def __getitem__(self, idx):
        inputs = torch.from_numpy(self.x_data[idx])
        outputs = torch.from_numpy(self.y_data[idx]) 
        return (torch.flatten(inputs), torch.flatten(outputs))  

## Создаем нужную нейросеть

In [47]:
class CbrNet(torch.nn.Module):
  def __init__(self):
    super(CbrNet, self).__init__()
    self.hid1 = torch.nn.Linear(600, 300)  
    self.hid2 = torch.nn.Linear(300, 100)
    self.oupt = torch.nn.Linear(100, 15)

    torch.nn.init.xavier_uniform_(self.hid1.weight) 
    torch.nn.init.zeros_(self.hid1.bias)
    torch.nn.init.xavier_uniform_(self.hid2.weight)
    torch.nn.init.zeros_(self.hid2.bias)
    torch.nn.init.xavier_uniform_(self.oupt.weight)
    torch.nn.init.zeros_(self.oupt.bias)

  def forward(self, x):
    z = torch.tanh(self.hid1(x)) 
    z = torch.tanh(self.hid2(z))
    z = self.oupt(z)  
    return z


## Обучение модели

In [59]:
def train(model, ds, bs, lr, me, le):
    train_ldr = torch.utils.data.DataLoader(ds, batch_size=bs, shuffle=True)
    loss_func = torch.nn.L1Loss()  
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    for epoch in tqdm(range(0, me)): # RSF
        epoch_loss = 0.0  
        for (b_idx, batch) in enumerate(train_ldr):
            X = batch[0]
            X = X.to(torch.float32)
            X = X.to(device) # RSF
            y = batch[1]
            y = y.to(torch.float32)
            y = y.to(device) # RSF
            optimizer.zero_grad()
            oupt = model(X)
            loss_val = loss_func(oupt, y)  
            epoch_loss += loss_val.item()  
            loss_val.backward()  
            optimizer.step()     

        if epoch % le == 0:
            print(f"'эпоха = {epoch: 04d}  |  loss = {epoch_loss:0.4f}")
            torch.save(model, f"./results/model_{epoch:04d}")
    
    torch.save(model, f"./results/model_final.pt")
        

In [None]:
def accuracy(model, ds, pct_close):
    n_correct = 0; n_wrong = 0
    data_ldr =  torch.utils.data.DataLoader(ds, batch_size=1, shuffle=False)
    for (b_ix, batch) in tqdm(enumerate(ds)):
        X = batch[0]
        X = X.to(torch.float32)
        X = X.to(device) # RSF
        Y = batch[1]  
        Y = Y.to(torch.float32)
        Y = Y.to(device) # RSF
        with torch.no_grad():
            oupt = model(X)  
        if torch.abs(oupt[0] - Y[0]) < torch.abs(pct_close * Y[0]) and torch.abs(oupt[1] - Y[1]) < torch.abs(pct_close * Y[1]):
            n_correct += 1
        else:
            n_wrong += 1
    return (n_correct * 1.0) / (n_correct + n_wrong)

## Главная программа

In [49]:
# 0. Начало
print("\nРешение кейса ЦБ России с использованием PyTorch")
np.random.seed(42) 
torch.manual_seed(42) 

# Проверяем наличие GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Используем устройство:', device)


Решение кейса ЦБ России с использованием PyTorch
Используем устройство: cuda


In [69]:
# 1. Создание объектов Dataset и DataLoader
print("\nЗагрузка обучающего Dataset")
train_input_file = f"./data/y_smp_train.npy"
train_output_file = f"./data/pars_smp_train.npy"
train_dataset = CbrDataset(train_input_file, train_output_file)
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True) # RSF


Загрузка обучающего Dataset


In [51]:
data_batch, labels_batch = next(iter(trainloader))
print(data_batch.size())
# out: torch.Size([16, 3, 32, 32])
print(labels_batch.size())
# out: torch.Size([16])

#  test_file = ".\\Data\\boston_test.txt"
#  test_ds = BostonDataset(test_file)

torch.Size([16, 600])
torch.Size([16, 15])


In [52]:
# 2. Создание модели
print("\nСоздание 600-(300-100)-15 регрессионной нейросети")
net = CbrNet().to(device)
net.train()


Создание 600-(300-100)-15 регрессионной нейросети


CbrNet(
  (hid1): Linear(in_features=600, out_features=300, bias=True)
  (hid2): Linear(in_features=300, out_features=100, bias=True)
  (oupt): Linear(in_features=100, out_features=15, bias=True)
)

In [60]:
# 3. Обучаем модель
bs=10 # размер батча
lr=0.005 
me=50 # Максимальное количество эпох
le=10 # Точки выдачи отчета

print(f"\nbatch size = {bs}", end = " ")
print("функция потерь = L1Loss()", end = " ")
print("оптимизатор = Adam", end = " ")
print(f"learn rate = {lr}", end = " ")
print(f"количество эпох = {me}")

print("\nНачало обучения ")
train(net, train_dataset, bs, lr, me=50, le=10)
print("Завершение")


batch size = 10 функция потерь = L1Loss() оптимизатор = Adam learn rate = 0.005 количество эпох = 50

Начало обучения 


  0%|                                                                                           | 0/50 [00:00<?, ?it/s]

epoch =    0  |  loss = 21416.6048


 20%|███████████████▊                                                               | 10/50 [49:51<3:17:44, 296.61s/it]

epoch =   10  |  loss = 21310.0089


 40%|██████████████████████████████▊                                              | 20/50 [1:40:46<2:30:44, 301.50s/it]

epoch =   20  |  loss = 21290.5060


 60%|██████████████████████████████████████████████▏                              | 30/50 [2:32:05<1:41:16, 303.81s/it]

epoch =   30  |  loss = 21260.3964


 80%|███████████████████████████████████████████████████████████████▏               | 40/50 [3:21:28<49:21, 296.18s/it]

epoch =   40  |  loss = 21136.3213


100%|███████████████████████████████████████████████████████████████████████████████| 50/50 [4:11:07<00:00, 301.36s/it]


Завершение


In [None]:
# Загрузка модели
net = torch.load(f"./results/model_final.pt")

In [1]:
# 4. Точность модели
#net.eval()
#acc_train = accuracy(net, train_dataset, 0.20)
#print(f"\nТочность обучения (внутри 0.20) = {acc_train:%0.4f}")
#acc_test = accuracy(net, test_dataset, 0.20)
#print(f"Точность на тесте (внутри 0.20) = {acc_test:%0.4f}")

In [None]:
# 5. Использование модели
print("\nPredicting normalized (poverty, price) first train")
print("Actual (poverty, price) = (0.0914, 0.2160) ")

x = np.array([0.000273, 0.000, 0.0707, -1, 0.469, 0.6421, 0.789, 0.049671, 0.02, 0.242, 0.178, 0.39690], dtype=np.float32)
x = torch.tensor(x, dtype=torch.float32)

with torch.no_grad():
    oupt = net(x)
print("Predicted poverty price = %0.4f %0.4f " % (oupt[0], oupt[1]))
print("\nEnd demo ")

In [20]:
#trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=1, shuffle=True) # RSF
#data_batch, labels_batch = next(iter(trainloader))
#print(data_batch.size())
#print(labels_batch.size())

torch.Size([1, 600])
torch.Size([1, 15])


In [82]:
testfile = np.load(f"./data/y_smp_test.npy")
#print(testfile.shape)
for i in range(testfile.shape[0]):
    if i > 0:
        break
    #print(testfile[i])
    X = testfile[i]
    inp = torch.from_numpy(X)
    inp = inp.to(torch.float32)
    inp = torch.flatten(inp)
    inp = inp.to(device) 
    with torch.no_grad():
        outp = net(inp)
        print(outp)

tensor([ 1.7573,  1.5800,  0.5893,  0.3319,  1.4420,  0.1287,  1.1085,  2.0747,
         0.4631,  0.5308,  0.5701, -0.0925,  0.0801,  0.0191,  0.0567],
       device='cuda:0')


In [None]:
with open('./data/random_submit_new.npy', 'wb') as submit:
    np.save(submit, np.array([1, 2]))
    np.save(submit, np.array([1, 3]))
with open('./data/random_submit_new.npy', 'rb') as submit:
    a = np.load(submit)
    b = np.load(submit)
print(a, b)
#for i in range(submit.shape)

In [83]:
with open('./data/random_submit.npy', 'rb') as submit:
    print(submit[0])

TypeError: '_io.BufferedReader' object is not subscriptable

In [None]:
def repeat_number(outpa, num):
    arr = []
    for elem in outpa:
        arr_t = []
        for i in range(num):
            arr_t.append(elem)
        arr.append(arr_t)
    return(arr)

testfile = np.load(f"./data/y_smp_test.npy")
#print(testfile.shape)
submit = open('./data/random_submit_new.npy', 'wb')
submit_arr = np.empty((100000, 15, 6))
for i in range(testfile.shape[0]):
    print(f"{i:05d}", end="\r")
    #print(testfile[i])
    X = testfile[i]
    inp = torch.from_numpy(X)
    inp = inp.to(torch.float32)
    inp = torch.flatten(inp)
    inp = inp.to(device) 
    with torch.no_grad():
        outp = net(inp)
        outpa = outp.detach().cpu().numpy()
        outpa = repeat_number(outpa, 6)
        outp = np.array(outpa)
        submit_arr[i] = outp
print()
np.save(submit, submit_arr)
submit.close