In [1]:
import torch
from torch.optim import Adam
from torch.utils.data import TensorDataset, DataLoader
from torch import nn

import numpy as np
from sklearn.metrics import accuracy_score

In [2]:
# Seed both random number generators used by the notebook (NumPy for the
# synthetic data, PyTorch for weight initialisation and batch shuffling).
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Bounds used when sampling client income and age in generate_dataset().
MIN_INCOME, MAX_INCOME = 15000, 70000
MIN_AGE, MAX_AGE = 19, 50

In [3]:
def _sample_clients(size, income_range, age_range, education_prob=None, education=None):
    """Draw `size` client rows ``[income, education, age]`` from the global NumPy RNG.

    Parameters
    ----------
    size : int
        Number of rows to draw.
    income_range, age_range : tuple of (low, high)
        Bounds passed to ``np.random.uniform`` for income and age.
    education_prob : float, optional
        When given, the education flag is drawn as 1 with this probability.
    education : int, optional
        Fixed education flag, used (without consuming a random number) when
        ``education_prob`` is None.

    Returns
    -------
    list of [income, education, age] lists.

    Raises
    ------
    ValueError
        If neither ``education_prob`` nor ``education`` is provided.
    """
    if education_prob is None and education is None:
        raise ValueError("either education_prob or education must be given")

    rows = []
    for _ in range(size):
        # The per-row draw order (income, education, age) is deliberate: it
        # determines how the random stream is consumed, and therefore which
        # dataset a given seed produces.
        income = np.random.uniform(*income_range)
        if education_prob is not None:
            education = int(np.random.rand() < education_prob)
        age = np.random.uniform(*age_range)
        rows.append([income, education, age])
    return rows


def generate_dataset():
    """Generate the synthetic client dataset from six hand-specified segments.

    Each segment fixes an income range, an age range, how the higher-education
    flag is chosen, and what share of its samples gets which label. Within a
    segment the labels are not random: the first ``round(size * share)``
    samples receive the leading label and the remainder the other one.
    Label 0 is coded as in the figure from the assignment.

    Feature values come from the global ``np.random`` state, so call
    ``np.random.seed`` beforehand for reproducible output.

    Returns
    -------
    features : np.ndarray of shape (1348, 3)
        Columns are income, education flag (0/1) and age.
    labels : np.ndarray of shape (1348,)
        Integer class labels, 0 or 1.
    """
    # Income thresholds and the age threshold separating the segments.
    # Feel free to experiment with the bounds / distributions.
    low_income, mid_income, high_income = 28790.85, 33270.53, 44861.79
    young_age = 28.5
    any_age = (MIN_AGE, MAX_AGE)

    # (sampler arguments, leading label, share of samples with the leading label)
    segments = [
        # Most likely a person with money has a higher education :)
        (dict(size=879, income_range=(high_income, MAX_INCOME), age_range=any_age,
              education_prob=0.9), 0, 1.0),
        (dict(size=180, income_range=(mid_income, high_income), age_range=any_age,
              education=1), 0, 0.961),
        (dict(size=15, income_range=(mid_income, high_income), age_range=any_age,
              education=0), 1, 1.0),
        (dict(size=71, income_range=(low_income, mid_income),
              age_range=(young_age, MAX_AGE), education_prob=0.5), 0, 0.592),
        (dict(size=114, income_range=(MIN_INCOME, low_income), age_range=any_age,
              education_prob=0.5), 1, 0.614),
        (dict(size=89, income_range=(MIN_INCOME, mid_income),
              age_range=(MIN_AGE, young_age), education_prob=0.5), 1, 0.933),
    ]

    clients = []
    labels = []
    for sampler_args, leading_label, leading_share in segments:
        size = sampler_args["size"]
        # Computed once per segment instead of on every loop iteration.
        n_leading = int(np.round(size * leading_share))
        clients.extend(_sample_clients(**sampler_args))
        labels.extend([leading_label] * n_leading)
        labels.extend([1 - leading_label] * (size - n_leading))

    return np.array(clients), np.array(labels)

In [4]:
# Two consecutive draws from the same generator: the first is used as the
# training set, the second as the test set.
train = generate_dataset()
test = generate_dataset()

In [5]:
# Each split is a (features, labels) pair as returned by generate_dataset().
(X_train, y_train), (X_test, y_test) = train, test

In [6]:
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

In [7]:
# Separate the raw feature columns (income, education, age) so that each one
# can be preprocessed independently.
train_incomes = X_train[:, 0]
train_educations = X_train[:, 1]
train_ages = X_train[:, 2]

test_incomes = X_test[:, 0]
test_educations = X_test[:, 1]
test_ages = X_test[:, 2]

In [8]:
def _as_column(values):
    """Reshape a flat vector into the single-column 2-D layout the transformers are given."""
    return values.reshape(-1, 1)


# Every transformer is fitted on the training split only and then re-used,
# unchanged, on the test split.

# Income: min-max scaling.
income_scaler = MinMaxScaler()
train_incomes = income_scaler.fit_transform(_as_column(train_incomes))
test_incomes = income_scaler.transform(_as_column(test_incomes))

# Education: one-hot encoding.
education_encoder = OneHotEncoder()
train_educations = education_encoder.fit_transform(_as_column(train_educations))
test_educations = education_encoder.transform(_as_column(test_educations))

# Age: min-max scaling.
age_scaler = MinMaxScaler()
train_ages = age_scaler.fit_transform(_as_column(train_ages))
test_ages = age_scaler.transform(_as_column(test_ages))

In [9]:
# Re-assemble the preprocessed columns side by side: income, one-hot education,
# age. The encoded education is converted with .toarray() so it can be
# concatenated with the dense income/age columns.
train_parts = [train_incomes, train_educations.toarray(), train_ages]
test_parts = [test_incomes, test_educations.toarray(), test_ages]

X_train = np.concatenate(train_parts, axis=1)
X_test = np.concatenate(test_parts, axis=1)

In [10]:
# Convert both splits to tensors with explicit dtypes:
#   * features -> float32, matching the default dtype of nn.Linear weights;
#   * labels   -> int64, the class-index dtype nn.CrossEntropyLoss requires.
#     Relying on NumPy's platform default integer can yield int32 (e.g. on
#     Windows with NumPy 1.x), which the loss rejects.
# torch.as_tensor accepts ndarrays as well as tensors, so re-running this cell
# after X_*/y_* have already been converted does not raise.
X_train = torch.as_tensor(X_train, dtype=torch.float32)
y_train = torch.as_tensor(y_train, dtype=torch.long)
train_ds = TensorDataset(X_train, y_train)

X_test = torch.as_tensor(X_test, dtype=torch.float32)
y_test = torch.as_tensor(y_test, dtype=torch.long)
test_ds = TensorDataset(X_test, y_test)

In [11]:
# Mini-batches of 100 samples for both splits. Only the training loader
# reshuffles its data; the test loader keeps the dataset order.
BATCH_SIZE = 100
train_dl = DataLoader(dataset=train_ds, batch_size=BATCH_SIZE, shuffle=True)
test_dl = DataLoader(dataset=test_ds, batch_size=BATCH_SIZE, shuffle=False)

In [12]:
class ANN(nn.Module):
    """Feed-forward classifier with a single tanh hidden layer.

    Parameters
    ----------
    in_features : int, default 4
        Number of input features per sample.
    hidden_features : int, default 6
        Width of the hidden layer.
    n_classes : int, default 2
        Number of output logits (one per class).

    The defaults reproduce the original fixed 4 -> 6 -> 2 architecture, so
    ``ANN()`` behaves exactly as before.
    """

    def __init__(self, in_features=4, hidden_features=6, n_classes=2):
        super().__init__()
        # Attribute names and creation order are kept unchanged so that
        # seeded weight initialisation and existing state_dicts stay compatible.
        self.linear1 = nn.Linear(in_features, hidden_features)
        self.linear2 = nn.Linear(hidden_features, n_classes)

    def forward(self, x):
        """Return raw class logits for a batch ``x`` of shape (batch, in_features).

        No softmax is applied here; the output is unnormalised scores.
        """
        hidden = torch.tanh(self.linear1(x))
        return self.linear2(hidden)

In [13]:
# Model, loss and optimizer used by the training loop.
# Adam is created with its default hyper-parameters.
ann = ANN()
criterion = nn.CrossEntropyLoss()
optimizer = Adam(params=ann.parameters())

In [14]:
for epoch in range(3):
    # ---- training pass over the shuffled mini-batches ----
    ann.train()
    for x, y in train_dl:
        logits = ann(x)
        loss = criterion(logits, y)

        # Clear gradients from the previous step before back-propagating.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # ---- evaluation pass on the test split (no gradient tracking) ----
    ann.eval()
    with torch.no_grad():
        preds = []
        ys = []

        for x, y in test_dl:
            # The predicted class is the index of the largest logit in each row.
            preds += torch.argmax(ann(x), dim=1).tolist()
            ys += y.tolist()

        # accuracy_score's signature is (y_true, y_pred); the value is the
        # same either way, but the call now matches the documented order.
        acc = accuracy_score(ys, preds)
        print(f'Accuracy for epoch {epoch + 1} is {acc}')
            

Accuracy for epoch 1 is 0.8538575667655787
Accuracy for epoch 2 is 0.8612759643916914
Accuracy for epoch 3 is 0.905786350148368
