In [2]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [3]:
# Загрузка данных
# df = pd.read_csv('your_data.csv')

# Предположим, что мы работаем с небольшим фрагментом для демонстрации
data = {
    'employee_id': ['001', '002', '001', '003', '002'],
    'communication_score': [90, 80, 85, 70, 95],
    'util_flg': [1, 0, 1, 0, 1],
    'course_0': [1.0, 0.0, 1.0, 0.0, 1.0],
    'course_1': [0.0, 1.0, 0.0, 1.0, 0.0],
    'course_2': [1.0, 1.0, 0.0, 1.0, 1.0],
}
df = pd.DataFrame(data)

# Кодирование категориальных переменных
df['employee_id'] = df['employee_id'].astype('category').cat.codes

# Нормализация данных
scaler = StandardScaler()
df[['communication_score']] = scaler.fit_transform(df[['communication_score']])

# Разделение данных
X = df.drop(columns=['course_0', 'course_1', 'course_2']).values
y = df[['course_0', 'course_1', 'course_2']].values

# Сохранение индексов и разделение данных с сохранением индексов
X_train, X_test, y_train, y_test = train_test_split(df.drop(columns=['course_0', 'course_1', 'course_2']), df[['course_0', 'course_1', 'course_2']], test_size=0.2, random_state=42)

In [4]:
df

Unnamed: 0,employee_id,communication_score,util_flg,course_0,course_1,course_2
0,0,0.697486,1,1.0,0.0,1.0
1,1,-0.464991,0,0.0,1.0,1.0
2,0,0.116248,1,1.0,0.0,0.0
3,2,-1.627467,0,0.0,1.0,1.0
4,1,1.278724,1,1.0,0.0,1.0


In [5]:
class NeuralNet(nn.Module):
    def __init__(self, input_features):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_features, 64)  # Динамически определяем входные признаки
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 3)  # Предсказываем 3 курса
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))  # Sigmoid для предсказания вероятности прохождения курса
        return x

# Создаем экземпляр модели с правильным количеством входных признаков
input_features = X_train.drop(columns=['employee_id']).shape[1]
model = NeuralNet(input_features).cuda()  # Перемещаем модель на GPU

In [6]:
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Подготовка данных для обучения
train_data = torch.tensor(X_train.drop(columns=['employee_id']).values, dtype=torch.float32).cuda()
train_labels = torch.tensor(y_train.values, dtype=torch.float32).cuda()
test_data = torch.tensor(X_test.drop(columns=['employee_id']).values, dtype=torch.float32).cuda()
test_labels = torch.tensor(y_test.values, dtype=torch.float32).cuda()

# Обучение
epochs = 50
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(train_data)
    loss = criterion(outputs, train_labels)
    loss.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        model.eval()
        with torch.no_grad():
            test_outputs = model(test_data)
            test_loss = criterion(test_outputs, test_labels)
            print(f'Epoch {epoch+1}, Loss: {loss.item()}, Test Loss: {test_loss.item()}')

Epoch 1, Loss: 0.6781997680664062, Test Loss: 0.733923614025116
Epoch 11, Loss: 0.5740320682525635, Test Loss: 0.7023954391479492
Epoch 21, Loss: 0.47735723853111267, Test Loss: 0.6589376926422119
Epoch 31, Loss: 0.38129347562789917, Test Loss: 0.6106792092323303
Epoch 41, Loss: 0.2964262366294861, Test Loss: 0.5554968118667603


In [7]:
# Получение предсказаний модели на тестовом наборе
model.eval()  # Перевод модели в режим оценки
with torch.no_grad():
    predictions = model(test_data)

# Преобразование тензора вероятностей в numpy массив
predicted_probabilities = predictions.cpu().numpy()

# Создание DataFrame для наглядности результатов
result_df = pd.DataFrame(predicted_probabilities, columns=['Course_0_Probability', 'Course_1_Probability', 'Course_2_Probability'])
result_df['Employee_IDs'] = X_test['employee_id'].values  # Использование сохранённых ID сотрудников из X_test
print(result_df)

   Course_0_Probability  Course_1_Probability  Course_2_Probability  \
0              0.460342              0.566851              0.722211   

   Employee_IDs  
0             1  


In [8]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [9]:
# Установка параметра отображения всех строк
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [10]:
# Загрузка данных
data = pd.read_csv('test_data.csv')

In [11]:
data.head()

Unnamed: 0.1,Unnamed: 0,employee_id,sex,region,age,head_employee_id,exp_days,edu_degree,department_id,work_online_flg,communication_id,communication_dt,communication_score,util_flg,end_dt,course_0,course_1,course_2,course_3,course_4,course_5,course_6,course_7,course_8,course_9,course_10,course_11,course_12,course_13,course_14,course_15,course_16,course_17,course_18,course_19,course_20,course_21,course_22,course_23,course_24,course_25,course_26,course_27,course_28,course_29,course_30,course_31,course_32,course_33,course_34,course_35,course_36,course_37,course_38,course_39,course_40,course_41,course_42,course_43,course_44,course_45,course_46,course_47,course_48,course_49,course_50,course_51,course_52,course_53,course_54,course_55,course_56,course_57,course_58,course_59,course_60,course_61,course_62,course_63,course_64,course_65,course_66,course_67,course_68,course_69,course_70,course_71,course_72,course_73,course_74,course_75,course_76,course_77,course_78,course_79,course_80,course_81,course_82,course_83,course_84,course_85,course_86,course_87,course_88,course_89,course_90,course_91
0,2077144,f57a2f55-7b09-8c43-f11a-b969efe1504b,M,6,25,c74d97b0-1eae-257e-44aa-9d5bade97baf,904,2,1,0,219306075423967632,2023-01-01,79,0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2077143,f57a2f55-7b09-8c43-f11a-b969efe1504b,M,6,25,c74d97b0-1eae-257e-44aa-9d5bade97baf,904,2,1,0,909573595576702601,2023-01-01,0,1,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2077142,f57a2f55-7b09-8c43-f11a-b969efe1504b,M,6,25,c74d97b0-1eae-257e-44aa-9d5bade97baf,904,2,1,0,175962335565615960,2023-01-01,32,0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2077141,f57a2f55-7b09-8c43-f11a-b969efe1504b,M,6,25,c74d97b0-1eae-257e-44aa-9d5bade97baf,904,2,1,0,767071566842191283,2023-01-01,100,1,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2077140,f57a2f55-7b09-8c43-f11a-b969efe1504b,M,6,25,c74d97b0-1eae-257e-44aa-9d5bade97baf,904,2,1,0,93956815333286002,2023-01-01,100,1,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# Преобразование дат
data['communication_dt'] = pd.to_datetime(data['communication_dt'])
data['end_dt'] = pd.to_datetime(data['end_dt'])

In [13]:
# Установка временного индекса
data.set_index('communication_dt', inplace=True)

In [14]:
# Сортировка данных по сотруднику и дате коммуникации
data.sort_values(by=['employee_id', 'communication_dt'], inplace=True)

In [15]:
# Расчет скользящего среднего для 30 дней до и после каждой даты для каждого сотрудника
data['communication_score_before'] = data.groupby('employee_id')['communication_score']\
    .rolling(window='30D', closed='left').mean().shift(1).reset_index(level=0, drop=True)

data['communication_score_after'] = data.groupby('employee_id')['communication_score']\
    .rolling(window='30D', closed='right').mean().shift(-1).reset_index(level=0, drop=True)

In [16]:
# Сброс индекса для возвращения к исходному формату
data.reset_index(inplace=True)

In [17]:
# Расчет изменения и сохранение в новый столбец
data['communication_score_change'] = data['communication_score_after'] - data['communication_score_before']

In [18]:
# Вывод первых нескольких строк данных с новым столбцом
data[['employee_id', 'communication_dt', 'communication_score_before', 'communication_score_after', 'communication_score_change']].tail(20)

Unnamed: 0,employee_id,communication_dt,communication_score_before,communication_score_after,communication_score_change
2097,f57a2f55-7b09-8c43-f11a-b969efe1504b,2023-12-17,64.669014,64.1,-0.569014
2098,f57a2f55-7b09-8c43-f11a-b969efe1504b,2023-12-17,64.669014,64.337748,-0.331266
2099,f57a2f55-7b09-8c43-f11a-b969efe1504b,2023-12-17,64.669014,63.914474,-0.75454
2100,f57a2f55-7b09-8c43-f11a-b969efe1504b,2023-12-17,64.669014,64.150327,-0.518687
2101,f57a2f55-7b09-8c43-f11a-b969efe1504b,2023-12-17,64.669014,63.915584,-0.75343
2102,f57a2f55-7b09-8c43-f11a-b969efe1504b,2023-12-17,64.669014,63.503226,-1.165788
2103,f57a2f55-7b09-8c43-f11a-b969efe1504b,2023-12-17,64.669014,63.448718,-1.220296
2104,f57a2f55-7b09-8c43-f11a-b969efe1504b,2023-12-17,64.669014,63.493333,-1.175681
2105,f57a2f55-7b09-8c43-f11a-b969efe1504b,2023-12-18,64.669014,63.635762,-1.033252
2106,f57a2f55-7b09-8c43-f11a-b969efe1504b,2023-12-18,63.448718,63.217105,-0.231613


In [19]:
# Добавляем столбцы с годом, месяцем и днем
data['year'] = data['communication_dt'].dt.year
data['month'] = data['communication_dt'].dt.month
data['day'] = data['communication_dt'].dt.day

# Удаляем лишние столбцы
data = data.drop(columns=['head_employee_id', 'Unnamed: 0', 'communication_id', 'communication_dt',
                                    'communication_score_before', 'communication_score_after', 'communication_score', 'util_flg', 'end_dt'])

In [20]:
# Инициализация кодировщиков и масштабировщика
le_employee = LabelEncoder()
le_head_employee = LabelEncoder()
le_sex = LabelEncoder()
scaler = StandardScaler()

In [21]:
# Проверяем типы данных в столбцах
print(data.dtypes)

# Кодируем категориальные переменные еще раз, если это необходимо
data['employee_id'] = 1
#data['employee_id'] = le_employee.fit_transform(data['employee_id'].astype(str))
data['sex'] = le_sex.fit_transform(data['sex'].astype(str))

# Масштабирование числовых переменных
data['age'] = scaler.fit_transform(data[['age']])
data['exp_days'] = scaler.fit_transform(data[['exp_days']])

employee_id                    object
sex                            object
region                          int64
age                             int64
exp_days                        int64
edu_degree                      int64
department_id                   int64
work_online_flg                 int64
course_0                      float64
course_1                      float64
course_2                      float64
course_3                      float64
course_4                      float64
course_5                      float64
course_6                      float64
course_7                      float64
course_8                      float64
course_9                      float64
course_10                     float64
course_11                     float64
course_12                     float64
course_13                     float64
course_14                     float64
course_15                     float64
course_16                     float64
course_17                     float64
course_18   

In [22]:
data['employee_id'].unique()

array([1], dtype=int64)

In [23]:
# Определение полезности прохождения курса
data['positive_change'] = (data['communication_score_change'] > 0).astype(int)

In [24]:
data = data.dropna(subset='communication_score_change')

In [25]:
data.head()

Unnamed: 0,employee_id,sex,region,age,exp_days,edu_degree,department_id,work_online_flg,course_0,course_1,course_2,course_3,course_4,course_5,course_6,course_7,course_8,course_9,course_10,course_11,course_12,course_13,course_14,course_15,course_16,course_17,course_18,course_19,course_20,course_21,course_22,course_23,course_24,course_25,course_26,course_27,course_28,course_29,course_30,course_31,course_32,course_33,course_34,course_35,course_36,course_37,course_38,course_39,course_40,course_41,course_42,course_43,course_44,course_45,course_46,course_47,course_48,course_49,course_50,course_51,course_52,course_53,course_54,course_55,course_56,course_57,course_58,course_59,course_60,course_61,course_62,course_63,course_64,course_65,course_66,course_67,course_68,course_69,course_70,course_71,course_72,course_73,course_74,course_75,course_76,course_77,course_78,course_79,course_80,course_81,course_82,course_83,course_84,course_85,course_86,course_87,course_88,course_89,course_90,course_91,communication_score_change,year,month,day,positive_change
7,1,0,6,0.0,0.0,2,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.388889,2023,1,2,0
8,1,0,6,0.0,0.0,2,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-6.4,2023,1,3,0
9,1,0,6,0.0,0.0,2,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.352273,2023,1,3,1
10,1,0,6,0.0,0.0,2,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.041667,2023,1,3,0
11,1,0,6,0.0,0.0,2,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.778846,2023,1,3,1


In [26]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2109 entries, 7 to 2115
Columns: 105 entries, employee_id to positive_change
dtypes: float64(95), int32(5), int64(5)
memory usage: 1.7 MB


In [27]:
# Метки определяются на основе прохождения курсов и положительного изменения
labels = data[[f'course_{i}' for i in range(92)]] * data['positive_change'].values[:, None]

# Признаки для модели
features = data.drop(columns=['communication_score_change'] + [f'course_{i}' for i in range(92)])

# Разделение данных
X_train, X_test, Y_train, Y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

In [28]:
X_test['employee_id'].unique()

array([1], dtype=int64)

In [29]:
# Определение модели
class NeuralNet(nn.Module):
    def __init__(self, input_features, num_courses=92):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_features, 128)
        self.fc2 = nn.Linear(128, 128)
        self.output_layer = nn.Linear(128, num_courses)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.sigmoid(self.output_layer(x))
        return x

In [30]:
model = NeuralNet(input_features=X_train.shape[1]).cuda()

# Тренировка и оценка модели, аналогично предыдущему описанию
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [31]:
# Повторное создание тензоров после очистки и преобразования типов
train_features = torch.tensor(X_train.values, dtype=torch.float32).cuda()
train_targets = torch.tensor(Y_train.values, dtype=torch.float32).cuda()
test_features = torch.tensor(X_test.values, dtype=torch.float32).cuda()
test_targets = torch.tensor(Y_test.values, dtype=torch.float32).cuda()

In [32]:
# Обучение модели
for epoch in range(50):
    model.train()
    optimizer.zero_grad()
    outputs = model(train_features)
    loss = criterion(outputs, train_targets)
    loss.backward()
    optimizer.step()
    
    if epoch % 5 == 0:
        print(f'Epoch {epoch+1}/50, Loss: {loss.item()}')

Epoch 1/50, Loss: 43.750308990478516
Epoch 6/50, Loss: 38.05623245239258
Epoch 11/50, Loss: 34.948360443115234
Epoch 16/50, Loss: 34.78260803222656
Epoch 21/50, Loss: 34.78260803222656
Epoch 26/50, Loss: 34.78260803222656
Epoch 31/50, Loss: 34.78260803222656
Epoch 36/50, Loss: 34.78260803222656
Epoch 41/50, Loss: 34.78260803222656
Epoch 46/50, Loss: 34.78260803222656


In [426]:
# Переключаем модель в режим оценки
model.eval()
with torch.no_grad():
    test_predictions = model(test_features)

In [427]:
# Преобразуем тензор вероятностей в DataFrame
predictions_df = pd.DataFrame(test_predictions.cpu().numpy(), columns=[f'course_{i}' for i in range(92)])

In [1]:
# Добавление индекса сотрудников в DataFrame для идентификации
predictions_df['employee_id'] = X_test['employee_id'].values 

NameError: name 'X_test' is not defined

In [429]:
# Сортировка курсов по вероятности для каждого сотрудника
def sort_courses(row):
    return row.sort_values(ascending=False).iloc[:-1].index.tolist()  # Исключаем столбец employee_id

In [430]:
predictions_df.head()

Unnamed: 0,course_0,course_1,course_2,course_3,course_4,course_5,course_6,course_7,course_8,course_9,course_10,course_11,course_12,course_13,course_14,course_15,course_16,course_17,course_18,course_19,course_20,course_21,course_22,course_23,course_24,course_25,course_26,course_27,course_28,course_29,course_30,course_31,course_32,course_33,course_34,course_35,course_36,course_37,course_38,course_39,course_40,course_41,course_42,course_43,course_44,course_45,course_46,course_47,course_48,course_49,course_50,course_51,course_52,course_53,course_54,course_55,course_56,course_57,course_58,course_59,course_60,course_61,course_62,course_63,course_64,course_65,course_66,course_67,course_68,course_69,course_70,course_71,course_72,course_73,course_74,course_75,course_76,course_77,course_78,course_79,course_80,course_81,course_82,course_83,course_84,course_85,course_86,course_87,course_88,course_89,course_90,course_91,employee_id
0,5.3522850000000003e-17,0.0,0.0,2.693563e-28,4.4599429999999996e-38,0.0,1.0,0.0,1.1820020000000001e-23,1.0,0.0,1.0,1.0,7.900872e-18,2.259319e-15,7.127536e-39,0.0,1.0,0.0,8.495341e-13,0.0,5.0722540000000005e-18,0.0,1.150279e-27,0.0,5.014602e-12,1.767675e-20,0.0,9.323725e-13,0.0,0.0,1.1933720000000002e-33,2.3340240000000003e-25,1.151595e-22,0.0,3.9053759999999995e-38,2.164967e-09,0.0,0.0,1.0,1.859166e-16,1.708163e-24,2.026384e-23,0.0,1.0,6.795558e-19,0.0,1.0,0.0,1.0,0.0,1.0,0.0,2.002004e-31,1.0,1.638005e-18,0.0,0.0,0.0,0.0,0.0,1.651189e-30,1.0,0.0,0.0,1.0,0.0,0.0,1.032785e-07,0.0,1.0,1.0,0.0,3.268033e-16,1.930517e-21,1.0,1.1655139999999998e-19,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.872347e-16,7.936776e-10,5.019957e-13,0.0,1.0,2.853039e-17,0.0,0.0,408
1,4.2835060000000005e-17,0.0,0.0,4.855118e-28,6.326536e-38,0.0,1.0,0.0,9.652075e-24,1.0,0.0,1.0,1.0,1.031656e-17,2.182152e-15,5.5057170000000006e-39,0.0,1.0,0.0,1.8819e-12,0.0,5.837283e-18,0.0,2.839647e-27,0.0,3.209326e-12,1.507596e-20,0.0,1.091654e-12,0.0,0.0,8.96622e-34,3.834059e-25,2.168436e-22,0.0,3.8549619999999996e-38,4.837353e-09,0.0,0.0,1.0,5.313554e-16,1.947943e-24,2.1574030000000003e-23,0.0,1.0,1.182943e-18,0.0,1.0,0.0,1.0,0.0,1.0,0.0,2.248767e-31,1.0,6.767643999999999e-19,0.0,0.0,0.0,0.0,0.0,4.3266689999999995e-30,1.0,0.0,0.0,1.0,0.0,0.0,1.054827e-07,0.0,1.0,1.0,0.0,5.932086e-16,1.633815e-21,1.0,1.114379e-19,1.0,1.0,0.0,0.0,1.0,0.0,0.0,2.509016e-16,1.257281e-09,9.820692e-13,0.0,1.0,3.9353570000000005e-17,0.0,0.0,1110
2,5.1212520000000006e-17,0.0,0.0,3.6362870000000003e-28,5.617672999999999e-38,0.0,1.0,0.0,1.0317040000000002e-23,1.0,0.0,1.0,1.0,8.356164000000001e-18,2.189356e-15,6.061119e-39,0.0,1.0,0.0,1.160963e-12,0.0,5.584122e-18,0.0,1.851872e-27,0.0,4.028854e-12,1.6735819999999998e-20,0.0,1.005999e-12,0.0,0.0,1.05171e-33,2.8784650000000002e-25,1.560496e-22,0.0,4.0207469999999995e-38,3.12261e-09,0.0,0.0,1.0,3.221026e-16,1.811352e-24,2.1274100000000003e-23,0.0,1.0,9.786649e-19,0.0,1.0,0.0,1.0,0.0,1.0,0.0,2.1204660000000002e-31,1.0,1.005599e-18,0.0,0.0,0.0,0.0,0.0,2.6052659999999998e-30,1.0,0.0,0.0,1.0,0.0,0.0,1.079986e-07,0.0,1.0,1.0,0.0,4.766383e-16,1.67001e-21,1.0,1.1648999999999999e-19,1.0,1.0,0.0,0.0,1.0,0.0,0.0,2.129732e-16,1.067851e-09,7.063387e-13,0.0,1.0,3.1908420000000005e-17,0.0,0.0,259
3,5.563691e-17,0.0,0.0,2.289602e-28,3.9807019999999995e-38,0.0,1.0,0.0,1.2634190000000001e-23,1.0,0.0,1.0,1.0,7.534924e-18,2.291364e-15,7.730554000000001e-39,0.0,1.0,0.0,7.032468e-13,0.0,4.840403e-18,0.0,8.918762000000001e-28,0.0,5.65571e-12,1.831114e-20,0.0,8.939438e-13,0.0,0.0,1.2831370000000002e-33,2.0644180000000002e-25,9.730307000000001e-23,0.0,3.873801e-38,1.759165e-09,0.0,0.0,1.0,1.385013e-16,1.651798e-24,1.9807870000000002e-23,0.0,1.0,5.677013e-19,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.93995e-31,1.0,2.116117e-18,0.0,0.0,0.0,0.0,0.0,1.2803859999999999e-30,1.0,0.0,0.0,1.0,0.0,0.0,1.015459e-07,0.0,1.0,1.0,0.0,2.706433e-16,2.061511e-21,1.0,1.171442e-19,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.738648e-16,6.849711e-10,4.172294e-13,0.0,1.0,2.6555740000000003e-17,0.0,0.0,354
4,5.4218660000000004e-17,0.0,0.0,2.551547e-28,4.2940619999999997e-38,0.0,1.0,0.0,1.2085510000000001e-23,1.0,0.0,1.0,1.0,7.776897e-18,2.269962e-15,7.323051000000001e-39,0.0,1.0,0.0,7.976738e-13,0.0,4.99375e-18,0.0,1.056748e-27,0.0,5.219773e-12,1.788587e-20,0.0,9.19375e-13,0.0,0.0,1.222567e-33,2.240443e-25,1.088716e-22,0.0,3.8947829999999997e-38,2.020235e-09,0.0,0.0,1.0,1.68538e-16,1.689164e-24,2.011067e-23,0.0,1.0,6.400174e-19,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.9811120000000003e-31,1.0,1.783976e-18,0.0,0.0,0.0,0.0,0.0,1.516974e-30,1.0,0.0,0.0,1.0,0.0,0.0,1.02699e-07,0.0,1.0,1.0,0.0,3.068956e-16,1.973225e-21,1.0,1.167471e-19,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.826681e-16,7.556472e-10,4.719809e-13,0.0,1.0,2.785588e-17,0.0,0.0,387
