In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
df = pd.read_csv("dataset.txt", header=None, names=['total_flights', 'num_cancellations', 'time_since_booking', 'season_cancelled', 'cancel_label'])

X = torch.tensor(df[['total_flights', 'num_cancellations', 'time_since_booking', 'season_cancelled']].values, dtype=torch.float32)
y = torch.tensor(df['cancel_label'].values, dtype=torch.float32).view(-1, 1)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
import torch.nn as nn
import torch.optim as optim


class CancellationPredictor(nn.Module):
    def __init__(self, input_size):
        super(CancellationPredictor, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x

input_size = X_train.shape[1]
model = CancellationPredictor(input_size)

optimizer = optim.Adam(model.parameters(), lr=0.001)

criterion = nn.BCELoss()

In [4]:
def accuracy(y_true, y_pred):

    y_pred_labels = (y_pred > 0.5).float()
    correct = (y_pred_labels == y_true).float()
    return correct.mean().item()

In [5]:
from sklearn.model_selection import KFold
import numpy as np

kfold = KFold(n_splits=5, shuffle=True, random_state=42)
results = {'val_loss': [], 'val_accuracy': []}

for fold, (train_ids, val_ids) in enumerate(kfold.split(X_train)):
    print(f'Fold {fold+1}')
    X_train_fold, X_val_fold = X_train[train_ids], X_train[val_ids]
    y_train_fold, y_val_fold = y_train[train_ids], y_train[val_ids]

    for epoch in range(15):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train_fold)
        loss = criterion(outputs, y_train_fold)
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_fold)
        val_loss = criterion(val_outputs, y_val_fold)
        val_acc = accuracy(y_val_fold, val_outputs)

        results['val_loss'].append(val_loss.item())
        results['val_accuracy'].append(val_acc)

        print(f'Validation Loss: {val_loss.item()}, Validation Accuracy: {val_acc}')

print(f'Average Validation Loss: {np.mean(results["val_loss"])}')
print(f'Average Validation Accuracy: {np.mean(results["val_accuracy"])}')

Fold 1
Validation Loss: 0.18247516453266144, Validation Accuracy: 0.96875
Fold 2
Validation Loss: 0.33291417360305786, Validation Accuracy: 0.934374988079071
Fold 3
Validation Loss: 0.22124318778514862, Validation Accuracy: 0.9593750238418579
Fold 4
Validation Loss: 0.30435997247695923, Validation Accuracy: 0.9281250238418579
Fold 5
Validation Loss: 0.1661204844713211, Validation Accuracy: 0.96875
Average Validation Loss: 0.24142259657382964
Average Validation Accuracy: 0.9518750071525574


In [6]:
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    test_loss = criterion(test_outputs, y_test)
    test_acc = accuracy(y_test, test_outputs)
    print(f'Test Loss: {test_loss.item()}, Test Accuracy: {test_acc}')

Test Loss: 0.2049340456724167, Test Accuracy: 0.9549999833106995


In [8]:
fly = [[112,9,133,3], [68,5,365,3], [56,3,209,1]]

data = np.copy(fly)
pr = []

for person in data:

    person_data = {
        'total_flights': person[0],
        'num_cancellations': person[1],
        'time_since_booking': person[2],
        'season_cancelled': person[3]
    }


    person_tensor = torch.tensor([person_data['total_flights'],
                                  person_data['num_cancellations'],
                                  person_data['time_since_booking'],
                                  person_data['season_cancelled']], dtype=torch.float32).unsqueeze(0)

    model.eval()


    with torch.no_grad():
        prediction = model(person_tensor)
        probability = prediction.item()

    pr.append(probability)

print(pr)

[0.06298530101776123, 0.008177933283150196, 0.06501767039299011]


In [9]:

predicted_probabilities = np.array(pr)

prob_no_one_cancels = np.prod(1 - predicted_probabilities)

prob_at_least_one_cancels = 1 - prob_no_one_cancels

print(f'Вероятность того, что хотя бы один пассажир откажется: {prob_at_least_one_cancels:.4f}')

Вероятность того, что хотя бы один пассажир откажется: 0.1311
