In [None]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torch import nn, optim
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc, roc_auc_score
import matplotlib.pyplot as plt

In [None]:
import pyro
import pyro.distributions as dist
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam

In [None]:
class GamingDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        features: Indexable, sized collection of per-sample feature vectors.
        labels: Indexable, sized collection of per-sample labels; must have
            the same length as ``features``.

    Raises:
        ValueError: If ``features`` and ``labels`` differ in length.
    """

    def __init__(self, features, labels):
        # Fail fast: a length mismatch would otherwise surface as an IndexError
        # (or silently ignored samples) somewhere in the middle of an epoch.
        if len(features) != len(labels):
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {len(features)} and {len(labels)}"
            )
        self.features = features
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair at ``idx`` as two tensors."""
        return self._to_tensor(self.features[idx]), self._to_tensor(self.labels[idx])

    @staticmethod
    def _to_tensor(value):
        """Return an independent tensor copy of ``value``."""
        # torch.tensor() on an existing tensor emits a copy-construct warning;
        # detach().clone() is the equivalent, warning-free form.
        if isinstance(value, torch.Tensor):
            return value.detach().clone()
        return torch.tensor(value)

In [None]:
# Read the raw CSV (path is relative to the notebook's working directory).
csv_file = './online_gaming_behavior_dataset.csv'
data_frame = pd.read_csv(filepath_or_buffer=csv_file)

In [None]:
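# Disabled: mean-imputation of missing values, kept for reference.
# NOTE(review): data_frame.mean() may need numeric_only=True while object columns
# are still present -- confirm before re-enabling.
# data_frame.fillna(data_frame.mean(), inplace=True)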

In [None]:
# Integer-encode every object-typed column in place and keep each fitted
# encoder, keyed by column name, so the mapping can be inverted later.
label_encoders = {}
for column in data_frame.columns:
    if data_frame[column].dtype != 'object':
        continue  # leave non-object columns untouched
    le = LabelEncoder()
    data_frame[column] = le.fit_transform(data_frame[column])
    label_encoders[column] = le

In [None]:
# The last column is the target; every column before it is a model input.
features = data_frame.iloc[:, :-1].values.astype(dtype=np.float32)
labels = data_frame.iloc[:, -1].values.astype(dtype=np.int64)

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
(train_features, test_features,
 train_labels, test_labels) = train_test_split(features, labels, random_state=42, test_size=0.2)

In [None]:
# Fit the scaler on the training split only, then apply the same transform to both splits.
scaler = StandardScaler()
scaler.fit(train_features)
train_features, test_features = scaler.transform(train_features), scaler.transform(test_features)

In [None]:
# Reduce dimensionality with PCA. A float n_components in (0, 1) asks scikit-learn
# to keep as many components as are needed to explain that share of the variance.
pca = PCA(n_components=0.95)
# Fit the projection on the training split only, then reuse it for the test split.
train_features = pca.fit_transform(train_features)
test_features = pca.transform(test_features)

In [None]:
# Wrap each preprocessed split in a Dataset so it can be batched by a DataLoader.
train_dataset = GamingDataset(features=train_features, labels=train_labels)
test_dataset = GamingDataset(features=test_features, labels=test_labels)

In [None]:
# Batches of 32; only the training loader reshuffles its samples.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=32)

In [None]:
class BayesianNN(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of output logits.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the output-layer activations (no softmax applied) for ``x``."""
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)

def model(x_data, y_data=None):
    """Pyro generative model: standard-normal priors on every network parameter.

    Args:
        x_data: Batch of input features; its first dimension is the batch size.
        y_data: Optional class labels. When given, the "obs" site is
            conditioned on them; when ``None`` the site is sampled.
    """
    # Derive every prior from the network's own parameters so its shape and
    # device always match the module. Hard-coded sizes break as soon as the
    # hidden width changes, and scalar-built priors stay on the CPU even after
    # the network has been moved to a GPU.
    priors = {
        name: dist.Normal(torch.zeros_like(param), torch.ones_like(param)).to_event(param.dim())
        for name, param in bayesian_nn.named_parameters()
    }
    lifted_module = pyro.random_module("module", bayesian_nn, priors)
    # Calling the lifted module samples one concrete network from the priors.
    lifted_reg_model = lifted_module()

    # NOTE(review): the plate size is the minibatch size, so the likelihood is
    # not rescaled to the full dataset -- confirm this weighting is intended.
    with pyro.plate("map", x_data.shape[0]):
        prediction_mean = lifted_reg_model(x_data)
        pyro.sample("obs", dist.Categorical(logits=prediction_mean), obs=y_data)

def guide(x_data, y_data=None):
    """Mean-field Normal variational posterior over every network parameter.

    Each parameter ``<layer>.<kind>`` gets a learnable location
    ``<layer>_<kind>_mu`` and scale ``<layer>_<kind>_sigma`` in the Pyro param
    store.

    Args:
        x_data: Unused; present so the signature matches ``model``.
        y_data: Unused; present so the signature matches ``model``.

    Returns:
        A copy of the network whose parameters are one sample from the
        current variational posterior.
    """
    posteriors = {}
    for name, param in bayesian_nn.named_parameters():
        store_name = name.replace(".", "_")  # "fc1.weight" -> "fc1_weight"
        loc = pyro.param(f"{store_name}_mu", torch.randn_like(param))
        # The scale of a Normal must stay strictly positive. Without the
        # constraint the optimiser is free to push it to zero or below.
        scale = pyro.param(
            f"{store_name}_sigma",
            torch.ones_like(param),
            constraint=dist.constraints.positive,
        )
        posteriors[name] = dist.Normal(loc, scale).to_event(param.dim())
    lifted_module = pyro.random_module("module", bayesian_nn, posteriors)
    return lifted_module()


In [None]:
# Network dimensions follow the data: one input per retained feature column,
# one output per distinct training label.
input_dim = train_features.shape[-1]
output_dim = np.unique(train_labels).size

bayesian_nn = BayesianNN(input_dim=input_dim, hidden_dim=128, output_dim=output_dim)

In [None]:
# Stochastic variational inference: Adam optimiser driving the Trace_ELBO loss.
optimizer = Adam(dict(lr=0.001))
svi = SVI(model=model, guide=guide, optim=optimizer, loss=Trace_ELBO())

In [None]:
# Prefer a CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Move the network's parameters onto the selected device.
bayesian_nn = bayesian_nn.to(device=device)

In [None]:
num_epochs = 20
losses = []

# Make the cell reproducible and idempotent: seed the RNGs behind shuffling,
# parameter initialisation and sampling, and start from an empty param store
# so a re-run trains from scratch instead of continuing from stale values.
pyro.set_rng_seed(42)
pyro.clear_param_store()

num_train_samples = len(train_loader.dataset)

for epoch in range(num_epochs):
    epoch_loss = 0.0
    # `batch_labels` (not `labels`) so the notebook-level `labels` array built
    # during preprocessing is not overwritten by the last mini-batch.
    for inputs, batch_labels in train_loader:
        inputs = inputs.to(device=device, dtype=torch.float32)
        batch_labels = batch_labels.to(device=device, dtype=torch.long)
        epoch_loss += svi.step(inputs, batch_labels)
    # Summed per-step loss divided by the number of training samples.
    mean_loss = epoch_loss / num_train_samples
    losses.append(mean_loss)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}")

plt.plot(losses)
plt.title("Training Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.show()

In [None]:
bayesian_nn.eval()
all_labels = []
all_predictions = []

# SVI only updates the variational parameters used by `guide`; the weights
# stored on `bayesian_nn` itself are never trained. Predict with networks
# sampled from the guide and average their class probabilities.
num_posterior_samples = 10

with torch.no_grad():
    # The guide ignores its arguments, so no data is needed to draw samples.
    sampled_models = [guide(None, None) for _ in range(num_posterior_samples)]

    for inputs, batch_labels in test_loader:
        # Inputs must be on the same device as the sampled weights.
        inputs = inputs.to(device=device, dtype=torch.float32)
        probabilities = torch.stack(
            [torch.softmax(sampled_model(inputs), dim=1) for sampled_model in sampled_models]
        ).mean(dim=0)
        predicted = probabilities.argmax(dim=1)

        all_labels.extend(batch_labels.numpy())
        # .cpu() is required before .numpy() when running on a GPU.
        all_predictions.extend(predicted.cpu().numpy())

In [None]:
# Plain accuracy, plus support-weighted precision / recall / F1 across classes.
accuracy = accuracy_score(all_labels, all_predictions)
precision, recall, f1 = (
    scorer(all_labels, all_predictions, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

for metric_name, metric_value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
):
    print(f"{metric_name}: {metric_value:.4f}")