In [None]:
from torch.utils.data import Dataset, DataLoader, random_split
import numpy as np
import torch

In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup


def extract_player_data(url, timeout=30):
    """Scrape per-player stat rows from the overview tables of one page.

    Downloads ``url``, collects every ``<table>`` whose class is
    ``wf-table-inset mod-overview`` and keeps only the tables at positions
    0, 1, 4 and 5. Every body row becomes one record holding the first
    whitespace-separated token of each cell (for the deaths cell, the text of
    its ``mod-both`` span), followed by the team tag read from the player cell
    and the source URL.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A ``pandas.DataFrame`` with one row per table row. Its columns are the
        header cells of the last table processed plus ``"Team"`` and
        ``"URL"``. If the page has no matching table, the frame is empty and
        has only the ``"Team"`` and ``"URL"`` columns.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error page instead of parsing it as if it were data.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")
    tables = soup.find_all("table", class_="wf-table-inset mod-overview")

    # NOTE(review): positions 0, 1, 4 and 5 are presumably the tables that
    # carry the wanted stats - confirm against the page layout.
    relevant_table_indices = {0, 1, 4, 5}

    # Bound before the loop so a page without matching tables still yields a
    # well-formed (empty) frame instead of a NameError at the return.
    headers = ["Team"]
    player_data = []
    for idx, table in enumerate(tables):
        if idx not in relevant_table_indices:
            continue
        headers = [th.text for th in table.find_all("th")] + ["Team"]

        # The first <tr> is skipped; the remaining rows are parsed as data.
        for row in table.find_all("tr")[1:]:
            row_data = []
            team_name = ""
            for td in row.find_all("td"):
                cell_classes = td.get("class")
                if cell_classes == ["mod-stat", "mod-vlr-deaths"]:
                    # The deaths cell keeps its value in a dedicated span.
                    deaths_span = td.find("span", class_="mod-both")
                    row_data.append(deaths_span.text.strip() if deaths_span else "")
                else:
                    # Keep only the first token of a cell's text.
                    text = td.text.strip()
                    row_data.append(text.split()[0] if text else "")
                if cell_classes == ["mod-player"]:
                    team_name_div = td.find("div", class_="ge-text-light")
                    team_name = team_name_div.text.strip() if team_name_div else ""
            row_data.append(team_name)
            row_data.append(url)
            player_data.append(row_data)

    return pd.DataFrame(player_data, columns=headers + ["URL"])


# Read the page URLs, one per line, skipping blank lines. The file is closed
# as soon as the list is built instead of staying open while pages are scraped.
with open("player_urls.txt", "r") as file:
    urls = [line.strip() for line in file if line.strip()]

if not urls:
    raise ValueError("player_urls.txt does not contain any URLs")

# Combine data from relevant tables of all URLs into a single DataFrame
combined_df = pd.concat([extract_player_data(url) for url in urls], ignore_index=True)

# Define the file path
file_path = "Player_Stats_Per_Map.xlsx"

# Save the combined DataFrame to an Excel file and read it back.
# NOTE(review): the write/read round-trip is kept as-is; confirm whether the
# cleaning below relies on how read_excel names and types the columns.
combined_df.to_excel(file_path, index=False)
combined_df = pd.read_excel(file_path)

# Cleaning Data: name the first column, drop the second one, sort by player
# name and turn the percentage strings into fractions.
combined_df.rename(columns={combined_df.columns[0]: "Name"}, inplace=True)
combined_df.drop(combined_df.columns[1], axis=1, inplace=True)
combined_df.sort_values(by="Name", inplace=True)
combined_df['KAST'] = combined_df['KAST'].str.rstrip('%').astype(float) / 100
combined_df['HS%'] = combined_df['HS%'].str.rstrip('%').astype(float) / 100

# Save the modified DataFrame back to the Excel file
combined_df.to_excel(file_path, index=False)

print(f'Data saved to {file_path}')

Data saved to Player_Stats_Per_Map.xlsx


In [None]:
# Load the cleaned stats from the Excel file written by the scraping cell.
playerData = pd.read_excel("Player_Stats_Per_Map.xlsx")

In [None]:
# Display the loaded frame; pandas abbreviates the middle rows of long frames.
playerData

Unnamed: 0,Name,R,ACS,K,D,A,+/–,KAST,ADR,HS%,FK,FD,+/–.1,Team,URL
0,Benkai,0.44,69,4,14,1,-10,0.45,55,0.17,0,3,-3,GE,https://www.vlr.gg/296730/zeta-division-vs-glo...
1,Benkai,0.89,197,13,16,8,-3,0.73,122,0.24,3,1,2,GE,https://www.vlr.gg/296738/bleed-vs-global-espo...
2,Benkai,0.96,175,15,14,2,1,0.70,120,0.30,0,2,-2,GE,https://www.vlr.gg/296738/bleed-vs-global-espo...
3,Benkai,0.60,87,7,16,2,-9,0.48,59,0.16,0,2,-2,GE,https://www.vlr.gg/296739/zeta-division-vs-glo...
4,Benkai,0.92,156,13,15,4,-2,0.61,112,0.30,0,2,-2,GE,https://www.vlr.gg/296730/zeta-division-vs-glo...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,yuran,1.13,218,18,14,4,4,0.65,140,0.30,4,3,1,ZETA,https://www.vlr.gg/296730/zeta-division-vs-glo...
116,yuran,1.36,221,17,9,4,8,0.86,134,0.33,2,1,1,ZETA,https://www.vlr.gg/296739/zeta-division-vs-glo...
117,yuran,0.38,132,7,17,5,-10,0.41,70,0.19,0,3,-3,ZETA,https://www.vlr.gg/296737/t1-vs-zeta-division-...
118,yuran,1.61,262,20,9,8,11,0.85,181,0.25,3,1,2,ZETA,https://www.vlr.gg/296730/zeta-division-vs-glo...


# Database alterations
- Convert player names to integer IDs.
- Convert team names to integer IDs.

In [None]:
# Encode every player name as an integer ID, numbered in order of first
# appearance in the first column of playerData.
name_column = playerData.iloc[:, 0]

# One-column frame holding the raw name column.
PlayerNamesDF = pd.DataFrame(name_column.to_numpy().reshape(-1, 1))

# Keep everything 1-D: squeezing a single-row or single-player column gives a
# 0-d array, which can be neither indexed nor iterated.
PlayerNames = name_column.drop_duplicates().to_numpy()
PlayerDictionary = {name: player_id for player_id, name in enumerate(PlayerNames)}

# Per-row names and their integer IDs, aligned with the rows of playerData.
ListP = name_column.to_numpy()
namesToNumber = np.array([PlayerDictionary[name] for name in ListP], dtype=np.int64)

In [None]:
# Overwrite the first column (the player names) with the integer player IDs.
name_col = playerData.columns[0]
playerData[name_col] = namesToNumber
print(playerData)

     Name     R  ACS   K   D   A  +/–  KAST  ADR   HS%  FK  FD  +/–.1  Team  \
0       0  0.44   69   4  14   1  -10  0.45   55  0.17   0   3     -3    GE   
1       0  0.89  197  13  16   8   -3  0.73  122  0.24   3   1      2    GE   
2       0  0.96  175  15  14   2    1  0.70  120  0.30   0   2     -2    GE   
3       0  0.60   87   7  16   2   -9  0.48   59  0.16   0   2     -2    GE   
4       0  0.92  156  13  15   4   -2  0.61  112  0.30   0   2     -2    GE   
..    ...   ...  ...  ..  ..  ..  ...   ...  ...   ...  ..  ..    ...   ...   
115    29  1.13  218  18  14   4    4  0.65  140  0.30   4   3      1  ZETA   
116    29  1.36  221  17   9   4    8  0.86  134  0.33   2   1      1  ZETA   
117    29  0.38  132   7  17   5  -10  0.41   70  0.19   0   3     -3  ZETA   
118    29  1.61  262  20   9   8   11  0.85  181  0.25   3   1      2  ZETA   
119    29  1.07  208  17  18  11   -1  0.71  144  0.25   2   1      1  ZETA   

                                                   

In [None]:
# Features are columns 0-12 without column 3; column 3 is the target and is
# selected with a list so that Y stays two-dimensional.
feature_columns = [col for col in range(13) if col != 3]
target_columns = [3]
X = playerData.iloc[:, feature_columns].values
Y = playerData.iloc[:, target_columns].values

In [None]:
import torch
import numpy as np
from torch.utils.data import Dataset

class PlayerData(Dataset):
    """Sliding-window dataset over row-aligned feature and label arrays.

    Windows of ``window_size`` consecutive rows are cut every ``step_size``
    rows. Window ``i`` of the labels spans the same rows as window ``i`` of
    the features. All windows are sliced once, when the dataset is built.
    """

    def __init__(self, features, labels, window_size, step_size, transform=None):
        """Store the inputs and precompute every feature/label window.

        Args:
            features: Array sliced along its first axis to form the windows.
            labels: Array sliced with the same row ranges as ``features``.
            window_size: Number of consecutive rows per window.
            step_size: Row offset between the starts of neighbouring windows.
            transform: Optional callable ``(feature, label) -> (feature, label)``
                applied to each item on access.
        """
        self.features = features
        self.labels = labels
        self.window_size = window_size
        self.step_size = step_size
        self.transform = transform

        # Only start positions that leave room for a complete window are used.
        last_start = len(self.features) - window_size
        starts = range(0, last_start + 1, step_size)
        self.windows = [self.features[s:s + window_size] for s in starts]
        self.target_windows = [self.labels[s:s + window_size] for s in starts]

    def __len__(self):
        """Return the number of precomputed windows."""
        return len(self.windows)

    def __getitem__(self, index):
        """Return window ``index`` as a float32 ``(feature, label)`` pair.

        The pair is passed through ``transform`` first when one was supplied.
        """
        window = self.windows[index].astype(np.float32)
        target = self.target_windows[index].astype(np.float32)

        if self.transform:
            window, target = self.transform(window, target)

        return window, target

def transform_function(feature, label):
    """Convert a window to float32 tensors and standardise its features.

    The features are centred on the mean of the whole window and divided by
    the standard deviation of the whole window (one statistic across all rows
    and columns). The label is converted but otherwise left untouched.

    Args:
        feature: Array-like feature window.
        label: Array-like label window.

    Returns:
        ``(feature, label)`` as ``torch.float32`` tensors.
    """
    feature = torch.tensor(feature, dtype=torch.float32)
    label = torch.tensor(label, dtype=torch.float32)

    centered = feature - feature.mean()
    # std() is NaN for fewer than two elements and 0 for a constant window.
    # Dividing by either would fill the features with NaN/inf, so in those
    # cases the window is only centred.
    spread = feature.std() if feature.numel() > 1 else None
    if spread is not None and spread > 0:
        centered = centered / spread
    return centered, label

# Sliding-window and batching settings.
window_size, step_size, batch_size = 10, 1, 10

# Feature matrix (columns 0-12 without column 3) and the two-dimensional
# target taken from column 3.
X = playerData.iloc[:, [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12]].values
Y = playerData.iloc[:, 3:4].values

# Windows are served without any transform, in shuffled mini-batches.
dataset = PlayerData(
    features=X,
    labels=Y,
    window_size=window_size,
    step_size=step_size,
    transform=None,
)
loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

In [None]:
import torch

# Pull a single batch from `loader` and report the tensor shapes it yields.
for features, labels in loader:
    print("Features shape:", features.shape)
    print("Labels shape:", labels.shape)
    break  # one batch is enough for the check


Features shape: torch.Size([10, 10, 12])
Labels shape: torch.Size([10, 10, 1])


# LSTM Model

In [None]:
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last timestep."""

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        """Build the recurrent stack and the output projection.

        Args:
            input_size: Number of features per timestep.
            hidden_size: Width of the LSTM hidden state.
            num_layers: Number of stacked LSTM layers.
            output_size: Width of the linear layer's output.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch-first sequence tensor to one output row per sequence."""
        # Zero-valued initial hidden and cell states on the input's device.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        initial_hidden = torch.zeros(state_shape, device=x.device)
        initial_cell = torch.zeros(state_shape, device=x.device)

        sequence_out, _ = self.lstm(x, (initial_hidden, initial_cell))

        # Only the final timestep's hidden output feeds the linear layer.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)


# Model hyperparameters.
input_size = 12   # features per timestep fed to the LSTM
hidden_size = 64  # width of the LSTM hidden state
num_layers = 2    # number of stacked LSTM layers
output_size = 1   # width of the final linear layer's output

# Instantiate the network with the settings above.
model = LSTMModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    output_size=output_size,
)

In [None]:
from torchsummary import summary

# Print the model's module tree (its layers and their configured sizes).
print(model)

LSTMModel(
  (lstm): LSTM(12, 64, num_layers=2, batch_first=True)
  (fc): Linear(in_features=64, out_features=1, bias=True)
)


# Training

In [None]:
# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
def train_model(model, criterion, optimizer, dataloader, num_epochs):
    """Train ``model`` on the batches of ``dataloader`` for ``num_epochs`` epochs.

    Every batch is moved to the device that holds the model's parameters.
    When the targets have one more dimension than the model output (one label
    per timestep of a window versus one prediction per window), only the label
    of the last timestep is compared with the prediction. Without this the two
    tensors would be broadcast against each other and the loss would pair
    predictions with unrelated labels.

    Args:
        model: Module to optimise.
        criterion: Callable ``(outputs, targets) -> loss``.
        optimizer: Optimizer that updates the model's parameters.
        dataloader: Iterable of ``(inputs, targets)`` tensor batches.
        num_epochs: Number of passes over ``dataloader``.

    Prints the sample-weighted mean loss after each epoch.
    """
    # Follow the model's own device instead of relying on a global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode
        running_loss = 0.0
        samples_seen = 0

        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)

            # Forward pass
            outputs = model(inputs)

            # Align per-timestep labels with the single per-window prediction.
            if targets.dim() == outputs.dim() + 1:
                targets = targets[:, -1]

            loss = criterion(outputs, targets)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight the batch loss by its size so the epoch figure is a
            # per-sample mean even when the last batch is smaller.
            batch_count = inputs.size(0)
            running_loss += loss.item() * batch_count
            samples_seen += batch_count

        # Average over the samples actually processed in this epoch.
        epoch_loss = running_loss / samples_seen if samples_seen else 0.0

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')
# NOTE(review): in the visible notebook `train_loader` is first assigned in
# the train/test split cell further down, so on a fresh kernel that cell has
# to run before this call - consider moving the split above this cell.
train_model(model, criterion, optimizer, train_loader, 10)

Epoch [1/10], Loss: 301.6975
Epoch [2/10], Loss: 246.9351
Epoch [3/10], Loss: 185.1843
Epoch [4/10], Loss: 140.9329
Epoch [5/10], Loss: 113.9726
Epoch [6/10], Loss: 95.8845
Epoch [7/10], Loss: 82.7446
Epoch [8/10], Loss: 72.6371
Epoch [9/10], Loss: 64.8310
Epoch [10/10], Loss: 58.5637


# Testing

In [None]:
# Sliding-window and batching settings for the train/test split.
window_size = 10
step_size = 1
batch_size = 10

# Build the windowed dataset once and size the split from ITS length.
# (`inputs` is just the last batch left over from a training loop, so its
# length says nothing about the number of windows.)
full_dataset = PlayerData(X, Y, window_size, step_size, transform=transform_function)
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size

# Seed the split so that re-running the notebook yields the same partition.
# NOTE(review): windows built with step_size < window_size overlap, so a
# random split puts shared rows into both sets - confirm this is acceptable.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    full_dataset, [train_size, test_size], generator=split_generator
)

# DataLoader for training and testing sets
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Report the tensor shapes of the first test batch.
for features, labels in test_loader:
    print("Features shape:", features.shape)
    print("Labels shape:", labels.shape)
    break  # Stop after printing the first batch

Features shape: torch.Size([10, 10, 12])
Labels shape: torch.Size([10, 10, 1])


In [None]:
def test_model(model, loader):
    model.eval()  # Set the model to evaluation mode
    correct_predictions = 0
    total_predictions = 0

    with torch.no_grad():
        for inputs, targets in loader:
            # Move inputs and targets to the device
            inputs, targets = inputs.to(device), targets.to(device)

            # Forward pass
            outputs = model(inputs)

            # Get predicted labels
            _, predicted = torch.max(outputs, 1)

            # Count correct predictions
            correct_predictions += (predicted == targets).sum().item()
            total_predictions += targets.size(0)

            # Print predictions and ground truth labels
            for pred, label in zip(predicted, targets):
                print(f'Predicted: {pred}, Actual: {label}')

    # Calculate accuracy
    accuracy = correct_predictions / total_predictions
    return accuracy
# Test the model using the test DataLoader. `loader` serves every window of
# the full dataset, so evaluating on it would not be a held-out test.
test_accuracy = test_model(model, test_loader)
print(f'Accuracy on test set: {test_accuracy:.2%}')

Predicted: 0, Actual: tensor([[19.],
        [24.],
        [20.],
        [11.],
        [27.],
        [16.],
        [ 6.],
        [19.],
        [29.],
        [20.]])
Predicted: 0, Actual: tensor([[17.],
        [16.],
        [15.],
        [11.],
        [10.],
        [11.],
        [ 9.],
        [17.],
        [ 7.],
        [14.]])
Predicted: 0, Actual: tensor([[16.],
        [16.],
        [10.],
        [15.],
        [ 5.],
        [13.],
        [13.],
        [14.],
        [ 9.],
        [13.]])
Predicted: 0, Actual: tensor([[10.],
        [11.],
        [ 9.],
        [17.],
        [ 7.],
        [14.],
        [15.],
        [18.],
        [12.],
        [16.]])
Predicted: 0, Actual: tensor([[11.],
        [10.],
        [11.],
        [ 9.],
        [17.],
        [ 7.],
        [14.],
        [15.],
        [18.],
        [12.]])
Predicted: 0, Actual: tensor([[ 7.],
        [ 7.],
        [ 7.],
        [19.],
        [17.],
        [ 9.],
        [15.],
        