In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import pandas as pd

# Load your data
data = pd.read_csv("allAtt_onehot_large_train.csv")
dataT = pd.read_csv("allAtt_onehot_large_test.csv")

# Assuming your data has features and labels
features = data.iloc[:, 1:28].values  # Assuming features are from columns 1 to 27
labels = data.iloc[:, 28].values  # Assuming labels are from column 28

# Split data into train and validation sets
train_features, val_features, train_labels, val_labels = train_test_split(features, labels, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors
train_features = torch.Tensor(train_features).view(-1, 9, 3)  # Reshape to match chunk size and number of chunks
val_features = torch.Tensor(val_features).view(-1, 9, 3)
train_labels = torch.Tensor(train_labels).long()  # Assuming labels are integers
val_labels = torch.Tensor(val_labels).long()

# Create DataLoader for training and validation sets
train_dataset = TensorDataset(train_features, train_labels)
val_dataset = TensorDataset(val_features, val_labels)

batch_size = 60
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Define LSTM model
class FootballLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(FootballLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])  # Take the output of the last time step
        return out

# Initialize the model
input_size = train_features.size(2)  # Number of features
hidden_size = 256
output_size = 2  # Adjust based on your number of classes
model = FootballLSTM(input_size, hidden_size, output_size)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    total_train_correct = 0
    total_train_samples = 0
    for batch_features, batch_labels in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_features)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()

        _, predicted = torch.max(outputs, 1)
        total_train_samples += batch_labels.size(0)
        total_train_correct += (predicted == batch_labels).sum().item()

    train_accuracy = total_train_correct / total_train_samples

    # Validation
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_features, batch_labels in val_loader:
            outputs = model(batch_features)
            loss = criterion(outputs, batch_labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += batch_labels.size(0)
            correct += (predicted == batch_labels).sum().item()

    accuracy = correct / total
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}, '
          f'Training Accuracy: {train_accuracy:.4f}')

# Save the trained model
torch.save(model.state_dict(), 'football_lstm_model.pth')


Epoch 1/20, Loss: 0.5486, Training Accuracy: 0.6626
Epoch 2/20, Loss: 0.5493, Training Accuracy: 0.6647
Epoch 3/20, Loss: 0.5346, Training Accuracy: 0.6633
Epoch 4/20, Loss: 0.5485, Training Accuracy: 0.6633
Epoch 5/20, Loss: 0.5343, Training Accuracy: 0.6687
Epoch 6/20, Loss: 0.5322, Training Accuracy: 0.6673
Epoch 7/20, Loss: 0.5294, Training Accuracy: 0.6687
Epoch 8/20, Loss: 0.5302, Training Accuracy: 0.6707
Epoch 9/20, Loss: 0.5121, Training Accuracy: 0.6640
Epoch 10/20, Loss: 0.5345, Training Accuracy: 0.6714
Epoch 11/20, Loss: 0.5223, Training Accuracy: 0.6620
Epoch 12/20, Loss: 0.5185, Training Accuracy: 0.6747
Epoch 13/20, Loss: 0.4958, Training Accuracy: 0.6613
Epoch 14/20, Loss: 0.5203, Training Accuracy: 0.6680
Epoch 15/20, Loss: 0.5096, Training Accuracy: 0.6714
Epoch 16/20, Loss: 0.4930, Training Accuracy: 0.6734
Epoch 17/20, Loss: 0.5195, Training Accuracy: 0.6680
Epoch 18/20, Loss: 0.5073, Training Accuracy: 0.6714
Epoch 19/20, Loss: 0.5088, Training Accuracy: 0.6694
Ep

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load your data
data = pd.read_csv("allAtt_onehot_large_train.csv")
dataT = pd.read_csv("allAtt_onehot_large_test.csv")

# Assuming your data has features and labels
features = data.iloc[:, 1:28].values  # Assuming features are from columns 1 to 27
labels = data.iloc[:, 28].values  # Assuming labels are from column 28

# Split data into train, validation, and test sets
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=0.2, random_state=42)
val_features, test_features, val_labels, test_labels = train_test_split(test_features, test_labels, test_size=0.5, random_state=42)

# Initialize Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
rf_classifier.fit(train_features, train_labels)

# Make predictions on the training set
train_predictions = rf_classifier.predict(train_features)

# Calculate accuracy on the training set
train_accuracy = accuracy_score(train_labels, train_predictions)
print(f'Training Accuracy: {train_accuracy:.4f}')

# Make predictions on the validation set
val_predictions = rf_classifier.predict(val_features)

# Calculate accuracy on the validation set
val_accuracy = accuracy_score(val_labels, val_predictions)
print(f'Validation Accuracy: {val_accuracy:.4f}')

# Make predictions on the test set
test_predictions = rf_classifier.predict(test_features)

# Calculate accuracy on the test set
test_accuracy = accuracy_score(test_labels, test_predictions)
print(f'Test Accuracy: {test_accuracy:.4f}')


Training Accuracy: 0.9953
Validation Accuracy: 0.7903
Test Accuracy: 0.7742


In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load your data
data = pd.read_csv("allAtt_onehot_large_train.csv")
dataT = pd.read_csv("allAtt_onehot_large_test.csv")

# Assuming your data has features and labels
features = data.iloc[:, 1:28].values  # Assuming features are from columns 1 to 27
labels = data.iloc[:, 28].values  # Assuming labels are from column 28

# Split data into train, validation, and test sets
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=0.2, random_state=42)
val_features, test_features, val_labels, test_labels = train_test_split(test_features, test_labels, test_size=0.5, random_state=42)

# Initialize Logistic Regression model
logistic_reg_model = LogisticRegression(random_state=42)

# Train the model
logistic_reg_model.fit(train_features, train_labels)

# Make predictions on the training set
train_predictions = logistic_reg_model.predict(train_features)

# Calculate accuracy on the training set
train_accuracy = accuracy_score(train_labels, train_predictions)
print(f'Training Accuracy: {train_accuracy:.4f}')

# Make predictions on the validation set
val_predictions = logistic_reg_model.predict(val_features)

# Calculate accuracy on the validation set
val_accuracy = accuracy_score(val_labels, val_predictions)
print(f'Validation Accuracy: {val_accuracy:.4f}')

# Make predictions on the test set
test_predictions = logistic_reg_model.predict(test_features)

# Calculate accuracy on the test set
test_accuracy = accuracy_score(test_labels, test_predictions)
print(f'Test Accuracy: {test_accuracy:.4f}')


Training Accuracy: 0.6882
Validation Accuracy: 0.7151
Test Accuracy: 0.7043


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
