<a href="https://colab.research.google.com/github/Salehnaz/LLM_Football_Commentary_AI/blob/main/CommentaryAI_Football_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ⚽ CommentaryAI - Football Event Classifier
A simple transformer-based AI model that reads football commentary lines and classifies the event (Goal, Miss, Save, etc.).

🔍 No hand-written if-else rules — the model learns the event types from patterns in the text!

---

In [None]:
# 🛠️ Install dependencies
!pip install torch scikit-learn --quiet


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m105.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m83.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m45.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import os

import pandas as pd
from google.colab import files

# Show the working directory; the CSV below is opened by a relative file name.
print(os.getcwd())

# Upload your CSV
uploaded = files.upload()

# Now read it
df = pd.read_csv("commentary_dataset_200.csv")
print(df.head())

/content


Saving commentary_dataset_200.csv to commentary_dataset_200.csv
                       commentary    label
0  Skillful dribble near the box.  DRIBBLE
1  Tackles and clears the danger!   TACKLE
2     Illegal tackle from behind!     FOUL
3           Great sliding tackle!   TACKLE
4    Rough challenge from behind!     FOUL


In [None]:
# 📦 Step 1: Prepare Sample Data
# Each sample is a (commentary, label) pair, for example:
#     ("He shoots and scores!", "GOAL")
#     ("That’s a brilliant save by the keeper!", "SAVE")

import pandas as pd

# 📦 Load data from uploaded CSV
df = pd.read_csv("commentary_dataset_200.csv")

# Fail early, with a clear message, if the expected columns are absent.
required_columns = ["commentary", "label"]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise KeyError(f"CSV is missing required column(s): {missing_columns}")

# See a preview
print(df.head())

# Extract samples
# Rows with an empty commentary or label are skipped: pandas loads empty cells as
# float NaN, which would crash the `sentence.lower()` call in the tokenization step.
labelled = df.dropna(subset=required_columns)
samples = list(zip(labelled["commentary"].astype(str), labelled["label"]))



                       commentary    label
0  Skillful dribble near the box.  DRIBBLE
1  Tackles and clears the danger!   TACKLE
2     Illegal tackle from behind!     FOUL
3           Great sliding tackle!   TACKLE
4    Rough challenge from behind!     FOUL


In [None]:
# Mount Google Drive at /content/drive (uses the Colab-specific `google.colab` package).
# NOTE(review): no visible cell reads from /content/drive — confirm this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# 🔠 Step 2: Tokenization
# Ids 0 and 1 are reserved for the padding and unknown tokens; every other
# lowercased, whitespace-separated token receives the next free id in first-seen order.
token2idx = {"<PAD>": 0, "<UNK>": 1}
for sentence, _ in samples:
    for token in sentence.lower().split():
        # setdefault only inserts when the token is new, so existing ids never change.
        token2idx.setdefault(token, len(token2idx))

def encode_sentence(sentence, max_len=10):
    """Turn a sentence into a list of token ids of length ``max_len``.

    The sentence is lowercased and split on whitespace. Tokens that are not in
    ``token2idx`` are mapped to the ``<UNK>`` id. Sentences longer than
    ``max_len`` tokens are truncated; shorter ones are right-padded with 0.
    """
    words = sentence.lower().split()[:max_len]
    ids = [token2idx.get(word, token2idx["<UNK>"]) for word in words]
    padding = [0] * (max_len - len(ids))
    return ids + padding

# Encode every commentary line as a fixed-length (10) list of token ids.
X = [encode_sentence(text, max_len=10) for text, _ in samples]

from sklearn.preprocessing import LabelEncoder

# Map labels to integer class ids; `le` is kept so predictions can be decoded later.
le = LabelEncoder()
y = le.fit_transform([tag for _, tag in samples])


In [None]:
# 📚 Step 3: Dataset & Transformer Model
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

class CommentaryDataset(Dataset):
    """Pairs encoded commentary sequences with their integer class labels.

    Args:
        X: Token-id sequences of equal length (nested list, array or tensor).
        y: One integer class id per sequence.

    Raises:
        ValueError: If ``X`` and ``y`` hold a different number of samples.
    """

    def __init__(self, X, y):
        # Pin int64: nn.CrossEntropyLoss requires int64 class-index targets, and
        # without an explicit dtype an int32 input (e.g. a NumPy array) would
        # keep its narrower type.
        self.X = torch.tensor(X, dtype=torch.long)
        self.y = torch.tensor(y, dtype=torch.long)
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        """Number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(token_ids, label)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]

class CommentaryAIModel(nn.Module):
    """Small transformer encoder that classifies a padded token-id sequence.

    Pipeline: embedding -> one TransformerEncoder layer (2 attention heads) ->
    mean over the non-padding positions -> linear layer producing class logits.

    Padding positions are masked out of self-attention and excluded from the
    mean, so the logits do not depend on how much padding follows a sentence.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Embedding / transformer width (must be divisible by the 2 heads).
        num_classes: Number of output logits.
        pad_idx: Token id that marks padding. Defaults to 0.
    """

    def __init__(self, vocab_size, embed_dim, num_classes, pad_idx=0):
        super().__init__()
        self.pad_idx = pad_idx
        # padding_idx keeps the padding embedding at zero and out of gradient updates.
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_idx)
        encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=2)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=1)
        self.fc = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        """Map token ids of shape (batch, seq_len) to logits of shape (batch, num_classes)."""
        pad_mask = x == self.pad_idx  # (batch, seq_len), True where padding
        # A row made only of padding would mask every attention key and produce
        # NaN; such rows fall back to unmasked processing instead.
        all_pad = pad_mask.all(dim=1, keepdim=True)
        pad_mask = pad_mask & ~all_pad

        h = self.embedding(x)
        # The encoder layer uses the default batch_first=False, i.e. (seq, batch, dim).
        h = h.permute(1, 0, 2)
        h = self.transformer(h, src_key_padding_mask=pad_mask)

        # Masked mean over the sequence axis: only kept positions contribute.
        keep = (~pad_mask).t().unsqueeze(-1).to(h.dtype)  # (seq, batch, 1)
        pooled = (h * keep).sum(dim=0) / keep.sum(dim=0)
        return self.fc(pooled)


In [None]:
# 🧠 Step 4: Training the model
# Seed torch's RNG so weight initialisation, batch shuffling and dropout are
# repeatable after a kernel restart.
torch.manual_seed(42)

dataset = CommentaryDataset(X, y)
loader = DataLoader(dataset, batch_size=2, shuffle=True)
model = CommentaryAIModel(vocab_size=len(token2idx), embed_dim=32, num_classes=len(le.classes_))
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()

model.train()  # explicit: dropout in the encoder layer is active while training
for epoch in range(100):
    # total_loss is the SUM of per-batch losses over the epoch (not a mean);
    # correct counts training samples whose arg-max prediction matches the label.
    total_loss, correct = 0, 0
    for xb, yb in loader:
        optimizer.zero_grad()
        preds = model(xb)
        loss = loss_fn(preds, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        correct += (preds.argmax(dim=1) == yb).sum().item()
    print(f"📘 Epoch {epoch+1} - Loss: {total_loss:.4f} - Accuracy: {correct}/{len(dataset)}")




📘 Epoch 1 - Loss: 124.8263 - Accuracy: 4/100
📘 Epoch 2 - Loss: 114.9972 - Accuracy: 15/100
📘 Epoch 3 - Loss: 109.2129 - Accuracy: 19/100
📘 Epoch 4 - Loss: 101.2718 - Accuracy: 44/100
📘 Epoch 5 - Loss: 94.8293 - Accuracy: 42/100
📘 Epoch 6 - Loss: 80.7811 - Accuracy: 51/100
📘 Epoch 7 - Loss: 63.3782 - Accuracy: 71/100
📘 Epoch 8 - Loss: 49.2317 - Accuracy: 82/100
📘 Epoch 9 - Loss: 40.1438 - Accuracy: 85/100
📘 Epoch 10 - Loss: 31.2573 - Accuracy: 89/100
📘 Epoch 11 - Loss: 25.0213 - Accuracy: 91/100
📘 Epoch 12 - Loss: 16.3532 - Accuracy: 100/100
📘 Epoch 13 - Loss: 12.9587 - Accuracy: 98/100
📘 Epoch 14 - Loss: 8.7893 - Accuracy: 98/100
📘 Epoch 15 - Loss: 6.8741 - Accuracy: 100/100
📘 Epoch 16 - Loss: 5.1186 - Accuracy: 100/100
📘 Epoch 17 - Loss: 4.5150 - Accuracy: 100/100
📘 Epoch 18 - Loss: 3.9146 - Accuracy: 100/100
📘 Epoch 19 - Loss: 3.6629 - Accuracy: 100/100
📘 Epoch 20 - Loss: 2.6891 - Accuracy: 100/100
📘 Epoch 21 - Loss: 2.5126 - Accuracy: 100/100
📘 Epoch 22 - Loss: 1.7171 - Accuracy: 10

In [None]:
# 🧪 Step 5: Inference (No if-else!)
def predict_commentary(sentence):
    """Return the predicted event label for a single commentary sentence.

    The sentence is encoded to 10 token ids, passed through the global ``model``
    without gradient tracking, and the highest-scoring class id is decoded back
    to its label with ``le``. The model is switched to eval mode and left there.
    """
    model.eval()
    with torch.no_grad():
        batch = torch.tensor([encode_sentence(sentence, max_len=10)])
        logits = model(batch)
        best_class = logits.argmax(dim=1).item()
    return le.inverse_transform([best_class])[0]

# 🔍 Try with new sentences
# Ad-hoc commentary lines used to sanity-check the trained model.
test_lines = [
    "He smashes it into the net!",
    "The ball is passed across the field.",
    "Great dribble from midfield!",
    "Misses the chance to equalize.",
    "That tackle might be a foul.",
]

# Print one "<sentence> → <predicted label>" row per test line.
print("\n🧠 Predictions:")
for line in test_lines:
    print(f"{line} → {predict_commentary(line)}")



🧠 Predictions:
He smashes it into the net! → GOAL
The ball is passed across the field. → PASS
Great dribble from midfield! → TACKLE
Misses the chance to equalize. → MISS
That tackle might be a foul. → FOUL
