# AIM BOT Detection in CS:GO
## COPS Summer of Code 2025 - Intelligence Guild
### Computer Vision Week 2 Assignment

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms
from torchvision.models.video import r3d_18
import cv2
import numpy as np
import os
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import time
import yaml

# Configuration
# Shared settings read by the dataset, the training loop and the
# visualisation helper further down in the notebook.
config = dict(
    data_path='data/',          # root folder holding one sub-folder per class
    batch_size=8,
    epochs=50,
    lr=0.001,
    frames_per_clip=30,         # fixed clip length fed to the model
    img_size=(128, 72),         # handed to cv2.resize as the target size
    device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'),
    seed=42,
)

# Set seed for reproducibility (NumPy drives clip sampling, torch drives
# weight initialisation and the dataset split).
np.random.seed(config['seed'])
torch.manual_seed(config['seed'])

In [None]:
class CSGODataset(Dataset):
    """Video-clip dataset for binary aimbot classification.

    Expects ``data_dir`` to contain one sub-folder per class (``clean`` and
    ``aimbot``), each holding ``.mp4`` files. Every item is a fixed-length
    clip of ``config['frames_per_clip']`` frames resized to
    ``config['img_size']``.

    Args:
        data_dir: Root directory with the ``clean/`` and ``aimbot/`` folders.
        transform: Optional callable applied to the clip while it is laid out
            as ``(T, C, H, W)`` float values in ``[0, 1]``. This is the
            ``(..., C, H, W)`` layout torchvision tensor transforms expect,
            so one set of random parameters is shared by all frames of a clip.
    """

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.classes = {'clean': 0, 'aimbot': 1}
        self.samples = []

        for label_name, label_val in self.classes.items():
            class_dir = os.path.join(data_dir, label_name)
            # os.listdir order is arbitrary; sorting keeps sample indices (and
            # therefore any seeded split) stable across runs and machines.
            for video_file in sorted(os.listdir(class_dir)):
                if video_file.endswith('.mp4'):
                    self.samples.append((os.path.join(class_dir, video_file), label_val))

    def __len__(self):
        """Return the number of discovered video files."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Decode one video and return ``(clip, label)``.

        Returns:
            clip: float tensor laid out as ``(C, T, H, W)``.
            label: integer class id (0 = clean, 1 = aimbot).

        Raises:
            RuntimeError: If no frame can be decoded from the video file.
        """
        video_path, label = self.samples[idx]
        clip_len = config['frames_per_clip']
        frames = []

        cap = cv2.VideoCapture(video_path)
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame = cv2.resize(frame, config['img_size'])
                frames.append(frame)
        finally:
            # Release the decoder even if colour conversion or resize raises.
            cap.release()

        if not frames:
            # Padding below needs at least one frame; fail with a clear message
            # instead of an IndexError on frames[-1].
            raise RuntimeError(f"No frames could be decoded from {video_path}")

        # Sample fixed-length clip
        if len(frames) > clip_len:
            # randint's upper bound is exclusive: +1 makes the last full
            # window (ending on the final frame) selectable too.
            start_idx = np.random.randint(0, len(frames) - clip_len + 1)
            frames = frames[start_idx:start_idx + clip_len]
        elif len(frames) < clip_len:
            # Pad with last frame
            frames += [frames[-1]] * (clip_len - len(frames))

        # (T, H, W, C) -> (T, C, H, W): channel axis third-from-last so that
        # image transforms treat T as a batch dimension.
        clip = torch.from_numpy(np.stack(frames)).permute(0, 3, 1, 2).float() / 255.0

        if self.transform:
            clip = self.transform(clip)

        # (T, C, H, W) -> (C, T, H, W), the layout consumed by the 3D CNN.
        clip = clip.permute(1, 0, 2, 3).contiguous()

        return clip, label

In [None]:
class AIMBotDetector(nn.Module):
    """R3D-18 video classifier with a sigmoid attention gate before pooling.

    The pre-trained backbone's convolutional stages are run manually so that
    a single-channel attention map can re-weight the final feature volume
    before global average pooling; the backbone's own ``avgpool`` is not used.
    """

    def __init__(self):
        super().__init__()
        # Use pre-trained 3D ResNet
        self.backbone = r3d_18(pretrained=True)

        # Swap the stock classifier for a small two-class head
        in_dim = self.backbone.fc.in_features
        head_layers = [
            nn.Linear(in_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 2),
        ]
        self.backbone.fc = nn.Sequential(*head_layers)

        # Attention gate: 1x1x1 conv collapses the 512 channels to one map,
        # sigmoid squashes it into (0, 1)
        gate_layers = [nn.Conv3d(512, 1, kernel_size=1), nn.Sigmoid()]
        self.attention = nn.Sequential(*gate_layers)

    def forward(self, x):
        """Classify a batch of clips.

        Args:
            x: input batch; presumably ``(batch, 3, T, H, W)`` as produced by
                the dataset in this notebook — confirm against the caller.

        Returns:
            Tuple ``(logits, attn_weights)``: two-class logits and the
            attention map computed on the last backbone stage.
        """
        net = self.backbone

        # Run the convolutional trunk stage by stage
        features = x
        for stage in (net.stem, net.layer1, net.layer2, net.layer3, net.layer4):
            features = stage(features)

        # Gate the features with the attention map (broadcast over channels)
        attn_weights = self.attention(features)
        gated = features * attn_weights

        # Global average pool down to one vector per clip
        pooled = nn.functional.adaptive_avg_pool3d(gated, (1, 1, 1)).flatten(1)

        return net.fc(pooled), attn_weights

In [None]:
# Data augmentation (meant for training clips only)
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomRotation(10)
])

# Create dataset
full_dataset = CSGODataset(config['data_path'], transform=transform)

# Un-augmented view of the same videos for validation. random_split() only
# wraps indices around one dataset object, so splitting `full_dataset` alone
# would apply the random augmentations to validation clips as well and make
# the validation metrics noisy.
eval_dataset = CSGODataset(config['data_path'], transform=None)
# Share the exact sample list so an index means the same video in both views.
eval_dataset.samples = list(full_dataset.samples)

# Split dataset (80% train / 20% validation)
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_split = random_split(full_dataset, [train_size, val_size])
# Re-point the validation indices at the un-augmented view.
val_dataset = torch.utils.data.Subset(eval_dataset, val_split.indices)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=config['batch_size'], shuffle=False)

In [None]:
# Initialize model
# All names in this cell are notebook globals; later cells read `model`,
# the history lists and the last epoch's `all_preds` / `all_labels`.
model = AIMBotDetector().to(config['device'])

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=config['lr'])

# Training loop
# Per-epoch history, appended once per epoch and plotted after training.
train_losses, val_losses = [], []
train_accs, val_accs = [], []

for epoch in range(config['epochs']):
    # Training
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    # Timer covers both the training and the validation pass of this epoch.
    start_time = time.time()
    
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(config['device']), labels.to(config['device'])
        
        optimizer.zero_grad()
        # The model also returns its attention map; it is not used for the loss.
        outputs, _ = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        # Index of the larger logit is the predicted class.
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
    
    # Loss is averaged over batches, accuracy over samples.
    train_loss = running_loss / len(train_loader)
    train_acc = 100 * correct / total
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    
    # Validation
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    # Reset every epoch, so after the loop these hold the final epoch only.
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(config['device']), labels.to(config['device'])
            outputs, _ = model(inputs)
            loss = criterion(outputs, labels)
            
            val_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            
            # Collected on the CPU for the scikit-learn report after training.
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    val_loss = val_loss / len(val_loader)
    val_acc = 100 * correct / total
    val_losses.append(val_loss)
    val_accs.append(val_acc)
    
    epoch_time = time.time() - start_time
    
    print(f"Epoch {epoch+1}/{config['epochs']} | "
          f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | "
          f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}% | "
          f"Time: {epoch_time:.2f}s")

# Save model (weights of the final epoch)
torch.save(model.state_dict(), 'aimbot_detector.pth')

# Plot results: loss on the left panel, accuracy on the right
panels = (
    ('Loss Curve', ((train_losses, 'Train Loss'), (val_losses, 'Val Loss'))),
    ('Accuracy Curve', ((train_accs, 'Train Accuracy'), (val_accs, 'Val Accuracy'))),
)

plt.figure(figsize=(12, 5))
for position, (panel_title, series) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    for history, legend_label in series:
        plt.plot(history, label=legend_label)
    plt.legend()
    plt.title(panel_title)
plt.savefig('training_results.png')

# Class ids and display names, in matching order. Passing the ids explicitly
# keeps the report and the matrix at two classes even when the validation
# split (or the predictions) happen to contain only one of them; otherwise
# classification_report raises because target_names has two entries, and the
# confusion matrix shrinks below the 2x2 grid labelled further down.
class_ids = [0, 1]
class_names = ['Clean', 'Aimbot']

# Classification report (computed on the final epoch's validation predictions)
print("\nClassification Report:")
print(classification_report(all_labels, all_preds, labels=class_ids, target_names=class_names))

# Confusion matrix
cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
plt.figure(figsize=(8, 6))
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(len(class_ids))
plt.xticks(tick_marks, class_names)
plt.yticks(tick_marks, class_names)
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.savefig('confusion_matrix.png')

In [None]:
# Visualization function
def visualize_attention(video_path):
    """Show each frame of a video's first clip next to its attention overlay.

    Decodes up to ``config['frames_per_clip']`` frames from ``video_path``,
    runs the global ``model`` on them and, for every decoded frame, displays
    the frame beside a JET heat-map overlay of the model's attention map.

    Args:
        video_path: Path to a video file readable by OpenCV.

    Raises:
        ValueError: If no frame can be decoded from ``video_path``.
    """
    clip_len = config['frames_per_clip']
    cap = cv2.VideoCapture(video_path)
    frames = []

    try:
        # Only the first clip is fed to the model, so stop decoding there
        # instead of loading (and later plotting) the entire video.
        while cap.isOpened() and len(frames) < clip_len:
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, config['img_size'])
            frames.append(frame)
    finally:
        cap.release()

    if not frames:
        raise ValueError(f"No frames could be decoded from {video_path}")

    # Process clip: pad short videos with the last frame so the model sees the
    # same clip length the dataset produces during training.
    model_frames = frames + [frames[-1]] * (clip_len - len(frames))
    clip = torch.tensor(np.array(model_frames)).permute(3, 0, 1, 2).float() / 255.0
    clip = clip.unsqueeze(0).to(config['device'])

    # Get predictions and attention
    model.eval()
    with torch.no_grad():
        _, attn_weights = model(clip)

    # Process attention: drop the batch and channel axes by explicit indexing;
    # squeeze() would also remove a temporal or spatial axis of size 1.
    attn_weights = attn_weights[0, 0].cpu().numpy()
    attn_steps = attn_weights.shape[0]

    # Create overlay
    for i, frame in enumerate(frames):
        # The backbone downsamples time, so the attention map has fewer steps
        # than the clip has frames; map each frame to its proportional step
        # instead of indexing the map with the raw frame number.
        step = min(i * attn_steps // clip_len, attn_steps - 1)
        heatmap = cv2.resize(attn_weights[step], (frame.shape[1], frame.shape[0]))
        heatmap = (heatmap * 255).astype(np.uint8)
        heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
        # applyColorMap yields BGR; frames were converted to RGB above.
        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
        overlay = cv2.addWeighted(frame, 0.7, heatmap, 0.3, 0)

        plt.figure(figsize=(10, 6))
        plt.subplot(1, 2, 1)
        plt.imshow(frame)
        plt.title('Original Frame')
        plt.axis('off')

        plt.subplot(1, 2, 2)
        plt.imshow(overlay)
        plt.title('Attention Map')
        plt.axis('off')
        plt.tight_layout()
        plt.show()

# Example usage
# visualize_attention('data/aimbot/sample.mp4')

## Key Features of the Implementation
1. **3D CNN Architecture**: Uses PyTorch's pre-trained R3D-18 model for spatiotemporal feature extraction
2. **Attention Mechanism**: A learned sigmoid gate re-weights the final feature map; its weights can be visualized to show regions of interest in gameplay frames
3. **Data Augmentation**: Horizontal flips, color jitter, and rotation for robustness
4. **Visualization Tools**: Attention mapping and performance metrics
5. **Efficient Processing**: Frame sampling and GPU acceleration

## Usage Instructions
1. Create dataset folder structure:
   ```
   data/
   ├── aimbot/
   │   ├── video1.mp4
   │   └── ...
   └── clean/
       ├── video1.mp4
       └── ...
   ```
2. Run all notebook cells
3. Visualize results by calling the `visualize_attention()` function with the path to a video, e.g. `visualize_attention('data/aimbot/sample.mp4')`