# Base model

In [2]:
# 1. Setup and Imports
import os
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from tqdm.auto import tqdm


In [6]:
# Comprehensive PyTorch CUDA Installation and Diagnosis
import sys
import subprocess
import os

# 1. Check system info
print("=== SYSTEM INFORMATION ===")
print(f"Python version: {sys.version}")
print(f"Platform: {sys.platform}")

# 2. Check if CUDA is available on your system
print("\n=== CUDA SYSTEM CHECK ===")
try:
    # Check for NVIDIA driver
    if sys.platform == 'win32':
        nvidia_smi = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
        if nvidia_smi.returncode == 0:
            print("NVIDIA driver found:")
            print(nvidia_smi.stdout.split('\n')[0])
        else:
            print("NVIDIA driver not found or not working")
    else:
        print("Non-Windows platform detected, skipping nvidia-smi check")
except Exception as e:
    print(f"Error checking NVIDIA driver: {e}")

# 3. Uninstall existing PyTorch
print("\n=== UNINSTALLING CURRENT PYTORCH ===")
!pip uninstall -y torch torchvision torchaudio

# 4. Install CUDA version with specific version
print("\n=== INSTALLING PYTORCH WITH CUDA ===")
!pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118

# 5. Restart kernel notice
print("\n=== IMPORTANT ===")
print("RESTART YOUR KERNEL NOW by clicking 'Kernel > Restart Kernel'")
print("Then run the verification cell below in a new cell")

=== SYSTEM INFORMATION ===
Python version: 3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]
Platform: win32

=== CUDA SYSTEM CHECK ===
NVIDIA driver found:
Tue Apr  1 16:11:36 2025       

=== UNINSTALLING CURRENT PYTORCH ===
Found existing installation: torch 2.4.1
Uninstalling torch-2.4.1:
  Successfully uninstalled torch-2.4.1
Found existing installation: torchvision 0.19.1
Uninstalling torchvision-0.19.1:
  Successfully uninstalled torchvision-0.19.1
Found existing installation: torchaudio 2.4.1
Uninstalling torchaudio-2.4.1:
  Successfully uninstalled torchaudio-2.4.1

=== INSTALLING PYTORCH WITH CUDA ===


You can safely remove it manually.
You can safely remove it manually.


Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting torch==2.1.0
  Downloading https://download.pytorch.org/whl/cu118/torch-2.1.0%2Bcu118-cp311-cp311-win_amd64.whl (2722.7 MB)
     ---------------------------------------- 0.0/2.7 GB ? eta -:--:--
     ---------------------------------------- 0.0/2.7 GB ? eta -:--:--
     ---------------------------------------- 0.0/2.7 GB 2.8 MB/s eta 0:16:17
     ---------------------------------------- 0.0/2.7 GB 2.4 MB/s eta 0:18:57
     ---------------------------------------- 0.0/2.7 GB 2.4 MB/s eta 0:18:57
     ---------------------------------------- 0.0/2.7 GB 2.4 MB/s eta 0:18:57
     ---------------------------------------- 0.0/2.7 GB 1.0 MB/s eta 0:43:57
     ---------------------------------------- 0.0/2.7 GB 1.1 MB/s eta 0:42:44
     ---------------------------------------- 0.0/2.7 GB 1.1 MB/s eta 0:42:44
     ---------------------------------------- 0.0/2.7 GB 1.1 MB/s eta 0:42:44
     -----------------------------------


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: C:\Users\Nassi\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [16]:
# Check if these are installed
!pip install peft datasets --upgrade

Collecting peft
  Downloading peft-0.15.1-py3-none-any.whl.metadata (13 kB)
Downloading peft-0.15.1-py3-none-any.whl (411 kB)
Installing collected packages: peft
Successfully installed peft-0.15.1



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: C:\Users\Nassi\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [None]:
import torch

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
    print(f"CUDA memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    print("\nPossible CUDA issues:")
    print("1. Your NVIDIA drivers may be outdated")
    print("2. You may not have a compatible NVIDIA GPU")
    print("3. Your CUDA toolkit may not be properly installed")


PyTorch version: 2.1.0+cu118
CUDA available: True
CUDA version: 11.8
CUDA device: NVIDIA GeForce RTX 4060 Laptop GPU
CUDA memory: 8.59 GB


In [3]:

# 4. Setup Gemma 3-1B Model with 4-bit Quantization
def setup_gemma_model():
    """Setup Gemma 3-1B model with 4-bit quantization to fit in 8GB VRAM"""
    try:
        # Configure 4-bit quantization for memory efficiency
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16
        )
        
        # Load model and tokenizer
        print("Loading Gemma 3-1B tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-1b-it")
        
        print("Loading Gemma 3-1B model (4-bit quantized)...")
        model = AutoModelForCausalLM.from_pretrained(
            "google/gemma-3-1b-it",
            quantization_config=bnb_config,
            device_map="auto"
        )
        
        print("Model loaded successfully!")
        return model, tokenizer
    
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        print("\nPossible issues:")
        print("1. You need to accept the Gemma model terms on Hugging Face")
        print("2. You need to be logged in with huggingface-cli login")
        print("3. Your token may not have permission to access this model")
        return None, None

# Load the model
model, tokenizer = setup_gemma_model()


Loading Gemma 3-1B tokenizer...
Loading Gemma 3-1B model (4-bit quantized)...


model.safetensors:   0%|          | 0.00/2.00G [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


generation_config.json:   0%|          | 0.00/215 [00:00<?, ?B/s]

Model loaded successfully!


In [15]:
# Improved Gemma 3-1B Football Highlights Summarization
def summarize_highlights(transcript, max_new_tokens=500):
    """Generate a summary of soccer highlights using Gemma 3-1B with improved soccer terminology"""
    if model is None or tokenizer is None:
        return "Model not loaded correctly. Please check setup."
    
    if not transcript or len(transcript.strip()) < 50:
        return "Please provide a valid highlights transcript."
    
    # Create improved prompt with soccer-specific terminology guidance
    
    prompt = f"""<start_of_turn>user
You are a professional soccer commentator with extensive experience covering major leagues. Create a concise, engaging summary of these match highlights. The transcript is from the match highlights video, some sentences may not be coherent, and there may be some errors, espically in the names of the teams or players. Make sure to use soccer-specific terminology and avoid unnecessary details. Focus on key moments, player performances, and the overall match flow. 
Use the following transcript summary as a reference:
"Barcelona played against Las Palmas, and the game saw several key moments. Las Palmas had a strong start, but Barcelona's defense held strong. The turning point came when Danny Holmo scored a brilliant goal, slicing through Las Palmas' defense and ending his personal goal drought. This goal was a highlight of the match, with Holmo showcasing his ability to operate in tight spaces and finish with precision.

Barcelona continued to dominate, with Lamine Yamal and Rafinha creating scoring opportunities. The team's high press and ability to operate in tight spaces caused problems for Las Palmas. Although Las Palmas had some chances, Barcelona's defense was solid, and they were able to contain the opposition's attacks.

In the second half, Ferran Torres scored another goal for Barcelona, securing the win with a powerful left-footed shot. The game ended with Barcelona taking all three points, thanks to their strong performance and Hansi Flick's tactical decisions. The final score was 2-0 in favor of Barcelona, with Danny Holmo and Ferran Torres scoring the goals. Danny Holmo was named MVP, and the win gave Barcelona an extra confidence boost ahead of their cup semi-finals match."

HIGHLIGHTS TRANSCRIPT:
{transcript}
<end_of_turn>

<start_of_turn>model
"""
    
    #prompt2 = "how much do you know about soccer teams and players? do you know their names? "
    
    # Rest of the function remains the same
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.5,  # Reduced further for more factual output
            top_p=0.85,
            do_sample=True,
            no_repeat_ngram_size=3,
            length_penalty=1.0
        )
    
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    if "<start_of_turn>model" in full_response:
        summary = full_response.split("<start_of_turn>model")[-1].strip()
    else:
        prompt_end = prompt.strip()
        summary = full_response[len(prompt_end):].strip()
    
    return summary

# Process the highlights transcript and display results
print("===== TESTING IMPROVED GEMMA 3-1B ON FOOTBALL HIGHLIGHTS =====\n")

if len(highlights_transcript.strip()) < 50:
    print("⚠️ Please edit the cell to add your highlights transcript to the variable!")
else:
    print(f"Processing highlights transcript ({len(highlights_transcript)} characters)...")
    
    # Generate summary
    summary = summarize_highlights(highlights_transcript)
    
    print("\n===== SUMMARY OF HIGHLIGHTS =====")
    print(summary)

===== TESTING IMPROVED GEMMA 3-1B ON FOOTBALL HIGHLIGHTS =====

Processing highlights transcript (9933 characters)...

===== SUMMARY OF HIGHLIGHTS =====
mmary, aiming for a professional and engaging style, incorporating the transcript you provided.  I’ve focused on the key moments and nuances, aiming to capture the flow of the action and the strategic implications.

---

**Match Highlights: Barcelona 3 - 0 Las Palmas – A Dominant Display**

**Opening Moments & Defensive Focus:**

The game began with a defensive intensity. Diego Martinez & Blasparms immediately established a strong, almost suffocating, defensive shape. The initial phase saw a clear attempt to clear the ball, with a potential early challenge from the visitors.  The initial challenge was a slight hesitation, suggesting a cautious approach.

**The Breakthrough – Danny Holmos Stunning Goal:**

Then, with just under 10 minutes on the clock, a brilliant moment unfolded. Danny Holmes, the last Palmeiras defender, scored a stun

---

---

In [12]:
# Simple test script that avoids tensorflow dependency issues
import os
import torch
import warnings
warnings.filterwarnings("ignore")

from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
import time
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
import json

# Model path
model_path = "../summarizer/models/soccer-summarizer-improved"

def test_model_gpu():
    """Load and test your fine-tuned model on GPU"""
    print("Loading adapter configuration...")
    
    # Get base model from PEFT config
    peft_config = PeftConfig.from_pretrained(model_path)
    base_model_name = peft_config.base_model_name_or_path
    print(f"Base model: {base_model_name}")
    
    # Check GPU availability
    if not torch.cuda.is_available():
        raise RuntimeError("GPU not available! This script requires CUDA.")
    
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
    print(f"Available GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    
    # Load tokenizer
    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    
    """
    # Load base model directly to GPU with correct dtype
    print("Loading base model on GPU...")
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,  # Keep half precision for GPU
        device_map="cuda:0"        # Explicitly use CUDA, not auto-mapping
    )
    
    
    # Load adapter
    print("Loading LoRA adapter...")
    try:
        model = PeftModel.from_pretrained(
            base_model, 
            model_path,
            is_trainable=False
        )
        print("LoRA adapter loaded successfully!")
    except Exception as e:
        print(f"Error loading adapter: {e}")
        print("Falling back to base model...")
        model = base_model
    """
    # Test with just the base model
    model = AutoModelForCausalLM.from_pretrained(
        "google/gemma-3-1b-it",
        torch_dtype=torch.float16,
        device_map="cuda:0"
    )
    # Test function
    def generate_summary(transcript, reference_teams=None, reference_players=None):
        # Create reference text if teams and players provided
        reference_text = ""
        if reference_teams and reference_players:
            reference_text = "CORRECT NAMES REFERENCE:\n"
            reference_text += f"TEAMS: {', '.join(reference_teams)}\n"
            reference_text += f"PLAYERS: {', '.join(reference_players)}\n\n"
        
        prompt = f"""<start_of_turn>user
You are a professional soccer commentator. Create a concise, engaging summary of the soccer match based on the transcript given below.
The transcript is created through speech to text recognition which can induce errors in player and team names, correct them when possible. 
Specify at the end the final score for the match.

{reference_text}
HIGHLIGHTS TRANSCRIPT:
{transcript}
<end_of_turn>

<start_of_turn>model
"""
        
        # Measure generation time
        start_time = time.time()
        
        # Generate summary
        print("Generating summary...")

        # Prepare input and move to GPU
        inputs = tokenizer(prompt, return_tensors="pt").to("cuda:0")
        
        
        
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=600,
                temperature=0.7,
                top_p=0.9,
                do_sample=True
            )
        
        # Process output
        full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        if "<start_of_turn>model" in full_response:
            summary = full_response.split("<start_of_turn>model")[-1].strip()
        else:
            summary = full_response[len(prompt):].strip()
        
        gen_time = time.time() - start_time
        print(f"Generation completed in {gen_time:.2f} seconds")
        
        return summary
    
    # Test with a sample transcript
    
    
    # Define reference teams and players
    test_teams = ["Barcelona", "Las Palmas"]
    test_players = ["Danny Omo", "Mika Mármol", "Raphinha", "Lamine Yamal"]
    
    test_teams = ""
    test_players = ""
    # Generate summary
    summary = generate_summary(test_transcript, test_teams, test_players)
    
    print("\n=== GENERATED SUMMARY ===")
    print(summary)
    
    return summary

# Run the test
test_model_gpu()

Loading adapter configuration...
Base model: google/gemma-3-1b-it
Using GPU: NVIDIA GeForce RTX 4060 Laptop GPU
Available GPU memory: 8.59 GB
Loading tokenizer...
Generating summary...
Generation completed in 19.63 seconds

=== GENERATED SUMMARY ===
commentator’s summary of the match, incorporating the transcript and aiming for a dynamic feel:

**Blast from the Past! Barcelona Dominate, But a Late Collapse**

The match between Diego Martinez and Blasparmas saw Barcelona unleash a stunning attack, but ultimately succumbed to a frustratingly defensive struggle.  The game started with a flurry of movement, with Jenny Palmer starting the defense, and Alex Sware at center back.  Early on, Martinez and Blasparmas initiated the attack, with a quick free kick from the visitors, prompting a desperate scramble for the ball.  Barcelona’s midfield, spearheaded by the dynamic Danny Omo, began to exert pressure, but the defense struggled to contain the attacking threat. 

The match quickly shifted m

"commentator’s summary of the match, incorporating the transcript and aiming for a dynamic feel:\n\n**Blast from the Past! Barcelona Dominate, But a Late Collapse**\n\nThe match between Diego Martinez and Blasparmas saw Barcelona unleash a stunning attack, but ultimately succumbed to a frustratingly defensive struggle.  The game started with a flurry of movement, with Jenny Palmer starting the defense, and Alex Sware at center back.  Early on, Martinez and Blasparmas initiated the attack, with a quick free kick from the visitors, prompting a desperate scramble for the ball.  Barcelona’s midfield, spearheaded by the dynamic Danny Omo, began to exert pressure, but the defense struggled to contain the attacking threat. \n\nThe match quickly shifted momentum with a brilliant goal from Barcelona's striker, who was rewarded with a stunning flick from the right back, creating a moment of magic.  However, Barcelona’s composure started to unravel as they struggled to maintain control.  The visi

---

# New Finetuning

In [21]:
# Complete fine-tuning pipeline for soccer match summarizer
import os
import pandas as pd
import numpy as np
import torch
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
from tqdm.notebook import tqdm
import random
from huggingface_hub import login
import warnings
warnings.filterwarnings("ignore")

from transformers import EarlyStoppingCallback

In [14]:

# Check GPU availability
print(f"GPU available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")


GPU available: True
GPU: NVIDIA GeForce RTX 4060 Laptop GPU
Memory: 8.59 GB


In [15]:

# Set random seed for reproducibility
random.seed(42)
torch.manual_seed(42)
np.random.seed(42)


In [23]:

# Step 1: Data preparation
# Replace with your data loading code as needed
def load_soccer_data(data_path=None):
    """
    Load the soccer transcripts dataset with 30 examples.
    
    If data_path is provided, it will load from that file.
    Otherwise, use the transcripts.xlsx file from the data directory.
    """
    if data_path and os.path.exists(data_path):
        # Load from specified file path
        if data_path.endswith('.csv'):
            df = pd.read_csv(data_path)
        elif data_path.endswith('.xlsx') or data_path.endswith('.xls'):
            df = pd.read_excel(data_path)
        else:
            raise ValueError(f"Unsupported file format: {data_path}")
    else:
        # Load from default dataset path
        default_path = "../../data/transcripts.xlsx"
        print(f"Loading from default dataset path: {default_path}")
        
        try:
            # Load the transcript-summary pairs from Excel
            df = pd.read_excel(default_path)
            
            # Validate required columns
            required_cols = ['transcript', 'summary']
            if not all(col in df.columns for col in required_cols):
                raise ValueError(f"Dataset must contain columns: {required_cols}")
                
        except Exception as e:
            print(f"Error loading default dataset: {e}")
            # print("Falling back to hardcoded examples...")
            # If default dataset fails, fall back to example data
            # return prepare_soccer_data()
            
    print(f"Loaded {len(df)} examples")
    
    # Perform basic data cleaning
    df['transcript'] = df['transcript'].astype(str).apply(lambda x: x.strip())
    df['summary'] = df['summary'].astype(str).apply(lambda x: x.strip())
    
    # Remove any examples with too short content
    min_transcript_length = 100
    min_summary_length = 50
    valid_df = df[(df['transcript'].str.len() > min_transcript_length) & 
                 (df['summary'].str.len() > min_summary_length)]
    
    if len(valid_df) < len(df):
        print(f"Filtered out {len(df) - len(valid_df)} examples that were too short")
    
    return valid_df


In [46]:
def prepare_model_for_training(model_name="google/gemma-3-1b-it"):
    """
    Memory-optimized model loading for 8GB GPUs
    """
    print(f"Loading base model: {model_name}")
    
    # Clear CUDA cache
    torch.cuda.empty_cache()
    
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.pad_token = tokenizer.eos_token
    
    # Import bitsandbytes for quantization
    try:
        import bitsandbytes as bnb
    except ImportError:
        print("Installing bitsandbytes...")
        
        import bitsandbytes as bnb
    
    # QLoRA 4-bit configuration
    from transformers import BitsAndBytesConfig
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        llm_int8_enable_fp32_cpu_offload=True,
    )
    
    # Load model with extreme memory optimization
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=True,
        device_map="auto",
    )
    
    # Even more conservative LoRA setup
    peft_config = LoraConfig(
        r=2,  # Reduced from 4 to 2
        lora_alpha=4,  # Reduced from 8 to 4
        lora_dropout=0.05,
        bias="none",
        task_type=TaskType.CAUSAL_LM,
        target_modules=["q_proj", "v_proj"],  # Only essential attention components
    )
    
    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, peft_config)
    
    print(f"Trainable parameters: {model.print_trainable_parameters()}")
    
    return model, tokenizer

In [25]:
def tokenize_data(dataset, tokenizer, max_length=2048):
    """
    Tokenize the dataset for training.
    """
    def tokenize_function(examples):
        return tokenizer(
            examples["text"],
            truncation=True,
            max_length=max_length,
            padding="max_length"
        )
    
    tokenized_dataset = dataset.map(
        tokenize_function, 
        batched=True,
        desc="Tokenizing data",
        remove_columns=["text"]
    )
    
    return tokenized_dataset

In [26]:
# Step 2: Split data into train and validation sets
def split_train_val(df, val_size=0.1):
    """
    Split the dataset into training and validation sets.
    With 30 examples, a 90/10 split is reasonable.
    """
    # Shuffle the dataframe
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)
    
    # Calculate split point
    val_count = max(1, int(len(df) * val_size))
    
    # Split the data
    train_df = df[val_count:].reset_index(drop=True)
    val_df = df[:val_count].reset_index(drop=True)
    
    print(f"Split data into {len(train_df)} training and {len(val_df)} validation examples")
    
    return train_df, val_df


In [27]:
# Load team and player data from your existing datasets
def load_soccer_knowledge():
    """
    Load team and player names from your existing datasets to incorporate
    into the model's knowledge.
    """
    print("Loading team and player data...")
    
    # Load team names from club_statistics.csv
    teams_df = pd.read_csv("../../data/club_statistics.csv")
    team_names = teams_df["Club Name"].dropna().unique().tolist()
    print(f"Loaded {len(team_names)} team names")
    
    # Load player names from players_data_by_country_extended.csv
    players_df = pd.read_csv("../../data/players_data_by_country_extended.csv")
    player_names = players_df["Player"].dropna().unique().tolist()
    # Limit to reasonable number to avoid overwhelming the model
    player_names = player_names[:1000]  # Adjust this number as needed
    print(f"Using {len(player_names)} player names")
    
    return team_names, player_names


In [28]:

# Create knowledge enhancement examples
def create_knowledge_examples(team_names, player_names, num_examples=8):
    """
    Create examples that help the model learn about teams and players
    without explicitly providing them in every prompt.
    """
    knowledge_examples = []
    
    # Example 1: Team knowledge
    knowledge_examples.append({
        "transcript": "The match between Barselona and Rel Madrid was intense. Levandoski scored for Barsa while Venicious Jr scored for Madrid.",
        "summary": "The El Clásico between Barcelona and Real Madrid lived up to its reputation with an intense battle on the pitch. Robert Lewandowski found the net for Barcelona, while Vinícius Júnior scored for Real Madrid in this hard-fought contest."
    })
    
    # Example 2: Premier League teams
    knowledge_examples.append({
        "transcript": "Mancester United faced Arsenl in a crucial match. Marcus Rasford scored for Man U while Bukayo Sako equalized for the Gunners.",
        "summary": "Manchester United and Arsenal delivered an exciting Premier League clash. Marcus Rashford put United ahead before Bukayo Saka equalized for the Gunners in this important fixture between these historic rivals."
    })
    
    # Example 3: Mix of leagues
    knowledge_examples.append({
        "transcript": "Byern Munich dominated against Dortmnd, with Harry Ken scoring a hat-trick. Jude Belingam was missed by Dortmund.",
        "summary": "Bayern Munich claimed a commanding victory in Der Klassiker against Borussia Dortmund. Harry Kane was the star performer, netting a hat-trick. Dortmund clearly missed their former midfielder Jude Bellingham, who now plays for Real Madrid."
    })

    # Example 4: Champions League match
    knowledge_examples.append({
        "transcript": "Liverpul took on Bayern Munchen in the Champions League. Mohamed Salih scored for the Reds while Lewandoski equalized for the Germans.",
        "summary": "Liverpool faced off against Bayern Munich in a thrilling Champions League encounter. Mohamed Salah found the net for the Reds, but Robert Lewandowski equalized for the German giants, setting up a tense finish."
    })

    # Example 5: La Liga match
    knowledge_examples.append({
        "transcript": "Atletico Madird played against Sevila. Alvaro Morata scored for Atleti while Youssef En-Nesyri equalized for the Andalusians.",
        "summary": "Atlético Madrid clashed with Sevilla in a crucial La Liga match. Álvaro Morata put Atleti ahead, but Youssef En-Nesyri equalized for Sevilla, leaving the outcome hanging in the balance."
    })

    # Example 6: Serie A match
    knowledge_examples.append({
        "transcript": "Juventus took on Inter Milan in the Derby d'Italia. Dusan Vlahovic scored for the Bianconeri while Lautaro Martinez equalized for the Nerazzurri.",
        "summary": "Juventus faced off against Inter Milan in the highly anticipated Derby d'Italia. Dušan Vlahović found the net for the Bianconeri, but Lautaro Martínez equalized for the Nerazzurri, setting up a thrilling conclusion to the match."
    })

    # Example 7: International friendly
    knowledge_examples.append({
        "transcript": "France played against Argentna in a friendly match. Kylian Mbappe scored for the Blues while Lionel Messi equalized for the Albiceleste.",
        "summary": "France took on Argentina in an exciting international friendly. Kylian Mbappé put the Blues ahead, but Lionel Messi equalized for the Albiceleste, showcasing the talent and skill of these two football powerhouses."
    })

    # Example 8: Premier League match
    knowledge_examples.append({
        "transcript": "Mancester City faced Chelsie in a crucial match. Erling Haaland scored for City while Kai Havertz equalized for the Blues.",
        "summary": "Manchester City clashed with Chelsea in a pivotal Premier League encounter. Erling Haaland found the net for City, but Kai Havertz equalized for the Blues, leaving the outcome of the match uncertain."
    })
    
    # Add more examples that demonstrate correct name usage
    # These will help the model learn the correct spellings without explicit lists
    
    return knowledge_examples


In [29]:

# Updated format_data function that doesn't include explicit references
def format_soccer_data_without_references(df):
    """
    Format the data into prompt-completion pairs without explicit name references.
    """
    formatted_data = []
    
    for _, row in tqdm(df.iterrows(), total=len(df), desc="Formatting data"):
        # Create prompt with instructions but no reference lists
        prompt = f"""<start_of_turn>user
You are a professional soccer commentator. Create a concise, engaging summary of these match highlights.
The transcript may contain speech recognition errors in player and team names - please correct them in your summary.

HIGHLIGHTS TRANSCRIPT:
{row['transcript']}
<end_of_turn>

<start_of_turn>model
{row['summary']}
<end_of_turn>
"""
        formatted_data.append({"text": prompt})
    
    # Convert to Dataset format
    dataset = Dataset.from_pandas(pd.DataFrame(formatted_data))
    return dataset


In [47]:
def train_summarizer_model(model, tokenizer, train_dataset, val_dataset=None, output_dir="../summarizer/models/soccer-summarizer-improved"):
    """
    Memory-optimized training function for 8GB GPUs
    """
    from transformers import get_scheduler
    
    os.makedirs(output_dir, exist_ok=True)
    
    # More memory-efficient training args
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=1,  # Reduced to 1 epoch to start
        per_device_train_batch_size=1,
        per_device_eval_batch_size=1,
        gradient_accumulation_steps=16,  # Increased for better stability with small batches
        learning_rate=1e-4,
        weight_decay=0.01,
        max_grad_norm=0.3,
        warmup_ratio=0.03,
        fp16=True,
        optim="adamw_torch",
        logging_steps=5,
        evaluation_strategy="no",  # Disable evaluation temporarily to save memory
        save_strategy="epoch",  # Only save at end of epoch
        save_total_limit=1,  # Keep only the best checkpoint
        report_to="none",
        remove_unused_columns=False,  # Important for some models
        # Gradient checkpointing
        gradient_checkpointing=True,
        # Deepspeed ZeRO-2 config for memory efficiency
        deepspeed="ds_config.json"  # You'll need to create this file
    )
    
    # Data collator
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False
    )
    
    # Initialize trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=None,  # Disable eval temporarily
        data_collator=data_collator,
    )
    
    print("Starting training...")
    trainer.train()
    
    trainer.save_model(output_dir)
    print(f"Model saved to {output_dir}")
    
    return trainer, model

In [45]:
# Save deepspeed config
with open("ds_config.json", "w") as f:
    f.write("""
{
    "fp16": {
        "enabled": true,
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "initial_scale_power": 16,
        "hysteresis": 2,
        "min_loss_scale": 1
    },
    "zero_optimization": {
        "stage": 2,
        "offload_optimizer": {
            "device": "cpu",
            "pin_memory": true
        },
        "allgather_partitions": true,
        "allgather_bucket_size": 5e8,
        "overlap_comm": true,
        "reduce_scatter": true,
        "reduce_bucket_size": 5e8,
        "contiguous_gradients": true
    },
    "gradient_accumulation_steps": 16,
    "gradient_clipping": 0.3,
    "steps_per_print": 10
}
""")

In [31]:

def run_enhanced_training_pipeline(data_path=None):
    """
    Run the complete training pipeline with proper imports and function references.
    """
    # Ensure all necessary imports
    from datasets import Dataset
    from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, EarlyStoppingCallback
    from transformers import DataCollatorForLanguageModeling
    from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
    
    # 1. Load soccer data
    df = load_soccer_data(data_path)
    
    # 2. Load team and player knowledge
    team_names, player_names = load_soccer_knowledge()
    
    # 3. Create knowledge enhancement examples
    knowledge_examples = create_knowledge_examples(team_names, player_names)
    
    # 4. Add knowledge examples to training data
    knowledge_df = pd.DataFrame(knowledge_examples)
    enhanced_df = pd.concat([knowledge_df, df], ignore_index=True)
    print(f"Added {len(knowledge_df)} knowledge examples to {len(df)} training examples")
    
    # 5. Split data for training/validation
    train_df, val_df = split_train_val(enhanced_df, val_size=0.1)
    
    # 6. Format data without explicit references
    train_dataset = format_soccer_data_without_references(train_df)
    val_dataset = format_soccer_data_without_references(val_df)
    
    # 7. Prepare model and tokenizer
    model, tokenizer = prepare_model_for_training()
    
    # 8. Tokenize data
    train_tokenized = tokenize_data(train_dataset, tokenizer)
    val_tokenized = tokenize_data(val_dataset, tokenizer)
    
    # 9. Train the model
    trainer, model = train_summarizer_model(
        model, 
        tokenizer,
        train_tokenized, 
        val_tokenized,
        output_dir="../summarizer/models/soccer-summarizer-improved"
    )
    
    return model, tokenizer, trainer


In [32]:

# Updated test function without references
def test_enhanced_model(model_path, test_transcript):
    """
    Test the trained model without providing explicit references.
    """
    from peft import PeftModel, PeftConfig
    
    # Load the trained model
    print("Loading trained model...")
    peft_config = PeftConfig.from_pretrained(model_path)
    base_model_name = peft_config.base_model_name_or_path
    
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    
    # Load model
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,
        device_map="cuda:0"
    )
    
    # Load LoRA adapter
    model = PeftModel.from_pretrained(
        base_model, 
        model_path,
        is_trainable=False
    )
    
    # Format prompt without references
    prompt = f"""<start_of_turn>user
You are a professional soccer commentator. Create a concise, engaging summary of these match highlights.
The transcript may contain speech recognition errors in player and team names - please correct them in your summary.

HIGHLIGHTS TRANSCRIPT:
{test_transcript}
<end_of_turn>

<start_of_turn>model
"""
    
    # Generate summary
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda:0")
    
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=500,
            temperature=0.7,
            top_p=0.9,
            do_sample=True
        )
    
    # Extract summary
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    summary = full_response.split("<start_of_turn>model")[-1].strip()
    
    print("\n=== GENERATED SUMMARY ===")
    print(summary)
    
    return summary

In [48]:
# Run the entire pipeline
model, tokenizer, trainer = run_enhanced_training_pipeline()

Loading from default dataset path: ../../data/transcripts.xlsx
Loaded 30 examples
Loading team and player data...
Loaded 7109 team names
Using 1000 player names
Added 8 knowledge examples to 30 training examples
Split data into 35 training and 3 validation examples


Formatting data:   0%|          | 0/35 [00:00<?, ?it/s]

Formatting data:   0%|          | 0/3 [00:00<?, ?it/s]

Loading base model: google/gemma-3-1b-it


RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# After training completes, test with a new transcript
test_transcript = """
Your test transcript goes here...
"""
summary = test_enhanced_model("../summarizer/models/soccer-summarizer-improved", test_transcript)

---

In [50]:
torch.cuda.empty_cache()

RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [5]:
# Load and test the existing model without training
import os
import re
from peft import PeftModel, PeftConfig
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_transcript_from_srt(srt_path):
    """
    Extract transcript text from an SRT file
    """
    with open(srt_path, 'r', encoding='utf-8') as f:
        content = f.read()
    
    # Remove SRT formatting (numbers and timestamps)
    # Pattern matches: number, timestamp --> timestamp, then captures the text
    srt_pattern = r'\d+\n\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}\n(.*?)(?=\n\n\d+\n|$)'
    
    # Find all text segments
    matches = re.findall(srt_pattern, content, re.DOTALL)
    
    # Combine all text into a single transcript
    transcript = ' '.join([m.replace('\n', ' ') for m in matches])
    
    return transcript

def test_model_with_checkpoint(model_path, srt_path):
    """
    Test the trained model using an existing SRT file
    """
    print(f"Loading model from: {model_path}")
    print(f"Testing with transcript from: {srt_path}")
    
    # Load transcript from SRT
    transcript = load_transcript_from_srt(srt_path)
    print(f"Loaded transcript ({len(transcript)} chars)")
    
    # Load the PEFT config
    peft_config = PeftConfig.from_pretrained(model_path)
    base_model_name = peft_config.base_model_name_or_path
    
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    
    # Load base model
    print("Loading base model...")
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,
        device_map="auto"
    )
    
    # Load LoRA adapter
    print("Loading LoRA adapter...")
    model = PeftModel.from_pretrained(
        base_model, 
        model_path,
        is_trainable=False
    )
    
    # Format prompt without references
    prompt = f"""<start_of_turn>user
You are a professional soccer commentator. Create a concise, engaging summary of these match highlights.
The transcript may contain speech recognition errors in player and team names - please correct them in your summary.

HIGHLIGHTS TRANSCRIPT:
{transcript}
<end_of_turn>

<start_of_turn>model
"""
    
    # Generate summary
    print("Generating summary...")
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=500,
            temperature=0.7,
            top_p=0.9,
            do_sample=True
        )
    
    # Extract summary
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    summary = full_response.split("<start_of_turn>model")[-1].strip()
    
    print("\n=== GENERATED SUMMARY ===")
    print(summary)
    
    return summary

# Path to your existing SRT file
srt_path = "../../processed_videos/Las_Palmas_vs._Barcelona___LALIGA_Highlights___ESPN_FC_720.srt"

# Path to your trained model - checkpoint 60
model_path = "../summarizer/models/soccer-summarizer-improved/checkpoint-20"

# Test the model
summary = test_model_with_checkpoint(model_path, srt_path)

Loading model from: ../summarizer/models/soccer-summarizer-improved/checkpoint-20
Testing with transcript from: ../../processed_videos/Las_Palmas_vs._Barcelona___LALIGA_Highlights___ESPN_FC_720.srt
Loaded transcript (6468 chars)
Loading base model...
Loading LoRA adapter...
Generating summary...

=== GENERATED SUMMARY ===
user
You are a professional soccer commentator. Create a concise, engaging summary of these match highlights.
The transcript may contain speech recognition errors in player and team names - please correct them in your summary.

HIGHLIGHTS TRANSCRIPT:
And this is the starting lineup for Diego Martinez and Blasparmas as you mentioned defense has been there a kill is here Will there be Marmo moving inside to play alongside Alex Sware is the captain at center back Jenny the last Palmer starting lineup Alex what do you make of Barcelona's starting lineup today most of the changes for Handsy flick in recent weeks have come in the center back pairing five different center ba

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login

def chat_with_base_model(model_name="google/gemma-3-1b-it"):
    """
    Interactive chat with the base model without fine-tuning
    """
    # Make sure you're logged in (needed for Gemma models)
    # login()  # Uncomment if needed
    
    print(f"Loading base model: {model_name}")
    
    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto"
    )
    
    # Chat history - keeps context
    chat_history = []
    
    print("\n===== Chat with Base Model =====")
    print("Type 'exit' to end the conversation")
    
    while True:
        # Get user input
        user_input = input("\nYou: ")
        if user_input.lower() == 'exit':
            break
        
        # Add user message to history
        chat_history.append({"role": "user", "content": user_input})
        
        # Format the entire chat history into Gemma's format
        prompt = ""
        for message in chat_history:
            role = message["role"]
            content = message["content"]
            prompt += f"<start_of_turn>{role}\n{content}<end_of_turn>\n\n"
        
        # Add the model's turn
        prompt += "<start_of_turn>model\n"
        
        # Generate response
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=500,
                temperature=0.7,
                top_p=0.9,
                do_sample=True
            )
        
        # Extract and clean up the response
        full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        model_response = full_response.split("<start_of_turn>model\n")[-1].split("<end_of_turn>")[0].strip()
        
        # Display model response
        print(f"\nModel: {model_response}")
        
        # Add model response to history
        chat_history.append({"role": "model", "content": model_response})

# Run the chat interface
chat_with_base_model()

Loading base model: google/gemma-3-1b-it

===== Chat with Base Model =====
Type 'exit' to end the conversation

Model: user
I'm gonna give you a soccer match transcript, in the SRT format, I want you to provide a summary of the match and at the end specify the final score, the match is between Barcelona and Las Palmas: 1 00:00:00,000 --> 00:00:06,980 And this is the starting lineup for Diego Martinez and Blasparmas as you mentioned defense has been there a kill is here  2 00:00:06,980 --> 00:00:12,240 Will there be Marmo moving inside to play alongside Alex Sware is the captain at center back  3 00:00:12,240 --> 00:00:17,839 Jenny the last Palmer starting lineup Alex what do you make of Barcelona's starting lineup today most of the changes for  4 00:00:17,839 --> 00:00:23,839 Handsy flick in recent weeks have come in the center back pairing five different center back pairings in the last five games today  5 00:00:23,839 --> 00:00:26,280 It's Kubasi and Eric Garcia  6 00:00:27,280 --> 0

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.54 GiB. GPU 0 has a total capacty of 8.00 GiB of which 0 bytes is free. Of the allocated memory 11.37 GiB is allocated by PyTorch, and 1.60 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [3]:
import os
import gc
import torch
import re
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

class SummarizerService:
    """Service to summarize soccer match transcripts using the base Gemma model."""
    
    def __init__(self, model_name="google/gemma-3-1b-it", use_8bit=True):
        self.model_name = model_name
        self.model = None
        self.tokenizer = None
        self.use_8bit = use_8bit
    
    def load_model(self):
        """Load the model with memory optimizations."""
        if self.model is not None:
            # Model already loaded
            return
            
        # Clear GPU memory
        torch.cuda.empty_cache()
        gc.collect()
        
        print(f"Loading model: {self.model_name}")
        
        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        
        if self.use_8bit:
            # 8-bit quantization for memory efficiency
            bnb_config = BitsAndBytesConfig(
                load_in_8bit=True,
                llm_int8_skip_modules=["embed_tokens", "lm_head"]
            )
            
            # Load in 8-bit mode
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                quantization_config=bnb_config,
                device_map="auto",
                torch_dtype=torch.float16,
                max_memory={0: "7GiB", "cpu": "12GiB"}
            )
        else:
            # Load in FP16 mode
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                torch_dtype=torch.float16,
                device_map="auto"
            )
            
        print("Model loaded successfully")
    
    def unload_model(self):
        """Free up GPU memory by deleting the model."""
        if self.model is not None:
            del self.model
            self.model = None
            del self.tokenizer
            self.tokenizer = None
            torch.cuda.empty_cache()
            gc.collect()
            print("Model unloaded from memory")
    
    def load_transcript_from_srt(self, srt_path):
        """Extract transcript text from an SRT file."""
        with open(srt_path, 'r', encoding='utf-8') as f:
            content = f.read()
        
        # Remove SRT formatting (numbers and timestamps)
        srt_pattern = r'\d+\n\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}\n(.*?)(?=\n\n\d+\n|$)'
        matches = re.findall(srt_pattern, content, re.DOTALL)
        transcript = ' '.join([m.replace('\n', ' ') for m in matches])
        
        return transcript
    
    """def get_soccer_knowledge(self):
        "Get team and player data to enhance prompt context."
        # Load team data
        team_data_path = os.path.join(os.path.dirname(__file__), '..', 'data', 'club_statistics.csv')
        player_data_path = os.path.join(os.path.dirname(__file__), '..', 'data', 'players_data_by_country_extended.csv')
        
        teams = []
        players = []
        
        try:
            import pandas as pd
            if os.path.exists(team_data_path):
                teams_df = pd.read_csv(team_data_path)
                if "Club Name" in teams_df.columns:
                    teams = teams_df["Club Name"].dropna().unique().tolist()[:20]  # Top 20 teams
                    
            if os.path.exists(player_data_path):
                players_df = pd.read_csv(player_data_path)
                if "Player" in players_df.columns:
                    players = players_df["Player"].dropna().unique().tolist()[:50]  # Top 50 players
        except Exception as e:
            print(f"Error loading soccer knowledge: {e}")
            
        return teams, players"""
    
    def create_optimized_prompt(self, transcript, teams=None, players=None):
        """Create an optimized prompt that guides the model to produce high-quality summaries."""
        # Get a few important teams and players if not provided
        # if teams is None or players is None:
        #    teams, players = self.get_soccer_knowledge()
            
        # Create a knowledge-rich prompt with examples of good summarization
        prompt = f"""<start_of_turn>user
You are a professional soccer commentator tasked with creating concise, engaging summaries of match highlights.

Context: The transcript contains speech recognition errors in player and team names - please correct these in your summary.

Please summarize the following match highlights in a concise, engaging paragraph:

HIGHLIGHTS TRANSCRIPT:
{transcript}

Create a summary that:
1. Correctly identifies teams and players (fixing any name errors)
2. Highlights key moments and actions
3. Uses proper soccer terminology
4. Has an engaging, professional tone
<end_of_turn>

<start_of_turn>model
"""
        return prompt
    
    def summarize(self, transcript, max_length=250):
        """Generate a summary for the given transcript."""
        # Load model if not already loaded
        if self.model is None:
            self.load_model()
            
        # Create optimized prompt
        prompt = self.create_optimized_prompt(transcript)
        
        # Tokenize input
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        
        # Generate summary
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_length,
                temperature=0.7,
                top_p=0.9,
                do_sample=True
            )
            
        # Extract generated summary
        full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        summary = full_response.split("<start_of_turn>model")[-1].strip()
        
        # Optionally free memory after generation
        # self.unload_model()
        
        return summary
    
    def summarize_from_file(self, srt_path, max_length=500):
        """Generate a summary from an SRT file."""
        transcript = self.load_transcript_from_srt(srt_path)
        return self.summarize(transcript, max_length)


# Example usage
if __name__ == "__main__":
    # Path to a transcript file
    srt_path = "../../processed_videos/Las_Palmas_vs._Barcelona___LALIGA_Highlights___ESPN_FC_720.srt"
    
    # Create summarizer service
    summarizer = SummarizerService(use_8bit=True)
    
    # Generate summary
    summary = summarizer.summarize_from_file(srt_path)
    print("\n=== GENERATED SUMMARY ===")
    print(summary)
    
    # Unload model when done
    summarizer.unload_model()

Loading model: google/gemma-3-1b-it
Model loaded successfully

=== GENERATED SUMMARY ===
user
You are a professional soccer commentator tasked with creating concise, engaging summaries of match highlights.

Context: The transcript contains speech recognition errors in player and team names - please correct these in your summary.

Please summarize the following match highlights in a concise, engaging paragraph:

HIGHLIGHTS TRANSCRIPT:
And this is the starting lineup for Diego Martinez and Blasparmas as you mentioned defense has been there a kill is here Will there be Marmo moving inside to play alongside Alex Sware is the captain at center back Jenny the last Palmer starting lineup Alex what do you make of Barcelona's starting lineup today most of the changes for Handsy flick in recent weeks have come in the center back pairing five different center back pairings in the last five games today It's Kubasi and Eric Garcia Only did the things to fix what was going wrong for last Palmer's ea

In [None]:
import os
import gc
import torch
import re
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

class GemmaSummarizerService:
    """Service to summarize soccer match transcripts using the base Gemma model."""
    
    def __init__(self, model_name="google/gemma-3-1b-it", use_8bit=True):
        self.model_name = model_name
        self.model = None
        self.tokenizer = None
        self.use_8bit = use_8bit
    
    def load_model(self):
        """Load the model with memory optimizations."""
        if self.model is not None:
            # Model already loaded
            return
            
        # Clear GPU memory
        torch.cuda.empty_cache()
        gc.collect()
        
        print(f"Loading model: {self.model_name}")
        
        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        
        if self.use_8bit:
            # 8-bit quantization for memory efficiency
            bnb_config = BitsAndBytesConfig(
                load_in_8bit=True,
                llm_int8_skip_modules=["embed_tokens", "lm_head"]
            )
            
            # Load in 8-bit mode
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                quantization_config=bnb_config,
                device_map="auto",
                torch_dtype=torch.float16,
                max_memory={0: "7GiB", "cpu": "12GiB"}
            )
        else:
            # Load in FP16 mode
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                torch_dtype=torch.float16,
                device_map="auto"
            )
            
        print("Model loaded successfully")
    
    def unload_model(self):
        """Free up GPU memory by deleting the model."""
        if self.model is not None:
            del self.model
            self.model = None
            del self.tokenizer
            self.tokenizer = None
            torch.cuda.empty_cache()
            gc.collect()
            print("Model unloaded from memory")
    
    def load_transcript_from_srt(self, srt_path):
        """Extract transcript text from an SRT file."""
        with open(srt_path, 'r', encoding='utf-8') as f:
            content = f.read()
        
        # Remove SRT formatting (numbers and timestamps)
        srt_pattern = r'\d+\n\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}\n(.*?)(?=\n\n\d+\n|$)'
        matches = re.findall(srt_pattern, content, re.DOTALL)
        transcript = ' '.join([m.replace('\n', ' ') for m in matches])
        
        return transcript
    
    def create_optimized_prompt(self, transcript):
        """Create an optimized prompt that guides the model to produce high-quality summaries."""
        prompt = f"""<start_of_turn>user
You are a professional soccer commentator tasked with creating concise, engaging summaries of match highlights.

Context: The transcript contains speech recognition errors in player and team names - please correct these in your summary.

Please summarize the following match highlights in a concise, engaging paragraph:

HIGHLIGHTS TRANSCRIPT:
{transcript}

Create a summary that:
1. Correctly identifies teams and players (fixing any name errors)
2. Highlights key moments and actions
3. Uses proper soccer terminology
4. Has an engaging, professional tone
<end_of_turn>

<start_of_turn>model
"""
        return prompt
    
    def summarize(self, transcript, max_length=500):
        """Generate a summary for the given transcript."""
        # Load model if not already loaded
        if self.model is None:
            self.load_model()
            
        # Create optimized prompt
        prompt = self.create_optimized_prompt(transcript)
        
        # Tokenize input
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        
        # Generate summary
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_length,
                temperature=0.7,
                top_p=0.9,
                do_sample=True
            )
            
        # Extract generated summary
        full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        summary = full_response.split("<start_of_turn>model")[-1].strip()
        
        return summary
    
    def summarize_from_file(self, srt_path, max_length=500):
        """Generate a summary from an SRT file."""
        transcript = self.load_transcript_from_srt(srt_path)
        return self.summarize(transcript, max_length)