<a href="https://colab.research.google.com/github/Beyolandr7/ai-football-analyst/blob/main/AI_Football_Analyst.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the packages imported by the following cells: langchain_community
# (provides the Replicate LLM wrapper) and the replicate client itself.
!pip install langchain_community
!pip install replicate

# Import LLM

In [4]:
from langchain_community.llms import Replicate
import os
from google.colab import userdata
# Read the Replicate API token from Colab's user data and export it through
# the environment as well as keeping it in `api_token` for the wrapper below.
api_token = userdata.get('REPLICATE_API_TOKEN')
os.environ["REPLICATE_API_TOKEN"] = api_token

# Granite instruct model identifier; `output` is the LLM handle that later
# cells call `.invoke(...)` on.
model = "ibm-granite/granite-3.2-8b-instruct"
output = Replicate(model=model, replicate_api_token=api_token)

# Parameters

In [None]:
# Generation settings (sampling controls and output length cap).
# NOTE(review): nothing in the visible code passes this dict to the model
# (neither the Replicate constructor nor the invoke call) - confirm whether
# it is meant to be supplied there.
parameters = dict(
    temperature=0.2,
    top_p=0.9,
    top_k=0,
    repetition_penalty=1.1,
    max_tokens=512,
)

# Analysis

In [15]:
import pandas as pd

def analyze_epl_data(file_path):
    """
    Build a per-team table of home and away results from EPL match data.

    The CSV must provide 'HomeTeam', 'AwayTeam' and 'FullTimeResult' columns.
    Results are matched against the codes 'H' (counted as a home win),
    'D' (draw) and 'A' (counted as an away win).

    Args:
        file_path (str): Location of the CSV; handed directly to pd.read_csv.

    Returns:
        pd.DataFrame: One row per team with games/wins/draws/losses counts and
        the matching percentage columns for both venues, rounded to two
        decimals. A team that is missing from one venue gets zeros for that
        venue's columns. None is returned (after printing the error) when the
        CSV cannot be loaded.
    """

    def venue_table(matches, team_col, prefix, win_code, loss_code):
        # Count games and outcomes per team for one venue; `win_code` and
        # `loss_code` are the result codes seen from that venue's viewpoint.
        table = matches.groupby(team_col).agg(**{
            f"{prefix}_games": (team_col, 'count'),
            f"{prefix}_wins": ('FullTimeResult', lambda res: (res == win_code).sum()),
            f"{prefix}_draws": ('FullTimeResult', lambda res: (res == 'D').sum()),
            f"{prefix}_losses": ('FullTimeResult', lambda res: (res == loss_code).sum()),
        })
        # Express each outcome count as a percentage of games at this venue.
        for outcome, counts in (('win', 'wins'), ('draw', 'draws'), ('loss', 'losses')):
            table[f"{prefix}_{outcome}_pct"] = (
                table[f"{prefix}_{counts}"] / table[f"{prefix}_games"]
            ) * 100
        return table

    try:
        matches = pd.read_csv(file_path)
    except Exception as e:
        print(f"Error loading data from URL: {e}")
        return None

    home_table = venue_table(matches, 'HomeTeam', 'home', 'H', 'A')
    away_table = venue_table(matches, 'AwayTeam', 'away', 'A', 'H')

    # Outer join on the team index keeps teams that appear at only one venue;
    # their missing figures are filled with 0.
    combined = home_table.join(away_table, how='outer').fillna(0)

    return combined.round(2)

def generate_formatted_prompt(stats_df):
    """
    Render team statistics into the analyst prompt sent to the Granite model.

    Args:
        stats_df (pd.DataFrame): Table indexed by club whose rows expose the
            'home_win_pct', 'home_draw_pct', 'home_loss_pct', 'away_win_pct',
            'away_draw_pct' and 'away_loss_pct' columns.

    Returns:
        str: The instruction text with one formatted section per club
        embedded in it.
    """

    # One text section per club, each terminated by a '---' separator line
    club_sections = []
    for club, row in stats_df.iterrows():
        club_sections.append(
            f"Club: {club}\n"
            "- Home Performance:\n"
            f"  - Wins: {row['home_win_pct']}%\n"
            f"  - Draws: {row['home_draw_pct']}%\n"
            f"  - Losses: {row['home_loss_pct']}%\n"
            "- Away Performance:\n"
            f"  - Wins: {row['away_win_pct']}%\n"
            f"  - Draws: {row['away_draw_pct']}%\n"
            f"  - Losses: {row['away_loss_pct']}%\n"
            "---\n"
        )
    data_string = "".join(club_sections)

    return f"""
Act as an expert football analyst. Your task is to analyze the provided English Premier League match statistics.
Based on the data below, classify each club's performance at home and away.

After analyzing all clubs, conclude by identifying:
- The top 3 clubs with the best home record (fortresses).
- The top 3 clubs with the best away record (best travelers).
- Add any other interesting findings that you find

Here is the clearly formatted data:

{data_string}

Provide the analysis in a clear, structured format.
"""

# --- Main Execution ---

# Raw GitHub URL, pinned to a specific commit, so the CSV can be read directly
github_url = 'https://raw.githubusercontent.com/Beyolandr7/ai-football-analyst/076a8380f54b7626f92b301c3662387bc03efce5/epl_match_stats.csv'

# Step 1: compute per-team statistics (None means the CSV could not be loaded)
all_team_stats_df = analyze_epl_data(github_url)

if all_team_stats_df is not None:
    # Step 2: turn the statistics table into the analyst prompt
    granite_prompt = generate_formatted_prompt(all_team_stats_df)

    # Step 3: send the prompt to the Granite model
    response = output.invoke(granite_prompt)

    # Step 4: print a header, a blank line, then the model's reply
    print("Granite Model Response:\n", response, sep="\n")

Granite Model Response:

### Home Performance Analysis

**Top 3 Clubs with the Best Home Record (Fortresses):**

1. **Manchester City** - 65.11% wins
   - Man City has an exceptional home record, winning over 65% of their matches at home, making them one of the strongest home teams in the Premier League.

2. **Manchester United** - 67.8% wins
   - Man United boasts the highest win percentage among home teams, winning nearly 68% of their home games. This demonstrates their formidable presence at Old Trafford.

3. **Chelsea** - 63.75% wins
   - Chelsea's home performance is also remarkable, with a win rate just below Manchester City and United. Stamford Bridge has traditionally been a tough place for opponents.

### Away Performance Analysis

**Top 3 Clubs with the Best Away Record (Best Travelers):**

1. **Manchester City** - 46.0% wins
   - Despite their dominance at home, Man City also performs well away from home, securing victories in nearly 46% of their away games, showcasing their