In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from typing import Tuple, Dict, List
import warnings

# Suppress warnings to make output cleaner during execution
warnings.filterwarnings('ignore')

# Function to load and preprocess match data
def load_match_data(data_path: str) -> pd.DataFrame:
    """
    Load match data from a CSV file, convert date columns to datetime format,
    and filter for matches occurring after a specific date.

    Args:
        data_path: Path to the CSV file containing match data.
    Returns:
        A DataFrame with the loaded and filtered data.
    """
    # Read the dataset from the provided file path into a DataFrame
    df = pd.DataFrame(pd.read_csv(data_path))

    # List of columns containing datetime values for proper formatting
    time_cols = ['half_start_datetime', 'match_start_datetime', 'latest_bookmaker_update']
    for col in time_cols:
        # Convert these columns to datetime format for easier processing
        df[col] = pd.to_datetime(df[col])

    # Define the cutoff date: Only matches starting after this date will be included
    start_date = pd.to_datetime('2024-11-01')
    df = df[df['match_start_datetime'] >= start_date]

    return df

# Function to split the dataset into individual matches
def split_into_matches(df: pd.DataFrame) -> List[pd.DataFrame]:
    """
    Split the dataset into separate DataFrames for each match.

    Args:
        df: Full DataFrame containing data for all matches.
    Returns:
        A list of DataFrames, where each DataFrame contains data for a single match.
    """
    # Use groupby on the 'fixture_id' column to group data by match
    # Convert each group into a DataFrame and store in a list
    matches = [group for _, group in df.groupby('fixture_id')]
    return matches

# Class to define and execute a live betting strategy
class LiveBettingStrategy:
    def __init__(self):
        """
        Initialize the betting strategy with specific parameters.
        """
        # Minimum and maximum odds thresholds to filter out unrealistic bets
        self.min_odds_threshold = 1.5
        self.max_odds_threshold = 8.0

        # Minimum confidence score required to make a prediction
        self.confidence_threshold = 0.75

        # Decision-making times (in minutes) during the match
        self.decision_times = list(range(15, 41, 5))  # 15th to 40th minute, in 5-minute intervals

    def make_prediction(self, features: Dict) -> Tuple[str, float]:
        """
        Use match features to make a prediction about the outcome of the match.

        Args:
            features: Dictionary containing calculated statistics and odds.
        Returns:
            A tuple containing the predicted outcome ('home', 'away', 'draw', or 'no action')
            and the confidence score.
        """
        # Calculate scores for the 'home', 'away', and 'draw' outcomes based on weighted features
        home_score = (
            0.3 * features['home_implied_prob'] +  # Influence of implied probability on prediction
            0.3 * (features['home_attack_momentum'] /
                   (features['home_attack_momentum'] + features['away_attack_momentum'] + 1e-6)) +
            0.2 * (features['home_possession'] / 100) +  # Normalize possession percentage
            0.2 * features['home_shot_accuracy']  # Accuracy of home team's shots
        )

        away_score = (
            0.3 * features['away_implied_prob'] +
            0.3 * (features['away_attack_momentum'] /
                   (features['home_attack_momentum'] + features['away_attack_momentum'] + 1e-6)) +
            0.2 * (features['away_possession'] / 100) +
            0.2 * features['away_shot_accuracy']
        )

        draw_score = (
            0.6 * features['draw_implied_prob'] +  # High weight on draw's implied probability
            0.4 * (1 - abs(home_score - away_score))  # Penalize high differences between home/away scores
        )

        # Create a dictionary of scores for all outcomes
        scores = {'home': home_score, 'draw': draw_score, 'away': away_score}

        # Find the outcome with the highest score
        best_prediction = max(scores.items(), key=lambda x: x[1])
        prediction, confidence = best_prediction

        # Return "no action" if confidence is below the threshold
        if confidence < self.confidence_threshold:
            return "no action", confidence

        return prediction, confidence

    def preprocess_match_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Preprocess match data to calculate derived statistics for decision-making.

        Args:
            df: Raw DataFrame containing match data.
        Returns:
            A DataFrame with additional calculated features.
        """
        # Convert relevant columns to datetime format
        time_cols = ['half_start_datetime', 'match_start_datetime', 'latest_bookmaker_update']
        for col in time_cols:
            df[col] = pd.to_datetime(df[col])

        # Calculate the minutes played in the match (minute + fractional seconds)
        df['minutes_played'] = df['minute'] + (df['second'] / 60)

        # Compute the goal difference and total goals scored so far
        df['goal_difference'] = df['Goals - home'] - df['Goals - away']
        df['total_goals'] = df['Goals - home'] + df['Goals - away']

        # Compute features related to home and away teams
        for team in ['home', 'away']:
            # Calculate shot accuracy (shots on target / total shots)
            df[f'{team}_shot_accuracy'] = np.where(
                df[f'Shots Total - {team}'] > 0,
                df[f'Shots On Target - {team}'] / df[f'Shots Total - {team}'],
                0
            )

            # Compute an attack momentum metric as a weighted combination of offensive statistics
            df[f'{team}_attack_momentum'] = (
                df[f'Dangerous Attacks - {team}'] +
                3 * df[f'Shots On Target - {team}'] +
                2 * df[f'Shots Insidebox - {team}'] +
                df[f'Corners - {team}']
            )

            # Calculate possession efficiency (successful passes / total passes)
            df[f'{team}_possession_efficiency'] = np.where(
                df[f'Passes - {team}'] > 0,
                df[f'Successful Passes - {team}'] / df[f'Passes - {team}'],
                0
            )

        # Calculate the implied probabilities from betting odds
        df['home_implied_prob'] = np.where(df['1'] > 0, 1 / df['1'], 0)
        df['draw_implied_prob'] = np.where(df['X'] > 0, 1 / df['X'], 0)
        df['away_implied_prob'] = np.where(df['2'] > 0, 1 / df['2'], 0)

        return df


    def calculate_match_features(self, match_data: pd.DataFrame, current_minute: float) -> Dict:
        """
        Calculate relevant features for decision making at a specific minute

        Args:
            match_data: Processed match data
            current_minute: Current minute in the match
        Returns:
            Dictionary of calculated features
        """
        # Get data up to current minute
        current_data = match_data[match_data['minutes_played'] <= current_minute].iloc[-1]

        features = {
            'minute': current_minute,
            'score_difference': current_data['goal_difference'],
            'total_goals': current_data['total_goals'],
            'home_attack_momentum': current_data['home_attack_momentum'],
            'away_attack_momentum': current_data['away_attack_momentum'],
            'home_shot_accuracy': current_data['home_shot_accuracy'],
            'away_shot_accuracy': current_data['away_shot_accuracy'],
            'home_possession': current_data['Ball Possession % - home'],
            'away_possession': current_data['Ball Possession % - away'],
            'home_odds': current_data['1'],
            'draw_odds': current_data['X'],
            'away_odds': current_data['2'],
            'home_implied_prob': current_data['home_implied_prob'],
            'draw_implied_prob': current_data['draw_implied_prob'],
            'away_implied_prob': current_data['away_implied_prob']
        }

        return features

    def find_optimal_decision_point(self, match_data: pd.DataFrame) -> Tuple[str, float, float]:
        """
        Make a decision at the earliest available decision time without forward-seeking.
        """
        for minute in self.decision_times:
            if minute > match_data['minutes_played'].max():
                break

            features = self.calculate_match_features(match_data, minute)
            prediction, confidence = self.make_prediction(features)

            if confidence >= self.confidence_threshold:
                return prediction, confidence, minute

        return "no action", 0.0, 0.0

    def evaluate_prediction(self, prediction: str, final_result: str) -> bool:
        """
        Evaluate if the prediction was correct
        """
        if prediction == "no action":
            return False
        return prediction == final_result

    def calculate_returns(self, prediction: str, final_result: str, odds: Dict[str, float]) -> float:
        """
        Calculate betting returns
        """
        if prediction == "no action":
            return 0

        if prediction == final_result:
            return odds[prediction] - 1  # Subtract 1 to get net return

        return -1  # Lost stake

    def run_strategy(self, matches_data: List[pd.DataFrame]) -> pd.DataFrame:
        """
        Run the betting strategy on a list of matches
        """
        results = []

        for match_data in matches_data:
            processed_data = self.preprocess_match_data(match_data)
            prediction, confidence, decision_minute = self.find_optimal_decision_point(processed_data)

            final_data = processed_data.iloc[-1]
            if final_data['Goals - home'] > final_data['Goals - away']:
                final_result = 'home'
            elif final_data['Goals - home'] < final_data['Goals - away']:
                final_result = 'away'
            else:
                final_result = 'draw'

            odds = {
                'home': final_data['1'],
                'draw': final_data['X'],
                'away': final_data['2']
            }

            returns = self.calculate_returns(prediction, final_result, odds)
            correct = self.evaluate_prediction(prediction, final_result)

            results.append({
                'match_id': match_data['fixture_id'].iloc[0],
                'prediction': prediction,
                'confidence': confidence,
                'decision_minute': decision_minute,
                'final_result': final_result,
                'correct': correct,
                'returns': returns
            })

        return pd.DataFrame(results)


def main():
    data_path = '/content/drive/MyDrive/cusutvmo_dataset/match_data.csv'

    try:
        print("Loading dataset...")
        full_data = load_match_data(data_path)

        print("Splitting into individual matches...")
        matches_data = split_into_matches(full_data)
        print(f"Found {len(matches_data)} matches in the dataset")

        strategy = LiveBettingStrategy()

        print("Running betting strategy...")
        results = strategy.run_strategy(matches_data)

        total_predictions = len(results[results['prediction'] != 'no action'])
        correct_predictions = results['correct'].sum()
        accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
        cumulative_returns = results['returns'].sum()

        print("\nStrategy Performance:")
        print(f"Total Matches Analyzed: {len(matches_data)}")
        print(f"Total Predictions Made: {total_predictions}")
        print(f"Correct Predictions: {correct_predictions}")
        print(f"Accuracy: {accuracy:.2%}")
        print(f"Cumulative Returns: {cumulative_returns:.2f} units")

        results.to_csv('betting_strategy_results.csv', index=False)
        print("\nResults saved to betting_strategy_results.csv")

    except FileNotFoundError:
        print(f"Error: Could not find the data file at {data_path}")
    except Exception as e:
        print(f"Error occurred: {str(e)}")


if __name__ == "__main__":
    main()


Loading dataset...
Splitting into individual matches...
Found 111 matches in the dataset
Running betting strategy...

Strategy Performance:
Total Matches Analyzed: 111
Total Predictions Made: 26
Correct Predictions: 18
Accuracy: 69.23%
Cumulative Returns: -7.72 units

Results saved to betting_strategy_results.csv


**split_into_matches(df: pd.DataFrame) -> List[pd.DataFrame]**

- Splits the dataset into separate DataFrames for each match using the fixture_id.
- Returns a list of DataFrames, each corresponding to an individual match.

**LiveBettingStrategy Class**
**__init__()**

- Initializes strategy parameters:
- min_odds_threshold: Minimum odds for a bet to be considered.
- max_odds_threshold: Maximum odds for a bet to be considered.
- confidence_threshold: Minimum confidence score to place a bet.
- decision_times: Predefined times (15-40 minutes) for making decisions.

**make_prediction(features: Dict) -> Tuple[str, float]**

- Computes prediction scores for:
- Home win (home_score).
- Away win (away_score).
- Draw (draw_score).
- Combines factors like implied probabilities, attack momentum, possession, and shot accuracy.
- Selects the outcome with the highest score if it meets the confidence threshold; otherwise, recommends "no action."

**preprocess_match_data(df: pd.DataFrame) -> pd.DataFrame**

- Adds calculated features to the dataset:
- Time Features: Match progression in minutes.
- Goal Metrics: Goal difference and total goals.
- Team Metrics: Shot accuracy, attack momentum, possession efficiency, and implied probabilities based on betting odds.

**calculate_match_features(match_data: pd.DataFrame, current_minute: float) -> Dict**

- Extracts the latest data up to the specified minute.
- Calculates features like attack momentum, shot accuracy, possession, and implied probabilities for both teams.


**find_optimal_decision_point(match_data: pd.DataFrame) -> Tuple[str, float, float]**

- Iterates through predefined decision times (15-40 minutes).
- Predicts the match outcome at each time and selects the first instance with sufficient confidence.
- Returns the prediction, confidence, and decision time.


**evaluate_prediction(prediction: str, final_result: str) -> bool**

- Checks if the predicted outcome matches the actual result.

**calculate_returns(prediction: str, final_result: str, odds: Dict[str, float]) -> float**

- Calculates net returns for a correct prediction based on the betting odds.
- Returns -1 for incorrect predictions (lost stake) or 0 for no action.


**run_strategy(matches_data: List[pd.DataFrame]) -> pd.DataFrame**

- Applies the betting strategy to a list of matches.
- For each match: Processes the data.
- Determines the optimal decision point and prediction.
- Evaluates the prediction against the final result.
- Calculates returns and records the results.
- Returns a DataFrame summarizing the strategy's performance.


**main()**

- Loads match data from a specified file path.
- Splits the data into individual matches.
- Runs the betting strategy on all matches.
- Computes and prints performance metrics:
- Total predictions made, accuracy, and cumulative returns.
- Saves results to a CSV file.
