# In [None]:
import os
import matplotlib.pyplot as plt
from collections import defaultdict

# Step 1: Data Input & Preprocessing
def read_possession_data(half, file_directory):
    """Load every possession file belonging to one half of the match.

    A file belongs to the half when its name starts with ``half_<half>``.
    Note that this is a plain prefix match, so ``half=1`` also matches a
    name such as ``half_10...``.

    Args:
        half: Value interpolated into the ``half_<half>`` filename prefix.
        file_directory: Directory that is scanned (non-recursively).

    Returns:
        A list with one entry per matching file; each entry is the list of
        raw lines (line endings included) returned by ``readlines()``.
        Entries are ordered by filename.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be read.
    """
    prefix = f'half_{half}'
    possession_data = []
    # os.listdir() makes no ordering guarantee; sort so repeated runs see the
    # possessions in the same order.
    for filename in sorted(os.listdir(file_directory)):
        if not filename.startswith(prefix):
            continue
        path = os.path.join(file_directory, filename)
        # A sub-directory whose name happens to match the prefix cannot be
        # opened as a file, so skip anything that is not a regular file.
        if not os.path.isfile(path):
            continue
        with open(path, 'r') as file:
            possession_data.append(file.readlines())
    return possession_data

# Step 2: Processing Possession Graphs
def process_possession_graphs(possession_data):
    """Count how often each same-team (sender, receiver) pass occurs.

    Each line is expected to look like ``<sender>-><receiver>`` with integer
    player IDs. IDs 1-14 are treated as the home team and 15-28 as the away
    team. A pass is counted only when both players are on the same team;
    lines that do not parse into exactly two integers are skipped.

    Args:
        possession_data: Iterable of possessions, each an iterable of lines.

    Returns:
        A ``(home_team_passes, away_team_passes)`` pair of ``defaultdict(int)``
        objects keyed by ``(sender_id, receiver_id)``.
    """
    home_ids = range(1, 15)   # Player IDs 1-14 for home team
    away_ids = range(15, 29)  # Player IDs 15-28 for away team

    home_team_passes = defaultdict(int)
    away_team_passes = defaultdict(int)

    for phase in possession_data:
        for raw in phase:
            fields = raw.strip().split('->')
            if len(fields) != 2:
                continue  # Not a single "a->b" pair
            try:
                passer, target = int(fields[0]), int(fields[1])
            except ValueError:
                continue  # Non-numeric player ID

            edge = (passer, target)
            if passer in home_ids and target in home_ids:
                home_team_passes[edge] += 1
            elif passer in away_ids and target in away_ids:
                away_team_passes[edge] += 1

    return home_team_passes, away_team_passes

# Step 3: Extracting Passing Patterns
def extract_passing_patterns(possession_data):
    """Count runs of consecutive same-team passes within each possession.

    For each team a running sequence of ``(sender, receiver)`` pairs is kept
    per possession. Every same-team pass extends that team's sequence and
    increments the counter for the sequence as it stands, so every prefix of
    a run is counted. Any parsed pass that is not internal to the team resets
    that team's sequence. Lines that fail to parse are skipped and leave both
    sequences untouched.

    Args:
        possession_data: Iterable of possessions, each an iterable of lines
            shaped like ``<sender>-><receiver>``.

    Returns:
        A ``(home_team_patterns, away_team_patterns)`` pair of
        ``defaultdict(int)`` objects keyed by tuples of pass pairs.
    """
    home_team_patterns = defaultdict(int)
    away_team_patterns = defaultdict(int)

    # (player-id range, pattern counter) for home (1-14) and away (15-28).
    squads = (
        (range(1, 15), home_team_patterns),
        (range(15, 29), away_team_patterns),
    )

    for phase in possession_data:
        # One running sequence per squad, restarted for every possession.
        streaks = [() for _ in squads]

        for raw in phase:
            fields = raw.strip().split('->')
            if len(fields) != 2:
                continue  # Not a single "a->b" pair
            try:
                passer, target = int(fields[0]), int(fields[1])
            except ValueError:
                continue  # Non-numeric player ID

            for slot, (ids, counts) in enumerate(squads):
                if passer in ids and target in ids:
                    streaks[slot] = streaks[slot] + ((passer, target),)
                    counts[streaks[slot]] += 1
                else:
                    streaks[slot] = ()

    return home_team_patterns, away_team_patterns

# Step 4: Visualization & Analysis
def plot_pass_frequencies(home_team_passes, away_team_passes):
    """Show a two-bar chart of the total pass count of each team.

    Args:
        home_team_passes: Mapping whose values are home-team pass counts.
        away_team_passes: Mapping whose values are away-team pass counts.
    """
    # Bar label -> summed pass count, in plotting order.
    totals = {
        'Home Team': sum(home_team_passes.values()),
        'Away Team': sum(away_team_passes.values()),
    }

    plt.figure(figsize=(8, 6))
    plt.bar(list(totals.keys()), list(totals.values()), color=['blue', 'red'])
    plt.title('Total Passes Comparison')
    plt.xlabel('Team')
    plt.ylabel('Total Passes')
    plt.show()

def plot_top_passing_patterns(home_team_patterns, away_team_patterns):
    """Show a horizontal bar chart of each team's ten most frequent patterns.

    Both teams are drawn in the same figure; each bar is labelled with the
    string form of its pattern key.

    Args:
        home_team_patterns: Mapping of pattern -> frequency for the home team.
        away_team_patterns: Mapping of pattern -> frequency for the away team.
    """
    def top_ten(patterns):
        # Highest frequency first, then keep at most ten entries.
        ranked = sorted(patterns.items(), key=lambda entry: entry[1], reverse=True)
        best = ranked[:10]
        names = [str(pattern) for pattern, _ in best]
        freqs = [count for _, count in best]
        return names, freqs

    labels_home, counts_home = top_ten(home_team_patterns)
    labels_away, counts_away = top_ten(away_team_patterns)

    plt.figure(figsize=(10, 6))
    plt.barh(labels_home, counts_home, color='blue', label='Home Team')
    plt.barh(labels_away, counts_away, color='red', label='Away Team')
    plt.title('Top 10 Passing Patterns')
    plt.xlabel('Frequency')
    plt.ylabel('Pattern')
    plt.legend()
    plt.show()

# Main Code: Putting It All Together
def main(file_directory):
    """Run the whole analysis: load both halves, count, and plot.

    Args:
        file_directory: Directory containing the possession files.
    """
    # Step 1: gather the possessions of half 1 followed by those of half 2.
    all_possession_data = []
    for half in (1, 2):
        all_possession_data.extend(read_possession_data(half, file_directory))

    # Step 2: per-pair pass frequencies for each team.
    passes = process_possession_graphs(all_possession_data)
    home_team_passes, away_team_passes = passes

    # Step 3: consecutive-pass patterns for each team.
    patterns = extract_passing_patterns(all_possession_data)
    home_team_patterns, away_team_patterns = patterns

    # Step 4: charts.
    plot_pass_frequencies(home_team_passes, away_team_passes)
    plot_top_passing_patterns(home_team_patterns, away_team_patterns)

# Example: Run the analysis for a given directory of match data files
# Run the analysis only when this file is executed as a script. The module
# attribute is spelled with double underscores: __name__ / "__main__".
if __name__ == "__main__":
    # Raw string so the backslash in the Windows path is taken literally
    # instead of starting an (invalid) escape sequence.
    file_directory = r"C:\discreteproject"  # Change this to the directory where your data is stored
    main(file_directory)