In [143]:
%%writefile streamlit_app.py
import streamlit as st
import pandas as pd
import numpy as np
import joblib
import os
import base64
import matplotlib.pyplot as plt
from mplsoccer import Pitch
import plotly.express as px
import plotly.graph_objs as go
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from scipy import ndimage



# Load the persisted estimators and the feature scaler; Streamlit caches the returned objects.
@st.cache_resource
def load_models_and_scaler():
    """Load the pickled models and the scaler used by the dashboard.

    Returns:
        A 5-tuple ``(xgb_model, lgb_model, catboost_model, stack_classifier,
        scaler)``. Each artefact is read with ``joblib.load`` from a relative
        path, i.e. resolved against the current working directory.
    """
    # Order matters: callers unpack the tuple positionally.
    artefact_files = (
        'final_xgb_model.pkl',
        'final_lgb_model.pkl',
        'final_catboost_model.pkl',
        'stacking_clf.pkl',
        'scaler.pkl',
    )
    return tuple(joblib.load(filename) for filename in artefact_files)

# Read the shots workbook; the decorator caches the loaded DataFrame so reruns do not re-read the file.
@st.cache_resource
def load_data():
    """Return the combined shots dataset as a pandas DataFrame."""
    shots_workbook = '/Users/sp/Downloads/Dissertation/Datasets/shots_combined/shots_streamlit.xlsx'
    return pd.read_excel(shots_workbook)

# Cached lookup of a striker's average attribute ratings across their rows in the shots data.
@st.cache_data
def get_historical_features(player_name, shots):
    """Average the striker attribute columns over one player's shots.

    Args:
        player_name: Value matched against the ``player_striker`` column.
        shots: DataFrame holding the shot rows and striker attribute columns.

    Returns:
        A dict mapping ``'<column>_avg'`` to the mean of that column over the
        player's rows (NaN when the player has no rows, as ``Series.mean``
        of an empty selection yields NaN).
    """
    striker_rows = shots[shots['player_striker'] == player_name]
    attribute_columns = (
        'overall_striker',
        'power_long_shots_striker',
        'mentality_vision_striker',
        'attacking_finishing_striker',
    )
    return {f'{column}_avg': striker_rows[column].mean() for column in attribute_columns}

# Build a single-row feature frame from the UI inputs and score it with the given model.
def calculate_xG(model, scaler, position_x, position_y, is_penalty, is_header, is_home, is_OpenPlay, is_DirectFreekick, is_FromCorner, is_SetPiece, is_leading, is_lagging, is_tied, player_name, Last_Action, overall_defence_team, defence_defence_team, goalkeeping_reflexes_gk):
    """Predict the expected-goals probability for one shot.

    Args:
        model: Classifier exposing ``predict_proba``.
        scaler: Fitted transformer exposing ``transform``.
        position_x, position_y: Shot coordinates on the 0-100 scale used by
            the UI sliders; both are divided by 100 before modelling.
        is_penalty, is_header, is_home, is_OpenPlay, is_DirectFreekick,
        is_FromCorner, is_SetPiece, is_leading, is_lagging, is_tied:
            Shot/match indicator flags passed through unchanged.
        player_name: Striker whose historical averages are looked up.
        Last_Action: Key into the module-level ``Last_Action_conditions``
            mapping (raises ``KeyError`` if absent).
        overall_defence_team, defence_defence_team, goalkeeping_reflexes_gk:
            Opponent and goalkeeper ratings passed through unchanged.

    Returns:
        ``(xg, frame)`` where ``xg`` is the positive-class probability for the
        single row and ``frame`` is the one-row input DataFrame with an extra
        ``'New xG'`` column.
    """
    # Column order handed to the scaler/model.
    # NOTE(review): presumably this matches the order used at fit time - confirm.
    model_columns = [
        'X', 'Y', 'Y_angle', 'is_Penalty', 'is_head', 'is_home', 'is_OpenPlay',
        'is_DirectFreekick', 'is_FromCorner', 'is_SetPiece', 'is_leading',
        'is_lagging', 'is_tied', 'overall_striker', 'Last_Action',
        'attacking_finishing_striker', 'mentality_vision_striker',
        'power_long_shots_striker', 'overall_defence_team',
        'defence_defence_team', 'goalkeeping_reflexes_gk',
    ]

    x_norm = position_x / 100
    y_norm = position_y / 100
    # 1.0 when the shot is level with the middle of the pitch width (y == 50),
    # falling linearly to 0.0 at either edge (y == 0 or y == 100).
    centrality = 1 - abs(y_norm - 0.50) * 2
    encoded_last_action = Last_Action_conditions[Last_Action]

    # Striker ratings come from the player's averages in the module-level shots data.
    striker_avgs = get_historical_features(player_name, shots)

    row = {
        'X': x_norm,
        'Y': y_norm,
        'Y_angle': centrality,
        'is_Penalty': is_penalty,
        'is_head': is_header,
        'is_home': is_home,
        'is_OpenPlay': is_OpenPlay,
        'is_DirectFreekick': is_DirectFreekick,
        'is_FromCorner': is_FromCorner,
        'is_SetPiece': is_SetPiece,
        'is_leading': is_leading,
        'is_lagging': is_lagging,
        'is_tied': is_tied,
        'overall_striker': striker_avgs['overall_striker_avg'],
        'Last_Action': encoded_last_action,
        'power_long_shots_striker': striker_avgs['power_long_shots_striker_avg'],
        'goalkeeping_reflexes_gk': goalkeeping_reflexes_gk,
        'mentality_vision_striker': striker_avgs['mentality_vision_striker_avg'],
        'attacking_finishing_striker': striker_avgs['attacking_finishing_striker_avg'],
        'defence_defence_team': defence_defence_team,
        'overall_defence_team': overall_defence_team,
    }
    # Wrap every scalar in a one-element list to get a single-row frame.
    new_data = pd.DataFrame({column: [value] for column, value in row.items()})

    # Scale the reordered columns, then keep the probability of the positive class.
    scaled_input = scaler.transform(new_data[model_columns])
    goal_probabilities = model.predict_proba(scaled_input)[:, 1]

    # Attach the prediction to the returned frame for downstream use.
    new_data['New xG'] = goal_probabilities

    return goal_probabilities[0], new_data

# Draw the pitch with a football at the shot position and a goalkeeper in front of the right-hand goal.
def plot_pitch_with_marker(x, y):
    """Render the pitch with the selected shot location in the Streamlit app.

    Args:
        x: Horizontal position of the football marker on the 0-100 axis.
        y: Vertical position of the football marker on the 0-100 axis.

    The figure is closed after rendering. Streamlit re-executes the script on
    every interaction, so a figure left open would stay registered with
    pyplot and accumulate for the life of the process.
    """
    # Draw the football pitch using 'mplsoccer'
    pitch = Pitch(pitch_color='grass', stripe=True, pitch_type='wyscout', line_color='white', goal_type='box', label=True, axis=True, tick=True)
    fig, ax = pitch.draw(figsize=(10, 5))

    try:
        # Load the marker images (absolute paths on the author's machine).
        football_img = plt.imread("/Users/sp/Downloads/Dissertation/football.png")
        goalkeeper_img = plt.imread("/Users/sp/Downloads/Dissertation/goalkeeper.png")

        # Rotate the goalkeeper sprite by -90 degrees before placing it.
        rotated_goalkeeper_img = ndimage.rotate(goalkeeper_img, -90)

        # Goalkeeper marker, fixed at (95, 50) on the right side of the pitch.
        goalkeeper_box = OffsetImage(rotated_goalkeeper_img, zoom=0.03)
        goalkeeper_ab = AnnotationBbox(goalkeeper_box, (95, 50), frameon=False)

        # Football marker at the shot position; zoom scales the image down.
        imagebox = OffsetImage(football_img, zoom=0.01)
        football_ab = AnnotationBbox(imagebox, (x, y), frameon=False)

        # Add the football and goalkeeper markers to the pitch
        ax.add_artist(football_ab)
        ax.add_artist(goalkeeper_ab)

        # Set axis limits, labels, and title
        ax.set_xlim(0, 100)
        ax.set_ylim(0, 100)
        ax.set_xlabel('X Coordinate')
        ax.set_ylabel('Y Coordinate')
        ax.set_title('Football Pitch with Selected Shot Location')

        # Display the pitch in Streamlit
        st.pyplot(fig)
    finally:
        # Release the figure even if image loading or rendering failed.
        plt.close(fig)

# Embed an image file as the app-wide background via injected CSS.
def set_background_image(image_file):
    """Use ``image_file`` as the full-page background of the Streamlit app.

    Args:
        image_file: Path to the image. Its bytes are base64-encoded and
            inlined into a ``data:image/jpeg`` URL inside a ``<style>`` tag.
    """
    with open(image_file, "rb") as image_handle:
        raw_bytes = image_handle.read()
    img_base64 = base64.b64encode(raw_bytes).decode()

    # Doubled braces are literal CSS braces inside the f-string.
    background_css = f"""
        <style>
        .stApp {{
            background-image: url("data:image/jpeg;base64,{img_base64}");
            background-size: cover;
            background-repeat: no-repeat;
            background-attachment: fixed;
            background-position: center -100px;  /* Adjust this value to move the image up */
        }}
        </style>
        """
    # unsafe_allow_html is required for Streamlit to emit the raw <style> tag.
    st.markdown(background_css, unsafe_allow_html=True)


# Page background (absolute path on the author's machine - adjust when deploying elsewhere)
image_path = '/Users/sp/Downloads/Dissertation/734478e9fecce00363b0737d8e607d34.jpg'
set_background_image(image_path)

# Main code execution
# Shot data first, then the cached estimators and scaler used for the xG prediction
shots = load_data()
models_and_scaler = load_models_and_scaler()
xgb_model, lgb_model, catboost_model, stack_classifier, scaler = models_and_scaler

# Maps each last-action category to the numeric code fed to the model.
# Every option offered in the dropdown below must have an entry here, because
# the xG calculation indexes this dictionary directly with the selected value.
Last_Action_conditions = {
    'BlockedPass': 1, 'Interception': 1, 'Dispossessed': 1, 'LayOff': 1,
    'BallTouch': 1, 'CornerAwarded': 1, 'End': 1, 'Foul': 1, 'Goal': 1,
    'SubstitutionOff': 1, 'SubstitutionOn': 1, 'Tackle': 1, 'None': 1,
    # 'Corner' is the label shown in the dropdown; without this entry selecting
    # it raised KeyError.
    # NOTE(review): assumed to be the same category as 'CornerAwarded' - confirm.
    'Corner': 1,
    'BallRecovery': 2, 'Aerial': 3, 'Chipped': 4, 'Throughball': 5,
    'TakeOn': 6, 'Rebound': 7, 'Standard': 8, 'Cross': 9,
    'Pass': 10, 'HeadPass': 10
}

# Last-action options for the dropdown menu. This full list includes 'Standard'
# and is used where 'Standard' is the locked, pre-selected choice.
filtered_last_actions = [
    'BlockedPass', 'Foul', 'Corner', 'Tackle', 'BallRecovery', 'Aerial',
    'Chipped', 'TakeOn', 'Rebound', 'Standard', 'Cross', 'Pass', 'HeadPass'
]

# Same options with 'Standard' removed, for the freely selectable dropdowns.
filtered_last_actions_without_standard = [action for action in filtered_last_actions if action != 'Standard']

# App title
st.title('Expected Goals (xG) Calculator')

# The striker is picked either from every player in the data or from one chosen team
player_team = st.selectbox('Select Player Team Type', ['Any Team', 'Specific Team'])
if player_team != 'Any Team':
    # 'Specific Team': team dropdown on the left, that team's players on the right
    col1, col2 = st.columns(2)
    with col1:
        team_of_player = st.selectbox('Select Player Team', sorted(shots['playerTeam'].unique()))
    with col2:
        team_rows = shots[shots['playerTeam'] == team_of_player]
        player_name = st.selectbox('Select Player', sorted(team_rows['player_striker'].unique()))
else:
    # 'Any Team': one dropdown listing every striker in the data
    player_name = st.selectbox('Select Player', sorted(shots['player_striker'].unique()))

# Home/away flag
is_home = st.checkbox('Home Game')

# Penalty flag, taken from a Yes/No dropdown and converted to a boolean
is_penalty = st.selectbox('Is it a Penalty?', ['No', 'Yes']) == 'Yes'

# Shot location and shot-type inputs.
# Penalties use a fixed location and fixed shot-type flags; the related widgets
# are still rendered (disabled) so the page layout is the same in both modes.
if is_penalty:
    # Fixed penalty-spot coordinates. The constants are used, not the disabled
    # sliders' return values, so the plotted marker and the model input always
    # agree. (The earlier code overwrote position_x with 50 before plotting,
    # which drew the penalty marker at (50, 50) instead of (88, 50).)
    position_x = 88
    position_y = 50

    col1, col2 = st.columns(2)
    with col1:
        st.slider('X Coordinate', 0, 100, position_x, disabled=True)
    with col2:
        st.slider('Y Coordinate', 0, 100, position_y, disabled=True)

    # Visualize shot on the football pitch
    plot_pitch_with_marker(position_x, position_y)

    # Options irrelevant for penalties are shown disabled and forced to fixed values.
    # NOTE(review): the explicit constants are kept because a disabled widget may
    # still report state left over from the non-penalty widgets - confirm.
    st.checkbox('Header', value=False, disabled=True)
    st.checkbox('Open Play', value=False, disabled=True)
    st.checkbox('Direct Free Kick', value=False, disabled=True)
    st.checkbox('From Corner', value=False, disabled=True)
    st.checkbox('Set Piece', value=False, disabled=True)
    st.selectbox('Last Action', filtered_last_actions, index=filtered_last_actions.index('Standard'), disabled=True)
    is_header = False
    is_OpenPlay = False
    is_DirectFreekick = False
    is_FromCorner = False
    is_SetPiece = False
    Last_Action = 'Standard'
else:
    # Non-penalty shots: free coordinate selection
    col1, col2 = st.columns(2)
    with col1:
        position_x = st.slider('X Coordinate', 0, 100, 50)
    with col2:
        position_y = st.slider('Y Coordinate', 0, 100, 50)
    plot_pitch_with_marker(position_x, position_y)

    is_header = st.checkbox('Header')
    is_OpenPlay = st.checkbox('Open Play')

    # The shot-type flags are mutually exclusive with this priority:
    # Open Play > Direct Free Kick > From Corner > Set Piece.
    # Each checkbox is always rendered, but is disabled (and its flag forced to
    # False) once a higher-priority type has been selected.
    freekick_locked = is_OpenPlay
    freekick_ticked = st.checkbox('Direct Free Kick', value=False, disabled=freekick_locked)
    is_DirectFreekick = freekick_ticked and not freekick_locked

    corner_locked = is_OpenPlay or is_DirectFreekick
    corner_ticked = st.checkbox('From Corner', value=False, disabled=corner_locked)
    is_FromCorner = corner_ticked and not corner_locked

    set_piece_locked = corner_locked or is_FromCorner
    set_piece_ticked = st.checkbox('Set Piece', value=False, disabled=set_piece_locked)
    is_SetPiece = set_piece_ticked and not set_piece_locked

    if is_DirectFreekick:
        # A direct free kick has no preceding action to choose: lock to 'Standard'.
        Last_Action = st.selectbox('Last Action', filtered_last_actions, index=filtered_last_actions.index('Standard'), disabled=True)
    else:
        Last_Action = st.selectbox('Last Action', filtered_last_actions_without_standard)

# Game state dropdown, one-hot encoded into three 0/1 integer flags
game_state = st.selectbox('Game State', ['Leading', 'Lagging', 'Tied'])
is_leading, is_lagging, is_tied = (
    int(game_state == state) for state in ('Leading', 'Lagging', 'Tied')
)

# Opponent and goalkeeper ratings: either derived from a team/goalkeeper chosen
# from the shots data ('Teams') or entered directly with sliders ('Custom').
st.markdown("### Opponent Selection Method")
opponentSelection = st.selectbox('Select the Method for Choosing Opponent and Goalkeeper', ['Custom', 'Teams'])

if opponentSelection == 'Teams':
    st.markdown("#### Select Team and Goalkeeper")

    # Select opponent team
    Opponent_name = st.selectbox('Select Opponent Team', sorted(shots['opponentTeam'].unique()))
    opponent_rows = shots[shots['opponentTeam'] == Opponent_name]

    # The first radio option is labelled with the opponent's own name, so that
    # name is the value to compare against. (The earlier code compared against
    # the literal 'Opponent Team', which the radio never returns, so the
    # goalkeeper list was never narrowed to the opponent.)
    preferred_team = st.radio("Select Preferred Goalkeeper's Team", [Opponent_name, 'Other Team'])

    if preferred_team == Opponent_name:
        # Only goalkeepers who appear in rows against the selected opponent
        available_gks = sorted(opponent_rows['long_name_gk'].unique())
    else:
        # 'Other Team': every goalkeeper in the data
        available_gks = sorted(shots['long_name_gk'].unique())

    gk = st.selectbox('Select Goalkeeper', available_gks)

    # Ratings are the column means over the matching rows
    overall_defence_team = opponent_rows['overall_defence_team'].mean()
    defence_defence_team = opponent_rows['defence_defence_team'].mean()
    goalkeeping_reflexes_gk = shots[shots['long_name_gk'] == gk]['goalkeeping_reflexes_gk'].mean()

else:
    st.markdown("#### Custom Opponent and Goalkeeper Attributes")

    # Custom attributes for opponent defense and goalkeeper skills (0-100, default 80)
    overall_defence_team = st.slider('Opponent Overall Power', 0, 100, 80)
    defence_defence_team = st.slider('Opponent Defence Strength', 0, 100, 80)
    goalkeeping_reflexes_gk = st.slider('Goalkeeper Skill Rating', 0, 100, 80)

# Button to trigger the xG calculation and render the result summary
if st.button('Calculate xG'):

    # Score the shot with the stacking classifier. The returned feature frame
    # is kept in new_data but is not rendered; the summary below is built from
    # the input values directly.
    xg_value, new_data = calculate_xG(
        stack_classifier, scaler, position_x, position_y,
        is_penalty, is_header, is_home,
        is_OpenPlay, is_DirectFreekick, is_FromCorner, is_SetPiece,
        is_leading, is_lagging, is_tied,
        player_name, Last_Action,
        overall_defence_team, defence_defence_team, goalkeeping_reflexes_gk,
    )
    xg_percentage = xg_value * 100

    # Display the calculated xG as a percentage
    st.markdown(f"### Expected Goals (xG) Percentage: **{xg_percentage:.2f}%**")

    # Two-column summary of the inputs behind the prediction
    st.markdown("### Feature Values")
    col1, col2 = st.columns(2)

    # Left column: player and opponent information
    with col1:
        # Same averaged ratings that were fed to the model for this player
        player_power = get_historical_features(player_name, shots)

        st.markdown('#### Player Information')
        st.write(f"**Player Name:** {player_name}")
        st.write(f"**Overall Skill Rating:**  {player_power['overall_striker_avg']:.2f}")
        st.write(f"**Finishing Skill Rating:**  {player_power['attacking_finishing_striker_avg']:.2f}")
        st.write(f"**Shot Accuracy (Vision):** {player_power['mentality_vision_striker_avg']:.2f}")
        st.write(f"**Shot Power (Long Shots):** {player_power['power_long_shots_striker_avg']:.2f}")

        # Opponent details depend on how the opponent was chosen
        if opponentSelection == 'Teams':
            # Team-derived values are column means, so round them for display
            st.markdown("#### Opponent Information")
            st.write(f"**Team**: {Opponent_name}")
            st.write(f"**Overall Defence**: {round(overall_defence_team, 2)}")
            st.write(f"**Defence Strength**: {round(defence_defence_team, 2)}")
            st.write(f"**Goalkeeper**: {gk}")
            st.write(f"**Goalkeeper Skill Rating**: {round(goalkeeping_reflexes_gk, 2)}")
        elif opponentSelection == 'Custom':
            # Custom values come straight from the integer sliders
            st.markdown("#### Opponent Information")
            st.write(f"**Overall Defence**: {overall_defence_team}")
            st.write(f"**Defence Strength**: {defence_defence_team}")
            st.write(f"**Goalkeeper Skill Rating**: {goalkeeping_reflexes_gk}")

    # Right column: match context and shot characteristics chosen by the user
    with col2:
        st.markdown("#### Match Information")

        match_details = [
            ('X Coordinate', position_x),
            ('Y Coordinate', position_y),
            ('Is Home', is_home),
            ('Game State', game_state),
            ('Last Action', Last_Action),
            ('Penalty', is_penalty),
            ('Header', is_header),
            ('Open Play', is_OpenPlay),
            ('Direct Free Kick', is_DirectFreekick),
            ('From Corner', is_FromCorner),
            ('Set Piece', is_SetPiece),
        ]
        for label, value in match_details:
            st.write(f"**{label}**: {value}")


Overwriting streamlit_app.py
