In [1]:
import streamlit as st
import pandas as pd
import numpy as np
import joblib  # Zum Laden gespeicherter Modelle
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import os

In [4]:
# Switch the kernel's working directory to the Streamlit project folder so that the
# relative targets of the %%writefile cells below (Home.py, pages/...) land there.
# NOTE(review): absolute, machine-specific path - this cell fails on any other machine.
os.chdir('C:/Users/Katharina/Desktop/Weiterbildung/Bootcamp/Bootcamp/Final_project/streamlit_parts')
os.getcwd()

'C:\\Users\\Katharina\\Desktop\\Weiterbildung\\Bootcamp\\Bootcamp\\Final_project\\streamlit_parts'

In [5]:
%%writefile Home.py

import streamlit as st
# Page configuration for the home page (title, icon and layout).
st.set_page_config(
    page_title="My Main Page Title",  # Your custom page title goes here
    page_icon=":rocket:",             # Optional, e.g. an emoji or an image link
    layout="centered"                 # Or "wide", depending on the desired layout
)


import pandas as pd
import numpy as np
import joblib  # Zum Laden gespeicherter Modelle
import matplotlib.pyplot as plt
import seaborn as sns
import os
import requests
from PIL import Image
from io import BytesIO


# Set the working directory to the folder that contains this script, so relative
# paths resolve on any machine instead of only at one hard-coded user directory.
os.chdir(os.path.dirname(os.path.abspath(__file__)))



#--------------------------------------------------------------------------------------------------------------------------
# Title
# Headline and tagline shown at the top of the home page.
st.title("🏐 Welcome to the Beach Volleyball Machine Learning Experience!")
st.markdown("### Predict, Analyze, and get Informations about the Beachvolleyball World Tour 🏐")


# Introductory text
# Overview of the app sections, rendered as Markdown. The trailing double spaces
# inside the string are Markdown hard line breaks and must be kept.
st.markdown("""
Welcome to this interactive machine learning app all about **Beach Volleyball**!  
Explore how data and predictive models can reveal new insights into player performance, team strategy, and match outcomes.

### What can you do here?
Each section of the app focuses on a different aspect of the Beach Volleyball World Tour — and includes **two machine learning models** per page.

- 🔍 **Data Overview** – Learn how the data was collected and prepared  
- 🔮 **Match Prediction** – Use two models to predict match outcomes  
- 🔮 **Error Forecasting** – Predict the total number of spike errors using different approaches  
- 📊 Compare players based on their average game statistics.
- 📊 Compare different Beachvolleyball teams based on their average game statistics.

Whether you're a fan, coach, or data scientist — dive in and explore how data and machine learning meets beach volleyball!
""")

# Helper for loading images from the web
def load_image(url, timeout=10):
    """Download an image over HTTP and return it as a PIL image.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot block the page indefinitely.

    Returns:
        The image opened with ``PIL.Image.open`` from the downloaded bytes.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    resp = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx here instead of handing an error page to the image decoder.
    resp.raise_for_status()
    return Image.open(BytesIO(resp.content))

# Example image from Unsplash
img1_url = "https://images.unsplash.com/photo-1723138568659-d35c7680779f?q=80&w=1974&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"

# Show the single image across the full container width.
# (The former two-column layout was created but never filled, so it was removed.)
st.image(load_image(img1_url),
         #caption="Beachvolleyball 🏐",
         use_container_width=True)

# Footer
st.markdown("---")
st.caption("Created with ❤️ using Streamlit and Machine Learning.")


Overwriting Home.py


In [6]:
%%writefile pages/1_Data_collection.py


import streamlit as st
# Page configuration for the "Data collection" page (no icon is set here).
st.set_page_config(
    page_title="Data collection",       # Title of this page
    layout="centered"
)
import pandas as pd
import numpy as np
import joblib  # Zum Laden gespeicherter Modelle
import matplotlib.pyplot as plt
import seaborn as sns
import os




# Page headline.
st.title("💾 Data collection")


# Static description of the data pipeline (goal and the five preparation steps),
# rendered as Markdown. This page only displays text; it loads no data itself.
st.markdown("""
#### **Goal**: Collect data on beach volleyball matches and players to analyze whether weather conditions significantly impact game outcomes.

### Procedure:
1. Data sources:
    - Collect beach volleyball data from the FIVB-VIS system, including tournaments, matches, players, and match statistics.
    - Merge and clean four tables to create a structured dataset.
2. Adding the Coordinates:
    - Extract tournament locations.
    - Use OpenCage (OpenStreetMap) to retrieve geographic coordinates.  
3. Adjusting Match Times:
    - Convert all time data to UTC.
    - Adjust time zones based on location coordinates.
4. Extracting Weather Data:
    - Retrieve historical weather data for each match based on location and time.
    - Use the Open-Meteo API to obtain relevant weather information.
5. Compiling the Dataset:
    - Refine multiple variables, including team names and player details.
    - Prepare a comprehensive dataset for analysis.
""")

Overwriting pages/1_Data_collection.py


In [7]:
%%writefile pages/2_Prediction_of_Match_Win.py

import streamlit as st
# Set the page title and icon for this page (no layout is passed here)
st.set_page_config(
    page_title="2: Machine Learning - Predict Match Win",  # The displayed page name is set here
    page_icon="🔮"
)

import pandas as pd
import numpy as np
import joblib  # Zum Laden gespeicherter Modelle
import matplotlib.pyplot as plt
import seaborn as sns
import os
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# Set the working directory to the app root (the parent of this file's "pages"
# folder) so the relative CSV and model paths below resolve on any machine instead
# of only at one hard-coded user directory.
os.chdir(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


# Data for the match-win models (judging by the file names: one table with
# weather columns, one without)
df_Classif_mitW = pd.read_csv("ML_MatchWin_Weather2.csv", sep=';')
df_Classif_ohneW = pd.read_csv("ML_MatchWin_OHNEWeather_V2.csv", sep=';')


# Streamlit title and subtitle
st.title("🏐 Machine Learning Models for Estimating a Match Win")
st.subheader("Predicting the Outcome of a Beach Volleyball Match")

# Add spacing
st.markdown("<br>", unsafe_allow_html=True)

# Descriptive introduction.
# The lead sentence used to talk about estimating "spike faults" (copied from the
# spike-fault page); this page predicts the match outcome, so the text says so now.
st.markdown("""
## 📊 Model Options

Choose between different machine learning models to predict the *match outcome* of a game:

- 🌀 **With Weather Impact**  
  &nbsp;&nbsp;&nbsp;&nbsp;→ Predicts the *match outcome* of a game, considering weather conditions  
  &nbsp;&nbsp;&nbsp;&nbsp;*(Models: Random Forest & Gradient Boosting)*

- ☀️ **Without Weather Impact**  
  &nbsp;&nbsp;&nbsp;&nbsp;→ Predicts the *match outcome* of a game, without considering weather conditions  
  &nbsp;&nbsp;&nbsp;&nbsp;*(Models: Random Forest & Gradient Boosting)*

---

## 🛠️ Adjust Input Values

Use the filters in the **sidebar** to customize input values and generate more accurate predictions.
""")


# Model selection
# Each sidebar option maps to (pickled model file, data set that goes with it).
# The dict order is the order in which the options appear in the select box.
_match_win_models = {
    'Match-Win Prediction with weather impact (Random Forest)': ('random_forest_model.pkl', df_Classif_mitW),
    'Match-Win Prediction with weather impact (Gradient Boosting)': ('GradientBoosting_model.pkl', df_Classif_mitW),
    'Match-Win Prediction without weather impact (Random Forest)': ('random_forest_model_ohneWetter.pkl', df_Classif_ohneW),
    'Match-Win Prediction without weather impact (Gradient Boosting)': ('GradientBoosting_model_ohneWetter.pkl', df_Classif_ohneW),
}

model_choice = st.sidebar.selectbox('Choose the model', list(_match_win_models))

# Load the chosen model together with its data set; every option predicts 'match_win'
_model_file, df = _match_win_models[model_choice]
model = joblib.load(_model_file)
target_variable = 'match_win'

# Add spacing before showing model choice
st.markdown("<br>", unsafe_allow_html=True)

# Display chosen model with styled text.
# unsafe_allow_html is passed because the text contains an inline-styled <span>;
# model_choice is one of the fixed select-box options.
st.markdown(f"""

---

### ✅ Selected Model  
You have chosen: <span style='font-weight:bold; color:#4CAF50;'>{model_choice}</span>

---

""", unsafe_allow_html=True)

#---------------------------------------------------------------------------------------------------------------------------------------
# Gender filter
#---------------------------------------------------------------------------------------------------------------------------------------
# Store the gender codes as integers (this writes back into the selected data set)
df['Gender_x'] = df['Gender_x'].astype(int)

# Code -> label shown in the sidebar, plus the reverse lookup label -> code
gender_mapping = {0: 'Male', 1: 'Female'}
inverse_mapping = dict(zip(gender_mapping.values(), gender_mapping.keys()))

# Offer only the gender codes that occur in the data; codes without a label are shown as-is
display_options = [gender_mapping.get(code, code) for code in df['Gender_x'].unique()]
selected_gender_display = st.sidebar.selectbox("Choose Gender", display_options)

# Translate the chosen label back to the code stored in the data
selected_gender = inverse_mapping.get(selected_gender_display, selected_gender_display)

# Apply the filter. Only gender is filtered; a tournament-type filter on the
# "Type" column existed here at some point but is disabled.
gender_mask = df["Gender_x"] == selected_gender
df_filtered = df[gender_mask]


# Input form for the user
st.sidebar.header('Choose Input Values')


def _range_slider(label, column, cast):
    """Sidebar slider spanning min..max of df_filtered[column], with both bounds passed through `cast`."""
    return st.sidebar.slider(label, cast(df_filtered[column].min()), cast(df_filtered[column].max()))


# Random Forest and Gradient Boosting share the same inputs; only the
# with/without-weather distinction changes which sliders are shown.
_weather_model_choices = (
    'Match-Win Prediction with weather impact (Random Forest)',
    'Match-Win Prediction with weather impact (Gradient Boosting)',
)

input_data = {}
if model_choice in _weather_model_choices:
    # (key in input_data, slider label, data-set column providing the range)
    for _key, _label, _column in (
        ("temperature", "Temperature (°C)", "temperature_2m"),
        ("wind_speed", "Wind Speed (km/h)", "wind_speed_10m"),
        ("rain", "Rain (mm)", "rain"),
        ("wind_gusts", "Wind Gusts (km/h)", "wind_gusts_10m"),
    ):
        input_data[_key] = _range_slider(_label, _column, float)
else:
    # Match statistics: key, label and column are all the same name
    for _stat in ("SpikePoint", "ServeFault", "ServePoint", "ServeTotal", "BlockTotal"):
        input_data[_stat] = _range_slider(_stat, _stat, int)

# Team pickers come last in every variant
input_data["Team1"] = st.sidebar.selectbox("Team 1", df_filtered["Team1"].unique())
input_data["Team2"] = st.sidebar.selectbox("Team 2", df_filtered["Team2"].unique())


# Convert the input to a one-row DataFrame with the columns the model was fitted on.
# NOTE(review): any feature name that is not a key of input_data becomes NaN here;
# the weather keys ("temperature", ...) differ from the data-set column names
# ("temperature_2m", ...) - confirm which names model.feature_names_in_ contains.
input_df = pd.DataFrame([input_data], columns=model.feature_names_in_)


# Run the prediction and the probability calculation only after the button was clicked
if st.sidebar.button("🔮 Show Prediction"):

    # Predicted class: 1 means Team 1 wins, any other value means Team 2 wins.
    prediction = model.predict(input_df)
    team1_wins = prediction[0] == 1
    # Resolved outside the target check so the probability line below can always use it.
    winning_team = input_data["Team1"] if team1_wins else input_data["Team2"]

    if target_variable == 'match_win':
        result_text = f'🏆 {winning_team} win this match!'

        st.markdown(
            f'<p style="font-size:24px; font-weight:bold;">Prediction: {result_text}</p>',
            unsafe_allow_html=True
        )

    st.markdown("<br>" , unsafe_allow_html=True)

    # Column 1 of predict_proba is taken as the probability that Team 1 wins (the same
    # assumption as the class check above). When Team 2 is the predicted winner, the
    # complement must be reported - previously Team 1's probability was shown under
    # Team 2's name.
    team1_win_probability = model.predict_proba(input_df)[0][1]
    win_probability = team1_win_probability if team1_wins else 1 - team1_win_probability

    # ".0%" scales the 0-1 probability to a percentage so the number matches the "%" sign
    # (previously e.g. 0.73 was printed as "0.73%").
    st.markdown(
    f'<p style="font-size:22px; font-weight:bold;">Predicted Winning Probability for {winning_team}: {win_probability:.0%}</p>',
    unsafe_allow_html=True)

#-------------------------------------
# Weather sensitivity: sweep each weather variable over its observed range while all
# other inputs stay at the sidebar values, and plot the resulting win probability.
# `go` and `make_subplots` are already imported at the top of this page, so they are
# not re-imported here.
if model_choice in ['Match-Win Prediction with weather impact (Random Forest)', 'Match-Win Prediction with weather impact (Gradient Boosting)']:
    if st.sidebar.button("Show influence of weather variables"):

        st.markdown(f"#### 📈 Weather Impact for {input_data['Team1']}:")

        # Team 1 from the sidebar; only used in the trace names
        selected_team = input_data["Team1"]

        # Data-set column -> (key the sidebar stores the value under, axis label)
        weather_vars = {
            "temperature_2m": ("temperature", "Temperature (°C)"),
            "wind_speed_10m": ("wind_speed", "Wind Speed (km/h)"),
            "rain": ("rain", "Rain (mm)"),
            "wind_gusts_10m": ("wind_gusts", "Wind Gusts (km/h)"),
        }

        # Baseline input built from the sidebar values. The sliders use short keys
        # ("temperature") while the sweep used the data-set column names
        # ("temperature_2m"), so one of the two was always ignored when the frame is
        # restricted to model.feature_names_in_. Keep every weather value under both
        # names so the model receives it whichever name it was fitted with.
        constant_input = input_data.copy()
        for column, (input_key, _label) in weather_vars.items():
            constant_input.setdefault(column, constant_input[input_key])

        # 2x2 subplot figure, one panel per weather variable
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=[""]*4,
            vertical_spacing=0.15,    # more room between the rows than the default
            horizontal_spacing=0.2
        )

        window_size = 5  # window length of the moving-average smoothing
        half_window = window_size // 2
        kernel = np.ones(window_size) / window_size

        for i, (column, (input_key, label)) in enumerate(weather_vars.items()):
            # 50 evenly spaced values across the range observed in the filtered data
            xvals = np.linspace(df_filtered[column].min(), df_filtered[column].max(), num=50)

            # One row per swept value; everything except this weather variable stays
            # constant. A single batched predict_proba call replaces 50 one-row calls.
            sweep_df = pd.DataFrame([constant_input] * len(xvals), columns=model.feature_names_in_)
            for name in (column, input_key):
                if name in sweep_df.columns:
                    sweep_df[name] = xvals

            # Column 1 = probability of class 1 (Team 1 wins), as in the prediction above
            predictions = np.asarray(model.predict_proba(sweep_df))[:, 1]

            # Moving average. Padding with the edge values (instead of the implicit
            # zeros of mode='same') keeps the first and last points from being pulled
            # towards zero.
            padded = np.pad(predictions, half_window, mode='edge')
            smoothed_predictions = np.convolve(padded, kernel, mode='valid')

            # Linear trend line fitted to the unsmoothed predictions
            coeffs = np.polyfit(xvals, predictions, 1)
            trend_line = np.polyval(coeffs, xvals)

            # Position in the 2x2 grid
            row = i // 2 + 1
            col = i % 2 + 1

            # Smoothed curve
            fig.add_trace(
                go.Scatter(
                    x=xvals,
                    y=smoothed_predictions,
                    mode='lines',
                    name=f'{label} (smoothed) - {selected_team}',
                    line=dict(width=2)
                ),
                row=row, col=col
            )

            # Trend line, dashed
            fig.add_trace(
                go.Scatter(
                    x=xvals,
                    y=trend_line,
                    mode='lines',
                    name=f'{label} (trend) - {selected_team}',
                    line=dict(dash='dash', width=2)
                ),
                row=row, col=col
            )

            # Axis titles per subplot
            fig.update_xaxes(title_text=label, row=row, col=col)
            fig.update_yaxes(title_text="Win Probability", row=row, col=col)

        # Overall layout; the legend is hidden because the axis titles identify each panel
        fig.update_layout(
            height=700,
            showlegend=False
        )

        st.plotly_chart(fig)





Overwriting pages/2_Prediction_of_Match_Win.py


In [8]:
# Load the spike-point data sets into the notebook kernel (judging by the file
# names: one with and one without weather columns).
# NOTE(review): neither frame is referenced again in the visible part of the notebook.
df_SpikeSum_mitW = pd.read_csv('ML_SpikePoint_MitWetter.csv', sep=';')
df_SpikeSum_ohneW = pd.read_csv('ML_SpikePoint_OHNEWetter.csv', sep=';')

In [9]:
%%writefile pages/3_Prediction_of_Spike_Faults.py
#3_Machine_Learning_Modell_II.py


import streamlit as st
import pandas as pd
import numpy as np
import joblib  # Zum Laden gespeicherter Modelle
import matplotlib.pyplot as plt
import seaborn as sns
import os
import plotly.express as px
from plotly.subplots import make_subplots
from plotly import graph_objects as go
# Set the working directory to the app root (the parent of this file's "pages"
# folder) so the relative CSV and model paths used on this page resolve on any
# machine instead of only at one hard-coded user directory.
os.chdir(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


# Set the page title and layout (the icon is left disabled)
st.set_page_config(
    page_title="Prediction of Spike Faults of one team",  # The displayed page name is set here
    #page_icon=":smile:",                # Optional: page icon
    layout="centered"
)

# Page headline.
st.title("🔍 Prediction of Spike Faults of one team")

# Introduction describing the model variants offered on this page, rendered as
# Markdown. The trailing double spaces inside the string are Markdown hard line
# breaks and must be kept.
st.markdown("""
## 📊 Model Options

Choose between two different prediction models:

- 🌀 **With Weather Impact**  
  &nbsp;&nbsp;&nbsp;&nbsp;→ Predicts the total number of *spike faults* in a game, considering weather conditions  
  &nbsp;&nbsp;&nbsp;&nbsp;*(Models: Random Forest & Gradient Boosting)*

- ☀️ **Without Weather Impact**  
  &nbsp;&nbsp;&nbsp;&nbsp;→ Predicts the total number of *spike faults* in a game, without considering weather conditions  
  &nbsp;&nbsp;&nbsp;&nbsp;*(Models: Random Forest & Gradient Boosting)*

---

## 🛠️ Adjust Input Values

Use the filters in the **sidebar** to customize input values and generate more accurate predictions.
""")

# Data for the spike-fault models
df_Reg_mitW = pd.read_csv('ML_SpikeFault_mitWetter.csv', sep=';')
df_Reg_ohneW = pd.read_csv('ML_SpikeFault_OHNEWetter.csv', sep=';')


# Model selection
# Each sidebar option maps to (pickled model file, data set that goes with it).
# The dict order is the order in which the options appear in the select box.
# (Former "Total-Spikes" options are disabled and therefore not listed.)
_spike_fault_models = {
    'Spike-Fault with weather impact (Random Forest)': ('RandomForest_SpikeFault_mitWetter_NEUEDATEN.pkl', df_Reg_mitW),
    'Spike-Fault with weather impact (Gradient Boosting)': ('GradientBoosting_SpikeFault_mitWetter_NEUEDATEN.pkl', df_Reg_mitW),
    'Spike-Fault without weather impact (Random Forest)': ('RandomForest_SpikeFault_OHNEWetter_NEUEDATEN.pkl', df_Reg_ohneW),
    'Spike-Fault without weather impact (Gradient Boosting)': ('GradientBoosting_SpikeFault_OHNEWetter_NEUEDATEN.pkl', df_Reg_ohneW),
}

model_choice = st.sidebar.selectbox('Choose the model', list(_spike_fault_models))

# Load the chosen model together with its data set; every option predicts 'SpikeFault'
_model_file, df = _spike_fault_models[model_choice]
model = joblib.load(_model_file)
target_variable = 'SpikeFault'

# Add spacing before showing selected model
st.markdown("<br>", unsafe_allow_html=True)

# Display chosen model with styled text.
# unsafe_allow_html is passed because the text contains an inline-styled <span>;
# model_choice is one of the fixed select-box options.
st.markdown(f"""

---

### ✅ Selected Model  
You have chosen: <span style='font-weight:bold; color:#4CAF50;'>{model_choice}</span>

---

""", unsafe_allow_html=True)

#---------------------------------------------------------
# Gender filter
#---------------------------------------------------------
# Store the gender codes as integers (this writes back into the selected data set)
df['Gender_x'] = df['Gender_x'].astype(int)

# Code -> label shown in the sidebar, plus the reverse lookup label -> code
gender_mapping = {0: 'Male', 1: 'Female'}
inverse_mapping = dict(zip(gender_mapping.values(), gender_mapping.keys()))

# Offer only the gender codes that occur in the data; codes without a label are shown as-is
original_gender_values = df['Gender_x'].unique()
display_options = list(map(lambda code: gender_mapping.get(code, code), original_gender_values))
selected_gender_display = st.sidebar.selectbox("Choose Gender", display_options)

# Translate the chosen label back to the code stored in the data
selected_gender = inverse_mapping.get(selected_gender_display, selected_gender_display)

# Apply the filter: keep only the rows of the chosen gender
df_filtered = df.loc[df["Gender_x"] == selected_gender]

# Input form for the user
st.sidebar.header('Choose Input Values')

# Slider specs per feature set: (feature/column name, slider label, numeric cast).
# The feature name doubles as the df_filtered column the slider range is read from
# and as the key in the returned input dict, which must match the model's columns.
_WEATHER_SLIDERS = (
    ("temperature_2m", "Temperature (°C)", float),
    ("wind_speed_10m", "Wind Speed (km/h)", float),
    ("wind_gusts_10m", "Wind Gusts (km/h)", float),
    ("rain", "Rain (mm)", float),
)
_SERVE_BLOCK_SLIDERS = (
    ("ServeFault", "ServeFault", int),
    ("ServeTotal", "ServeTotal", int),
    ("BlockTotal", "BlockTotal", int),
)

# Menu entry -> (slider specs, label of the set-3 duration slider).
# The labels are kept exactly as each menu entry has always shown them.
_INPUT_FORMS = {
    'Spike-Fault with weather impact (Random Forest)':
        (_WEATHER_SLIDERS, 'Duration Set 3 (in min)'),
    'Spike-Fault with weather impact (Gradient Boosting)':
        (_WEATHER_SLIDERS, 'Duration Set 3 (in min)'),
    'Spike-Fault without weather impact (Random Forest)':
        (_SERVE_BLOCK_SLIDERS, 'Duration Set 3 (in min)'),
    'Spike-Fault without weather impact (Gradient Boosting)':
        (_SERVE_BLOCK_SLIDERS, 'Duration Set3 (in min)'),
}


def _collect_inputs(slider_specs, duration_label):
    """Render the sidebar widgets for one model and return its feature dict.

    Widgets appear in this order: one slider per entry of ``slider_specs``,
    the "Third Set?" selectbox, the set-3 duration slider (only when a third
    set is chosen) and the "Team 1" selectbox.

    Args:
        slider_specs: iterable of ``(column, label, cast)``; each slider spans
            the min/max of ``df_filtered[column]`` converted with ``cast``.
        duration_label: label of the set-3 duration slider.

    Returns:
        dict keyed by feature name with plain scalar values.
    """
    features = {}
    for column, label, cast in slider_specs:
        observed = df_filtered[column]
        # Each value must be the bare slider result: a trailing comma after the
        # call would wrap it in a 1-tuple and corrupt the feature row.
        features[column] = st.sidebar.slider(label, cast(observed.min()), cast(observed.max()))

    # Ask whether a third set should be taken into account.
    third_set_selection = st.sidebar.selectbox("Third Set?", options=["No", "Yes"])
    third_set_value = 1 if third_set_selection == "Yes" else 0

    if third_set_value == 1:
        # Slider range comes from the rows flagged as having a third set.
        third_set_rows = df_filtered[df_filtered["@DurationSet3_indicator"] == third_set_value]
        if not third_set_rows.empty:
            # Column values are divided by 60 (seconds -> minutes) for display.
            duration_min = float(third_set_rows['@DurationSet3'].min()) / 60
            duration_max = round(float(third_set_rows['@DurationSet3'].max()) / 60, 2)
        else:
            duration_min, duration_max = 0.0, 0.0
        # NOTE(review): the slider value (minutes) is passed on as "@DurationSet3"
        # unchanged, while the column it is derived from is divided by 60 first --
        # confirm which unit the model expects.
        duration_val = st.sidebar.slider(duration_label, duration_min, duration_max)
    else:
        # Without a third set the duration is fixed at 0.
        duration_val = 0.0

    # Team choice is independent of the third-set selection.
    team1_selection = st.sidebar.selectbox("Team 1", df_filtered["Team1"].unique())

    features["@DurationSet3"] = duration_val              # from the slider, or 0.0
    features["@DurationSet3_indicator"] = third_set_value  # 1 if a third set was chosen, else 0
    features["Team1"] = team1_selection
    return features


input_data = {}
if model_choice in _INPUT_FORMS:
    input_data = _collect_inputs(*_INPUT_FORMS[model_choice])

# Single-row frame whose columns follow the feature names stored on the model.
feature_columns = model.feature_names_in_
input_df = pd.DataFrame([input_data], columns=feature_columns)

# The prediction is only computed and shown once the sidebar button is pressed.
prediction_requested = st.sidebar.button("🔮 Show Prediction")
if prediction_requested:
    prediction = model.predict(input_df)

    # Spacer above the result.
    st.markdown("<br>", unsafe_allow_html=True)

    # Heading, then the first predicted value formatted to two decimals.
    st.markdown("""
    ### 📈 Prediction Result
    """)
    st.success(f"**Predicted Performance:** {prediction[0]:.2f} spike faults")

    # Spacer below the result.
    st.markdown("<br><br>", unsafe_allow_html=True)



#---------------------------------------------------------------------
# if model_choice in ['Spike-Fault with weather impact (Random Forest)', 'Spike-Fault with weather impact (Gradient Boosting)']:
#     if st.sidebar.button("Show influence of weather variables"):
#         st.markdown(f"### 📈 Weather Impact for {input_data[Team1]}")

#         # Definiere die Beschriftungen für die Wettervariablen
#         var_labels = {
#             "temperature_2m": "Temperature (°C)",
#             "wind_speed_10m": "Wind Speed (m/s)",
#             "wind_gusts_10m": "Wind Gusts (m/s)",
#             "rain": "Rain (mm)"
#         }

#         # Hole das in der Sidebar ausgewählte Team1 (Standard: erster Eintrag)
#         selected_team = st.session_state.get("team1_selection", df_filtered["Team1"].unique()[0])

#         # Filtere das DataFrame, sodass nur Daten von Team1 herangezogen werden,
#         # falls du die Wetterbereiche und Durchschnittswerte speziell für dieses Team möchtest:
#         df_team1 = df_filtered[df_filtered["Team1"] == selected_team]

#         # Erstelle den Basis-Input, ausgehend von den durchschnittlichen Werten von Team1
#         base_input = {
#             "temperature_2m": st.session_state.get("temperature_val", float(df_team1["temperature_2m"].mean())),
#             "wind_speed_10m": st.session_state.get("wind_speed_val", float(df_team1["wind_speed_10m"].mean())),
#             "wind_gusts_10m": st.session_state.get("wind_gusts_val", float(df_team1["wind_gusts_10m"].mean())),
#             "rain": st.session_state.get("rain_val", float(df_team1["rain"].mean())),
#             "@DurationSet3": st.session_state.get("duration_val", 0.0),
#             "@DurationSet3_indicator": st.session_state.get("third_set_value", 0),
#             "Team1": selected_team
#         }

#         # Definiere, welche Wettervariablen untersucht werden sollen
#         weather_vars = ["temperature_2m", "wind_speed_10m", "wind_gusts_10m", "rain"]

#         # Erstelle ein 2x2 Gitter für die Subplots
#         from plotly.subplots import make_subplots
#         from plotly import graph_objects as go
#         fig = make_subplots(rows=2, cols=2)

#         # Iteriere über die Wettervariablen, berechne für jeden Wertebereiche und Vorhersagen
#         for i, var in enumerate(weather_vars):
#             # Bestimme den Minimal- und Maximalwert aus dem gefilterten DataFrame (nur Team1)
#             vmin = float(df_team1[var].min())
#             vmax = float(df_team1[var].max())
            
#             # Erstelle ein Array mit 100 Werten zwischen vmin und vmax
#             values = np.linspace(vmin, vmax, num=100)
#             predictions = []

#             # Für jeden Wert der aktuellen Wettervariable:
#             for val in values:
#                 # Kopiere den Basis-Datensatz und setze den aktuellen Wetterwert
#                 current_input = base_input.copy()
#                 current_input[var] = val

#                 # Erstelle ein DataFrame, das die Reihenfolge der Features einhält, wie es dein Modell erwartet
#                 input_df = pd.DataFrame([current_input], columns=model.feature_names_in_)

#                 # Berechne die Vorhersage (z. B. Anzahl der Spike Errors)
#                 prediction = model.predict(input_df)[0]
#                 predictions.append(prediction)

#             # Option 1: Glätten mittels Moving Average (Fenstergröße anpassbar)
#             window_size = 5
#             smoothed_predictions = np.convolve(predictions, np.ones(window_size) / window_size, mode='same')

#             # Option 2: Trendlinie mittels linearer Regression
#             coeffs = np.polyfit(values, predictions, 1)
#             trend_line = np.polyval(coeffs, values)

#             # Bestimme den Subplot: 2 Spalten (2x2 Gitter)
#             row = i // 2 + 1
#             col = i % 2 + 1

#             # Füge die geglättete Kurve hinzu
#             fig.add_trace(
#                 go.Scatter(x=values, y=smoothed_predictions, mode='lines', name=f'{var} (smoothed)'),
#                 row=row, col=col
#             )

#             # Füge die Trendlinie (gestrichelt) hinzu
#             fig.add_trace(
#                 go.Scatter(x=values, y=trend_line, mode='lines', name=f'{var} (trend)', line=dict(dash='dash')),
#                 row=row, col=col
#             )

#             # Setze die Achsentitel für jeden Subplot
#             fig.update_xaxes(title_text=var_labels.get(var, var), row=row, col=col)
#             fig.update_yaxes(title_text="Predicted SpikesFaults", row=row, col=col)

#         # Passe das Layout des gesamten Plots an
#         fig.update_layout(
#             title_text="Influence of the weather for sum of Spike Faults for Team 1",
#             height=700,
#             width=900,
#             showlegend=False
#         )

#         st.plotly_chart(fig)

# if model_choice in ['Spike-Fault with weather impact (Random Forest)', 'Spike-Fault with weather impact (Gradient Boosting)']:
#     if st.sidebar.button("Show influence of weather variables"):
#         st.markdown("### 📈 Weather Impact on Spike Errors – All Teams (by Gender)")

#         var_labels = {
#         "temperature_2m": "Temperature (°C)",
#         "wind_speed_10m": "Wind Speed (km/h)",
#         "wind_gusts_10m": "Wind Gusts (km/h)",
#         "rain": "Rain (mm)"
#         }


#         base_input = {
#         "temperature_2m": st.session_state.get("temperature_val", float(df_filtered["temperature_2m"].mean())),
#         "wind_speed_10m": st.session_state.get("wind_speed_val", float(df_filtered["wind_speed_10m"].mean())),
#         "wind_gusts_10m": st.session_state.get("wind_gusts_val", float(df_filtered["wind_gusts_10m"].mean())),
#         "rain": st.session_state.get("rain_val", float(df_filtered["rain"].mean())),
#         "@DurationSet3": st.session_state.get("duration_val", 0.0),
#         "@DurationSet3_indicator": st.session_state.get("third_set_value", 0),
#         "Team1": st.session_state.get("team1_selection", df_filtered["Team1"].unique()[0])
#         }

#         # Definiere, welche Wettervariablen du untersuchen möchtest
#         weather_vars = ["temperature_2m", "wind_speed_10m", "wind_gusts_10m", "rain"]

#         #   Erstelle ein 2x2 Gitter für die Subplots
#         fig = make_subplots(rows=2, cols=2)
#         #, subplot_titles=[None] * len(weather_vars)
#         # Für jede Wettervariable den Wertebereich bestimmen und darin Vorhersagen berechnen
#         for i, var in enumerate(weather_vars):
#             # Bestimme den Minimal- und Maximalwert aus deinen Daten
#             vmin = float(df_filtered[var].min())
#             vmax = float(df_filtered[var].max())
        
#             # Erstelle ein Array mit Werten zwischen vmin und vmax (z.B. 100 Punkte)
#             values = np.linspace(vmin, vmax, num=100)
#             predictions = []

#             # Für jeden Wert der aktuellen Wettervariable:
#             for val in values:
#                 # Kopiere den Basis-Datensatz und setze den aktuellen Wetterwert
#                 current_input = base_input.copy()
#                 current_input[var] = val

#                 # Erstelle ein DataFrame, das die Reihenfolge der Features einhält, wie es dein Modell erwartet
#                 input_df = pd.DataFrame([current_input], columns=model.feature_names_in_)
            
#                 # Vorhersage des Modells (Anzahl der Spikes)
#                 prediction = model.predict(input_df)[0]
#                 predictions.append(prediction)

#             # Option 1: Glätten mittels Moving Average
#             window_size = 5  # anpassbar
#             smoothed_predictions = np.convolve(predictions, np.ones(window_size) / window_size, mode='same')
        
#             # Option 2: Trendlinie mittels linearer Regression
#             coeffs = np.polyfit(values, predictions, 1)
#             trend_line = np.polyval(coeffs, values)

#             # Bestimme die Position im Subplot-Gitter (2 Spalten)
#             row = i // 2 + 1
#             col = i % 2 + 1

#             # ursprüngliche Glättungskurve plotten:
#             fig.add_trace(
#                 go.Scatter(x=values, y=smoothed_predictions, mode='lines', name=f'{var} (smoothed)'),
#                 row=row, col=col
#             )
        
#             # Trendlinie hinzufügen
#             fig.add_trace(
#                 go.Scatter(x=values, y=trend_line, mode='lines', name=f'{var} (trend)', line=dict(dash='dash')),
#                 row=row, col=col
#             )

#             fig.update_xaxes(title_text=var_labels.get(var, var), row=row, col=col)
#             fig.update_yaxes(title_text="Predicted Spike Faults", row=row, col=col)

#             #     # Berechne den minimalen und maximalen y-Wert und füge einen zusätzlichen Margin hinzu
#             # y_min = np.min(predictions)
#             # y_max = np.max(predictions)
#             # margin = (y_max - y_min) * 0.2  # 20% extra Platz an der oberen Seite
            
#             # # Füge eine Linie zur entsprechenden Subplot hinzu
#             # fig.add_trace(
#             #     px.line(x=values, y=predictions).data[0],
#             #     row=row, col=col
#             #     )
#             # # Achsentitel setzen
#             # fig.update_xaxes(title_text=var_labels.get(var, var), row=row, col=col)
#             # fig.update_yaxes(title_text="Predicted Spikes", row=row, col=col,range=[y_min, y_max + margin])

#         # Passe das Layout an (Titel, Abstände, etc.)
#         fig.update_layout(
#             title_text="Influence of the weather for number of SpikesFaults",
#             height=700, width=900,
#             showlegend=False
#         )

#         st.plotly_chart(fig)

Overwriting pages/3_Prediction_of_Spike_Faults.py


In [10]:
%%writefile pages/4_Comparison_of_Beachvolleyball_Players.py
import streamlit as st
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
import pandas as pd
import numpy as np
import joblib  # Zum Laden gespeicherter Modelle
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt
import plotly.express as px
from utils.data import load_data, get_avg_servepoints_by_tournament

# Player data, read through utils.data.load_data with its default path.
df = load_data()


# Browser-tab title and layout of this page (no page icon is set).
page_settings = {
    "page_title": "Comparison of Beachvolleyball Players",
    "layout": "centered",
}
st.set_page_config(**page_settings)
st.title("🏐 Comparison of Beach Volleyball Players")

# Short introduction shown under the title.
intro_text = """
In this section, you can compare the performance of different beach volleyball players based on their past game data.

Use the filters below to select players and examine their statistics, allowing you to gain deeper insights into their strengths and weaknesses.

🔍 **How to Compare:**
- Select two players from the dropdown lists.
- Adjust additional filters to refine the comparison.
- Explore various performance metrics, such as *spike faults*, *sets won*, and more!

Start comparing and find out which player stands out!
"""
st.markdown(intro_text)

###########################################################
# Gender filter
###########################################################
# Store the gender codes as plain integers.
df['Gender_x'] = df['Gender_x'].astype(int)

# Display labels for the known gender codes.
gender_mapping = {0: 'Male', 1: 'Female'}

# Offer every code found in the data; codes without a label are shown as-is.
original_gender_values = df['Gender_x'].unique()
display_options = []
for gender_code in original_gender_values:
    display_options.append(gender_mapping.get(gender_code, gender_code))

# Sidebar
selected_gender_display = st.sidebar.selectbox("Choose Gender", display_options)

# Translate the chosen label back into its code; unlabelled entries pass through unchanged.
inverse_mapping = dict((label, code) for code, label in gender_mapping.items())
if selected_gender_display in inverse_mapping:
    selected_gender = inverse_mapping[selected_gender_display]
else:
    selected_gender = selected_gender_display

# Season selection: "All" first, followed by the distinct seasons in ascending order.
season_values = sorted(set(map(int, df["Season"].unique())))
season_options = ["All", *season_values]
selected_season = st.sidebar.selectbox("Choose Season", season_options)

# The player list depends on gender only (not on season), so it stays stable
# while the season is switched.
gender_mask = df["Gender_x"] == selected_gender
df_team_selection = df[gender_mask]
spieler_liste = df_team_selection["Full_Name"].unique()

# Two individual players to compare.
spieler1 = st.selectbox("Choose Player 1:", spieler_liste, key="spieler1")
spieler2 = st.selectbox("Choose Player 2:", spieler_liste, key="spieler2")

# Vertical gap below the two selectors.
st.markdown("<br>" * 2, unsafe_allow_html=True)
#st.write('The values are the average of all played games by the chosen players')

# Rows matching the sidebar filters: gender always, season unless "All" is chosen.
gender_mask = df["Gender_x"] == selected_gender
if selected_season == "All":
    df_filtered_all = df[gender_mask]
else:
    df_filtered_all = df[gender_mask & (df["Season"] == selected_season)]

# Compare only when both players were chosen.
if spieler1 and spieler2:
    # df_filtered_all already carries the season filter, so selecting by name is enough.
    df_spieler1 = df_filtered_all[df_filtered_all["Full_Name"] == spieler1]
    df_spieler2 = df_filtered_all[df_filtered_all["Full_Name"] == spieler2]

    # Key figures to compare.
    kennzahlen = ["ServePoint", "DigTotal", "SpikeFault", "ServeFault", "BlockPoint", "ReceptionFault"]

    selections = [(spieler1, df_spieler1), (spieler2, df_spieler2)]

    # The player list is not restricted by season, so a chosen player can have no
    # rows under the current filters. Averaging an empty frame yields NaN, so say
    # so explicitly instead of charting NaN values.
    players_without_rows = list(dict.fromkeys(str(name) for name, rows in selections if rows.empty))
    if players_without_rows:
        st.warning("No games found for " + ", ".join(players_without_rows) + " with the selected filters.")

    # Long-format columns for Plotly Express: one row per (player, key figure),
    # holding the mean over all of that player's rows.
    keyfigure_col, value_col, player_col = [], [], []
    for name, rows in selections:
        if rows.empty:
            continue
        averages = rows[kennzahlen].mean()
        keyfigure_col.extend(kennzahlen)
        value_col.extend(averages.values)
        player_col.extend([name] * len(kennzahlen))

    if keyfigure_col:
        df_plot = pd.DataFrame({
            "keyfigure": keyfigure_col,
            "value": value_col,
            "player": player_col,
        })

        # Grouped horizontal bar chart: key figures on the rows, one colour per player.
        fig = px.bar(
            df_plot,
            x="value",
            y="keyfigure",
            color="player",
            barmode="group",
            orientation="h",
            title="Comparison of players",
            text="value"
        )

        # Show the bar labels and hover values with two decimals.
        fig.update_traces(texttemplate='%{text:.2f}', textposition='outside', hovertemplate='%{y}: %{x:.2f}', cliponaxis=False)
        fig.update_layout(width = 1800, margin=dict(l=50, r=150, t=50, b=50), xaxis_title="", yaxis_title="",xaxis=dict(showticklabels=False))
        st.plotly_chart(fig)


# Vertical gap ahead of the closing note.
st.markdown("<br>" * 2, unsafe_allow_html=True)

# Closing note: the charted numbers are per-player averages.
averages_note = """
The values represent the **average** of all played games by the selected players.

This gives you a clearer overview of their overall performance and consistency across various matches.
"""
st.write(averages_note)

# Placeholder kept from the original script: further plots may be added below.


Overwriting pages/4_Comparison_of_Beachvolleyball_Players.py


In [11]:
%%writefile utils/data.py

# data.py
import os
import pandas as pd
# Import-time side effect: importing this module switches the process working
# directory, so relative paths (such as load_data's default "Playerdata.csv")
# resolve against this folder.
# NOTE(review): hard-coded absolute, machine-specific path -- the module only
# imports cleanly where this directory exists.
os.chdir('C:/Users/Katharina/Desktop/Weiterbildung/Bootcamp/Bootcamp/Final_project/streamlit_parts')

def load_data(path="Playerdata.csv"):
    """Read a semicolon-separated CSV file into a DataFrame.

    Args:
        path: location of the CSV file; a relative path is resolved against
            the current working directory.

    Returns:
        pandas.DataFrame with the parsed file contents.
    """
    return pd.read_csv(path, sep=";")

def get_avg_servepoints_by_tournament(df):
    """Return the mean ``ServePoint`` per player and season.

    Despite the function name, rows are grouped by ``Full_Name`` and
    ``Season`` (not by tournament).

    Args:
        df: DataFrame with the columns ``Full_Name``, ``Season`` and
            ``ServePoint``.

    Returns:
        DataFrame with the columns ``Full_Name``, ``Season`` and
        ``AverageServePoints``, one row per (player, season) pair.
    """
    group_keys = ["Full_Name", "Season"]
    per_player_season = df.groupby(group_keys, as_index=False)["ServePoint"].mean()
    return per_player_season.rename(columns={"ServePoint": "AverageServePoints"})


Overwriting utils/data.py


In [12]:
# Load the semicolon-separated team statistics into the notebook variable `df`
# (the path is relative to the current working directory).
df = pd.read_csv("TeamStatistik1.csv", sep=';')

In [13]:
%%writefile pages/5_Comparison_of_Beachvolleyball_Teams.py
import streamlit as st
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
import pandas as pd
import numpy as np
import joblib  # Zum Laden gespeicherter Modelle
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt
import plotly.express as px

# Switch to the project folder so the relative CSV path below resolves.
# NOTE(review): hard-coded absolute, machine-specific path.
project_dir = 'C:/Users/Katharina/Desktop/Weiterbildung/Bootcamp/Bootcamp/Final_project/streamlit_parts'
os.chdir(project_dir)
df = pd.read_csv("TeamStatistik1.csv", sep=';')

# Browser-tab title and layout of this page (no page icon is set).
page_settings = {
    "page_title": "Comparison of Beachvolleyball-Teams",
    "layout": "centered",
}
st.set_page_config(**page_settings)
st.title("🏐 Comparison of Beachvolleyball-Teams")

# Short introduction shown under the title.
intro_text = """
In this section, you can compare the performance of different beach volleyball teams based on their match data.

By selecting two teams, you can analyze various performance metrics, such as **total block points**, **spike faults**, **serve points**, and more.

🔍 **How to Compare:**
- Select two teams from the dropdown lists.
- Adjust additional filters for a more customized comparison.
- Explore the statistical data for both teams and see how they stack up against each other.

Start comparing and discover which team has the edge in different aspects of the game!
"""
st.markdown(intro_text)
###########################################################
# Gender filter
###########################################################
# Store the gender codes as plain integers.
df['Gender_x'] = df['Gender_x'].astype(int)

# Display labels for the known gender codes.
gender_mapping = {
    0: 'Male',
    1: 'Female'
}
# Offer every code found in the data, in ascending order; codes without a
# label are shown as their string form.
original_gender_values = sorted(df['Gender_x'].unique())
display_options = [gender_mapping.get(g, str(g)) for g in original_gender_values]


selected_gender_display = st.sidebar.selectbox("Choose Gender", display_options)
# Map the chosen label back to its integer code. The lookup is built from the
# options actually offered, so a code that has no entry in gender_mapping (shown
# as a string) still maps back to the integer stored in the data; otherwise the
# string would never equal the integer column in the filters below.
inverse_mapping = {
    label: int(code) for code, label in zip(original_gender_values, display_options)
}
selected_gender = inverse_mapping.get(selected_gender_display, selected_gender_display)

#-----------------------------------------------
# 🔹 Season selection: Add "All" option
season_options = ["All"] + sorted({int(s) for s in df["Season"].unique()})
selected_season = st.sidebar.selectbox("Choose Season", season_options)

# 🔹 **Spieler-Liste bleibt stabil** – Nur nach Gender gefiltert, nicht nach Season!
df_team_selection = df[df["Gender_x"] == selected_gender]
spieler_liste = df_team_selection["TeamNameFull"].unique()

# Zwei einzelne Spieler zur Auswahl anbieten
spieler1 = st.selectbox("Choose Team 1:", spieler_liste, key="spieler1")
spieler2 = st.selectbox("Choose Team 2:", spieler_liste, key="spieler2")

st.markdown("<br>" * 2, unsafe_allow_html=True)


# Apply the sidebar filters for the metric comparison.
# Choosing "All" skips the season filter.
if selected_season == "All":
    df_filtered_all = df[df["Gender_x"] == selected_gender]
else:
    df_filtered_all = df[(df["Gender_x"] == selected_gender) & (df["Season"] == selected_season)]

# Only compare once both teams have been chosen.
if spieler1 and spieler2:
    # df_filtered_all is already restricted to the chosen season (if any),
    # so only the team filter remains to be applied here.
    df_spieler1 = df_filtered_all[df_filtered_all["TeamNameFull"] == spieler1]
    df_spieler2 = df_filtered_all[df_filtered_all["TeamNameFull"] == spieler2]

    # Metrics to compare
    kennzahlen = ["ServePoint", "DigTotal", "SpikeFault", "ServeFault", "BlockPoint", "ReceptionFault"]

    # The team list is not filtered by season, so a selected team may have no
    # rows for the chosen season. Its averages would all be NaN and the chart
    # would be empty, so tell the user instead of plotting.
    teams_without_data = list(dict.fromkeys(
        str(team)
        for team, team_rows in ((spieler1, df_spieler1), (spieler2, df_spieler2))
        if team_rows.empty
    ))

    if teams_without_data:
        st.warning(
            "No matches found for the selected filters for: "
            + ", ".join(teams_without_data)
            + ". Please choose another season or team."
        )
    else:
        # Average each metric over all rows of the respective team.
        stats_spieler1 = df_spieler1[kennzahlen].mean()
        stats_spieler2 = df_spieler2[kennzahlen].mean()

        # Build a long-format DataFrame, which is what Plotly Express expects:
        # one row per (metric, team) combination.
        daten = {
            "keyfigure": kennzahlen * 2,  # metric names repeated once per team
            "value": list(stats_spieler1.values) + list(stats_spieler2.values),
            "Team": [spieler1] * len(kennzahlen) + [spieler2] * len(kennzahlen)
        }
        df_plot = pd.DataFrame(daten)

        # Grouped bar chart, horizontal so that the metrics are listed as rows.
        fig = px.bar(
            df_plot,
            x="value",
            y="keyfigure",
            color="Team",
            barmode="group",
            orientation="h",
            title="Comparison of teams",
            text="value"
        )
        # Show the bar labels and hover values rounded to two decimals.
        fig.update_traces(texttemplate='%{text:.2f}', textposition='outside', hovertemplate='%{y}: %{x:.2f}', cliponaxis=False)
        fig.update_layout(margin=dict(l=50, r=150, t=50, b=50), xaxis_title="", yaxis_title="",xaxis=dict(showticklabels=False))
        st.plotly_chart(fig)

# Closing note shown below the comparison, explaining that the displayed
# values are averages.
closing_note = """
The values represent the **average** of all played games by the selected teams.

This provides a clearer overview of their overall performance and consistency across various matches, helping you compare their strengths and weaknesses over time.
"""
st.write(closing_note)


Overwriting pages/5_Comparison_of_Beachvolleyball_Teams.py
