In [None]:
import pandas as pd
from dash import Dash, html, dcc, Input, Output

In [None]:
# Players dataset
# Each entry is the stem of a CSV under Players/ and doubles as the key in players_data.
file_paths = [
    "player_defense",
    "player_gca",
    "player_keepers",
    "player_keepersadv",
    "player_misc",
    "player_passing",
    "player_passing_types",
    "player_playingtime",
    "player_possession",
    "player_shooting",
    "player_stats",
]

# Read every table once, keyed by its stem.
players_data = dict(
    (name, pd.read_csv(f"Players/{name}.csv")) for name in file_paths
)


# Clean and preprocess data
def preprocess_data(players_data, min_minutes_90s=1.0):
    """Clean every table in ``players_data`` and store the result under the same key.

    For each DataFrame: rows that are entirely NaN are dropped, numeric columns
    are filled with their column median, every other column is filled with its
    most frequent value, and -- when a ``minutes_90s`` column exists -- only rows
    with ``minutes_90s >= min_minutes_90s`` are kept.

    Args:
        players_data: Dict mapping a table name to its DataFrame. The dict
            entries are replaced with the cleaned frames; the frames that were
            passed in are not modified.
        min_minutes_90s: Lower bound applied to the ``minutes_90s`` column.

    Returns:
        None. The cleaned frames are available through ``players_data``.
    """
    # Snapshot the items so that replacing values never interferes with iteration.
    for name, df in list(players_data.items()):
        # Copy so the caller's original frame is left untouched.
        cleaned = df.dropna(how='all').copy()

        numeric_cols = cleaned.select_dtypes(include=['number']).columns
        # Everything that is not numeric is treated as categorical.
        categorical_cols = cleaned.select_dtypes(exclude=['number']).columns

        if len(numeric_cols) > 0:
            cleaned[numeric_cols] = cleaned[numeric_cols].fillna(cleaned[numeric_cols].median())

        if len(categorical_cols) > 0:
            modes = cleaned[categorical_cols].mode()
            # mode() is empty when no categorical column holds a value; taking
            # .iloc[0] of it would raise IndexError, so skip the fill instead.
            if not modes.empty:
                cleaned[categorical_cols] = cleaned[categorical_cols].fillna(modes.iloc[0])

        if 'minutes_90s' in cleaned.columns:
            cleaned = cleaned[cleaned['minutes_90s'] >= min_minutes_90s]

        players_data[name] = cleaned


# Clean every player table; the entries of players_data are replaced with the cleaned frames.
preprocess_data(players_data)


# Report missing values after preprocessing
def report_missing(players_data):
    """Print the total number of null cells left in each table.

    Args:
        players_data: Dict mapping a table name to its DataFrame.
    """
    for table_name in players_data:
        # Sum per column first, then across columns, to get one count per table.
        missing_total = players_data[table_name].isnull().sum().sum()
        print(f"After cleaning, missing values in {table_name}:", missing_total)


# Print the remaining null count for each player table.
report_missing(players_data)


# Export cleaned data
def export_data(players_data):
    """Write every table in ``players_data`` to ``CleanedData/<name>.csv``.

    Args:
        players_data: Dict mapping a table name to its DataFrame.

    The row index is not written. The ``CleanedData`` directory is not created
    here, so it has to exist already.
    """
    for table_name in players_data:
        target = f"CleanedData/{table_name}.csv"
        players_data[table_name].to_csv(target, index=False)


# Write each player table to CleanedData/<name>.csv.
export_data(players_data)


In [None]:

# Team dataset
# Load CSV data
def load_csv_data(file_path):
    """Read a CSV file and fill its missing values column by column.

    Numeric columns are filled with the column median; every other column is
    filled with its most frequent value.

    Args:
        file_path: Path passed straight to ``pd.read_csv``.

    Returns:
        The loaded DataFrame with missing values filled where a fill value
        could be computed.
    """
    data = pd.read_csv(file_path)
    for col in data.columns:
        series = data[col]
        # Decide by "is it numeric" rather than "is it object": string and
        # category dtypes are not object, and median() raises on them.
        if pd.api.types.is_numeric_dtype(series):
            fill_value = series.median()
        else:
            modes = series.mode()
            # A column without any value has no mode; leave it as it is.
            if modes.empty:
                continue
            fill_value = modes.iloc[0]
        data[col] = series.fillna(fill_value)
    return data


# Load JSON data
def load_json_data(file_path):
    """Read a JSON file into a DataFrame, using its top-level keys as the row index.

    Args:
        file_path: Path passed straight to ``pd.read_json``.

    Returns:
        The DataFrame produced by ``pd.read_json(..., orient='index')``.
    """
    json_frame = pd.read_json(file_path, orient='index')
    return json_frame


# Input locations for the team-level tables
group_stats_path = "Team/group_stats.csv"
team_data_path = "Team/team_data.csv"
team_tips_path = "Team/team_tips.json"

# Read the two CSV tables (missing values are filled on load) and the JSON tips
df_group_stats = load_csv_data(group_stats_path)
df_team_data = load_csv_data(team_data_path)
df_team_tips = load_json_data(team_tips_path)

# Inner join: keep only the teams that appear in both CSV tables
df_merged_team = pd.merge(left=df_team_data, right=df_group_stats, how='inner', on='team')

# Preview the first rows of each input table
print(df_team_data.head(), df_group_stats.head(), df_team_tips.head(), sep="\n")


# Export cleaned data
def export_data(df_merged_team_data, name="merged_team"):
    """Write cleaned data to ``CleanedData/<name>.csv``.

    This definition replaces the earlier ``export_data`` once the cell has run,
    so it accepts both shapes of input used in this notebook.

    Args:
        df_merged_team_data: Either a single DataFrame, written to
            ``CleanedData/<name>.csv``, or a dict mapping table names to
            DataFrames, each written to ``CleanedData/<key>.csv``.
        name: File stem used when a single DataFrame is passed. Ignored for
            dict input.

    The row index is not written. The ``CleanedData`` directory is not created
    here, so it has to exist already.
    """
    if isinstance(df_merged_team_data, pd.DataFrame):
        # DataFrame.items() yields (column name, Series) pairs, so looping over
        # it would write one CSV per column. Wrap the frame so it is written once.
        tables = {name: df_merged_team_data}
    else:
        tables = dict(df_merged_team_data)

    for table_name, df in tables.items():
        df.to_csv(f"CleanedData/{table_name}.csv", index=False)


# NOTE(review): df_merged_team is a single DataFrame, while export_data loops over
# `.items()`; on a DataFrame that yields (column name, column) pairs -- confirm the
# export writes the merged table rather than one file per column.
export_data(df_merged_team)

# Show the first rows of the merged team table
print(df_merged_team.head())


In [None]:

# Match dataset
# Load the data
def load_data(file_path):
    """Read a CSV file and fill its missing values.

    Numeric columns are filled with their column median; every other column is
    filled with its most frequent value.

    Args:
        file_path: Path passed straight to ``pd.read_csv``.

    Returns:
        The loaded DataFrame with missing values filled where a fill value
        could be computed.
    """
    data = pd.read_csv(file_path)

    numeric_cols = data.select_dtypes(include=['number']).columns
    # Everything that is not numeric is treated as categorical.
    categorical_cols = data.select_dtypes(exclude=['number']).columns

    if len(numeric_cols) > 0:
        data[numeric_cols] = data[numeric_cols].fillna(data[numeric_cols].median())

    if len(categorical_cols) > 0:
        modes = data[categorical_cols].mode()
        # mode() is empty when no categorical column holds a value; taking
        # .iloc[0] of it would raise IndexError, so skip the fill instead.
        if not modes.empty:
            data[categorical_cols] = data[categorical_cols].fillna(modes.iloc[0])

    return data


# Location of the raw match table
match_data_path = "Match Data/data.csv"

# Read it; load_data fills the missing values while loading
df_match_data = load_data(match_data_path)

# Preview the first rows of the processed table
print(df_match_data.head(5))

# Further transformations or aggregations here

# Persist the processed table so later analyses can start from the cleaned CSV
df_match_data.to_csv(path_or_buf='CleanedData/processed_match_data.csv', index=False)


## Known issue: the Images dataset has inconsistent file names, so the image lookup below doesn't work unless we create a new file with just one image, correctly named, per player.


# Images dataset
# Player names from the Images dataset; they populate the dropdown options below.
df_players_names = pd.read_csv('Images/List Of All Players Names.csv')
# Player -> team lookup; only the two needed columns are read, from the raw
# Players/ table rather than the CleanedData/ export.
df_teams = pd.read_csv('Players/player_misc.csv', usecols=['player', 'team'])

# World Cup group -> entries of the form "<team> Players" (hard-coded for simplicity).
# NOTE(review): the entries look like image-folder names -- confirm the spelling
# (e.g. 'Netherland Players') against the folders on disk.
group_mapping = {
    'Group A': ['Ecuador Players', 'Netherland Players', 'Qatar Players', 'Senegal Players'],
    'Group B': ['England Players', 'Iran Players', 'United States Players', 'Wales Players'],
    'Group C': ['Argentina Players', 'Mexico Players', 'Poland Players', 'Saudi Arabia Players'],
    'Group D': ['Australia Players', 'Denmark Players', 'France Players', 'Tunisia Players'],
    'Group E': ['Costa Rica Players', 'Germany Players', 'Japan Players', 'Spain Players'],
    'Group F': ['Belgium Players', 'Canada Players', 'Croatia Players', 'Morocco Players'],
    'Group G': ['Brazil Players', 'Cameroon Players', 'Serbia Players', 'Switzerland Players'],
    'Group H': ['Ghana Players', 'Portugal Players', 'South Korea Players', 'Uruguay Players'],
}

# Invert the mapping so an entry resolves straight to its group.
team_to_group = dict(
    (team, group)
    for group, teams in group_mapping.items()
    for team in teams
)

app = Dash(__name__)

# Page layout: a player dropdown and the image that the callback fills in.
app.layout = html.Div([
    dcc.Dropdown(
        id='player-dropdown',
        options=[{'label': player, 'value': player} for player in df_players_names['Name_Player']],
        # The prompt text goes in `placeholder`; `value` should be one of the
        # options or None, and 'Select a player' is not an option.
        placeholder='Select a player',
        value=None,
    ),
    html.Img(id='player-image')
])


@app.callback(
    Output('player-image', 'src'),
    [Input('player-dropdown', 'value')]
)
def update_image(selected_player):
    """Return the asset URL of the selected player's image.

    Args:
        selected_player: Current value of the player dropdown.

    Returns:
        The URL built by ``app.get_asset_url`` for the player's first image, or
        None when nothing is selected or the player has no row in ``df_teams``.
    """
    if not selected_player or selected_player == 'Select a player':
        return None

    # The dropdown names and df_teams come from different CSV files, so a
    # selected name may have no matching row; .iloc[0] would raise IndexError.
    team_matches = df_teams.loc[df_teams['player'] == selected_player, 'team']
    if team_matches.empty:
        return None
    team = team_matches.iloc[0]

    # team_to_group is keyed by "<team> Players", the same folder name used in
    # the path below. Fall back to the bare team name in case the data already
    # carries that suffix.
    team_folder = f"{team} Players"
    group = team_to_group.get(team_folder, team_to_group.get(team, 'Unknown'))

    # No leading slash: get_asset_url appends this path to the assets URL
    # prefix, and a leading slash would leave a double slash in the result.
    # NOTE(review): the files are presumably expected under the app's assets
    # folder -- confirm the directory layout.
    image_path = f"Images/Images/Images/{group}/{team_folder}/Images_{selected_player}/{selected_player}1.jpg"
    return app.get_asset_url(image_path)


if __name__ == '__main__':
    # Newer Dash releases start the server with `app.run` and no longer provide
    # `run_server`; older 2.x releases only have `run_server`. Prefer `run` and
    # fall back only when it is missing.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)
    
