In [72]:
pip install pulp



In [73]:
import pandas as pd
from sklearn.metrics.pairwise import euclidean_distances
from IPython.display import display
from pulp import LpProblem, LpVariable, lpSum, LpMaximize, LpStatus
from collections import Counter
import numpy as np
import random
import difflib

class MyPlayerStats:
    def __init__(self, input_df, correct_shirt_numbers):
        self.input_df = input_df
        self.correct_shirt_numbers = correct_shirt_numbers
        self.columns = [
            'position', 'shirt_number', 'goals', 'total_shots',
            'shots_on_target', 'shots_off_target', 'blocked_shots', 'saved_shots', 'total_passes', 'accurate_passes',
            'pass_success', 'key_passes', 'dribbles_attempted', 'dribbles', 'dribbles_success', 'dribbles_past',
            'aerial_duels', 'aerial_success', 'offensive_aerials', 'defensive_aerials', 'tackles_attempted',
            'tackles_won', 'tackles_success', 'clearances', 'interceptions', 'injuries', 'distance_covered',
            'avg_speed', 'highest_speed'
        ]

    def correct_shirt_numbers_func(self):
        def find_closest_shirt_number(incorrect_number):
            correct_number = difflib.get_close_matches(str(incorrect_number), self.correct_shirt_numbers, n=1, cutoff=0.6)
            return correct_number[0] if correct_number else incorrect_number

        self.input_df['corrected_shirt_number'] = self.input_df['shirt_number'].apply(find_closest_shirt_number)

    def process_data(self):
        self.correct_shirt_numbers_func()

        grouped = self.input_df.groupby(['corrected_shirt_number']).agg({
            'position': 'first',
            'goals': 'sum',
            'total_shots': 'sum',
            'shots_on_target': 'sum',
            'shots_off_target': 'sum',
            'blocked_shots': 'sum',
            'saved_shots': 'sum',
            'total_passes': 'sum',
            'accurate_passes': 'sum',
            'pass_success': lambda x: x[x != 0].mean() if (x != 0).any() else 0,
            'key_passes': 'sum',
            'dribbles_attempted': 'sum',
            'dribbles': 'sum',
            'dribbles_success': lambda x: x[x != 0].mean() if (x != 0).any() else 0,
            'dribbles_past': 'sum',
            'aerial_duels': 'sum',
            'aerial_success': lambda x: x[x != 0].mean() if (x != 0).any() else 0,
            'offensive_aerials': 'sum',
            'defensive_aerials': 'sum',
            'tackles_attempted': 'sum',
            'tackles_won': 'sum',
            'tackles_success': lambda x: x[x != 0].mean() if (x != 0).any() else 0,
            'clearances': 'sum',
            'interceptions': 'sum',
            'injuries': 'sum',
            'distance_covered': 'sum',
            'avg_speed': lambda x: x[x != 0].mean() if (x != 0).any() else 0,
            'highest_speed': 'max',
        }).reset_index()

        return grouped


class FirstModel:
    def __init__(self, teams, data_cleaned):
        self.teams = teams
        self.data_cleaned = data_cleaned
        self.features_to_compare = [
            'goals', 'goals_past', 'total_shots', 'shots_on_target', 'shots_off_target',
            'blocked_shots', 'total_possession', 'total_passes', 'accurate_passes',
            'pass_success', 'key_passes', 'dribbles_attempted', 'dribbles',
            'dribbles_past', 'dribbles_success', 'aerials_won', 'aerial_success',
            'offensive_aerials', 'defensive_aerials', 'tackles', 'tackle_success',
            'tackles_attempted', 'clearances', 'interceptions', 'corners', 'defenders',
            'midfielders', 'attackers', 'advanced_midfielders'
        ]

    def split_formation(self, df):
        formation_split = df['formations'].str.split('-')
        df['defenders'] = formation_split.str[0].astype(int)
        df['midfielders'] = formation_split.str[1].astype(int)
        df['attackers'] = formation_split.str[-1].astype(int)
        df['advanced_midfielders'] = formation_split.apply(lambda x: int(x[2]) if len(x) > 3 else 0)
        return df

    def find_similar_rows(self):


        lost_games = self.data_cleaned[self.data_cleaned['lose'] == 1]
        lost_games = self.split_formation(lost_games)

        input_row = self.teams.iloc[0].to_dict()
        input_df = pd.DataFrame([input_row])
        input_df = self.split_formation(input_df)

        filtered_lost_games = lost_games[self.features_to_compare]
        distances = euclidean_distances(filtered_lost_games, input_df[self.features_to_compare])
        lost_games['distance'] = distances

        top_n = 100
        similar_rows = lost_games.sort_values(by='distance').head(top_n)
        similar_rows = similar_rows.drop(columns=['distance'])

        return similar_rows

    def find_winning_rows(self, similar_rows):
        lost_game_ids = similar_rows['id_match'].unique()
        winning_rows = self.data_cleaned[(self.data_cleaned['id_match'].isin(lost_game_ids)) & (self.data_cleaned['win'] == 1)]
        games = winning_rows

      # Apply the split functions
        games = self.split_formation(games)

        # Step 2: Input row (split the score and formation in the same way)
        input_row = self.teams.iloc[1].to_dict()

      # Convert the input row to DataFrame
        input_df = pd.DataFrame([input_row])

      # Apply score and formation splitting to the input row
        input_df = self.split_formation(input_df)

# Step 4: Filter the lost games to keep only the relevant features
        filtered_games = games[self.features_to_compare]

# Step 5: Calculate Euclidean distances between the input row and all won games
        distances = euclidean_distances(filtered_games, input_df[self.features_to_compare])

# Step 6: Add distances to the filtered lost games DataFrame
        games['distance'] = distances

# Step 7: Sort by similarity (smallest distance first) and get top similar rows
        top_n = 10  # Number of similar rows to return
        similar_rows = games.sort_values(by='distance').head(top_n)

# Step 8: Drop the distance column if no longer needed
        similar_rows = similar_rows.drop(columns=['distance'])
        pd.set_option('display.max_columns', None)
        pd.set_option('display.max_rows', None)

# Output the most similar rows as a table
        display(similar_rows)
        similar_rows.to_csv('/content/recommended teams.csv', index=False)
        return similar_rows

class SecondModel:
    def __init__(self, input_row, player_data):
        self.input_row = input_row
        self.player_data = player_data
        self.STATS_TO_MAXIMIZE = {
            'Goalkeeper': ['aerial_success', 'blocked_shots'],
            'Right-back': ['tackles_attempted', 'tackle_success', 'clearances', 'interceptions', 'dribbles_success', 'aerial_success', 'pass_success'],
            'Left-back': ['tackles_attempted', 'tackle_success', 'clearances', 'interceptions', 'dribbles_success', 'aerial_success', 'pass_success'],
            'Center-back': ['tackles_attempted', 'tackle_success', 'clearances', 'interceptions', 'aerial_success', 'pass_success', 'defensive_aerials'],
            'Defensive Midfielder': ['tackles_attempted', 'interceptions', 'pass_success', 'key_passes', 'defensive_aerials'],
            'Central Midfielder': ['total_passes', 'accurate_passes', 'key_passes', 'dribbles_attempted', 'dribbles_success', 'pass_success', 'dribbles'],
            'Attacking Midfielder': ['goals', 'key_passes', 'dribbles_attempted', 'dribbles_success', 'pass_success', 'shots_on_target', 'dribbles', 'offensive_aerials'],
            'Right Winger': ['goals', 'assists', 'key_passes', 'dribbles_attempted', 'dribbles_success', 'crossing_accuracy', 'shots_on_target', 'pass_success', 'dribbles'],
            'Left Winger': ['goals', 'assists', 'key_passes', 'dribbles_attempted', 'dribbles_success', 'crossing_accuracy', 'shots_on_target', 'pass_success', 'dribbles'],
            'Striker': ['goals', 'shots_on_target', 'total_shots', 'key_passes', 'aerial_success', 'dribbles_success', 'pass_success', 'offensive_aerials'],
            'Center Forward': ['goals', 'shots_on_target', 'total_shots', 'key_passes', 'aerial_success', 'dribbles_success', 'pass_success', 'offensive_aerials']
        }
        self.STATS_TO_MINIMIZE = {
            'Goalkeeper': ['goals_past', 'dribbles_past'],
            'Right-back': ['dribbles_past', 'injuries'],
            'Left-back': ['dribbles_past', 'injuries'],
            'Center-back': ['dribbles_past'],
            'Defensive Midfielder': ['dribbles_past', 'injuries'],
            'Central Midfielder': ['injuries'],
            'Attacking Midfielder': ['shots_off_target'],
            'Right Winger': ['shots_off_target', 'injuries'],
            'Left Winger': ['shots_off_target', 'injuries'],
            'Striker': ['shots_off_target', 'injuries'],
            'Center Forward': ['shots_off_target', 'injuries']
        }
        self.ABSENT_FEATURES_TO_MAXIMIZE = {
            'Goalkeeper': ['saved_shots', 'avg_speed', 'highest_speed', 'aerial_duels'],
            'Right-back': ['distance_covered', 'avg_speed', 'highest_speed', 'tackles_won'],
            'Left-back': ['distance_covered', 'avg_speed', 'highest_speed', 'tackles_won'],
            'Center-back': ['distance_covered', 'avg_speed', 'highest_speed', 'tackles_won'],
            'Defensive Midfielder': ['distance_covered', 'aerial_duels', 'tackles_won'],
            'Central Midfielder': ['distance_covered', 'aerial_duels'],
            'Attacking Midfielder': ['highest_speed'],
            'Right Winger': ['highest_speed', 'distance_covered'],
            'Left Winger': ['highest_speed', 'distance_covered'],
            'Striker': ['aerial_duels'],
            'Center Forward': ['aerial_duels']
        }
        self.FEATURE_INDEX_MAP = {
            'goals': 0,
            'total_shots': 1,
            'shots_on_target': 2,
            'shots_off_target': 3,
            'blocked_shots': 4,
            'total_passes': 5,
            'accurate_passes': 6,
            'pass_success': 7,
            'key_passes': 8,
            'dribbles_attempted': 9,
            'dribbles': 10,
            'dribbles_success': 11,
            'dribbles_past': 12,
            'aerial_success': 13,
            'tackles_attempted': 14,
            'tackles_success': 15,
            'clearances': 16,
            'interceptions': 17,
            'offensive_aerials': 18,
            'defensive_aerials': 19,
            'distance_covered': 20,
            'avg_speed': 21,
            'highest_speed': 22,
            'saved_shots': 23,
            'aerial_duels': 24,
            'tackles_won': 25
        }

    def count_players_per_position(self, formation):
        formation_numbers = list(map(int, formation.split('-')))
        position_groups = {
            'defender': ['Right-back', 'Left-back', 'Center-back'],
            'midfielder': ['Defensive Midfielder', 'Central Midfielder', 'Attacking Midfielder', 'Right Winger', 'Left Winger'],
            'forward': ['Striker', 'Center Forward']
        }
        position_counter = Counter()
        num_defenders = formation_numbers[0]
        num_midfielders = formation_numbers[1] if len(formation_numbers) > 1 else 0
        num_midfielders_advanced = formation_numbers[2] if len(formation_numbers) > 3 else 0
        num_forwards = formation_numbers[-1]

        if num_defenders == 4:
            position_counter['Center-back'] = 2
            position_counter['Right-back'] = 1
            position_counter['Left-back'] = 1
        elif num_defenders == 3:
            position_counter['Center-back'] = 3
        elif num_defenders == 5:
            position_counter['Center-back'] = 3
            position_counter['Right-back'] = 1
            position_counter['Left-back'] = 1

        if num_midfielders == 4:
            position_counter['Central Midfielder'] = 2
            position_counter['Right Winger'] = 1
            position_counter['Left Winger'] = 1
        elif num_midfielders == 3:
            position_counter['Central Midfielder'] = 2
            position_counter['Attacking Midfielder'] = 1
        elif num_midfielders == 5:
            position_counter['Central Midfielder'] = 3
            position_counter['Right Winger'] = 1
            position_counter['Left Winger'] = 1
        elif num_midfielders == 2:
            position_counter['Central Midfielder'] = 2
        elif num_midfielders == 1:
            position_counter['Central Midfielder'] = 1

        if num_midfielders_advanced > 0:
            position_counter['Attacking Midfielder'] += num_midfielders_advanced

        if num_forwards == 2:
            position_counter['Striker'] = 1
            position_counter['Center Forward'] = 1
        elif num_forwards == 1:
            position_counter['Striker'] = 1
        elif num_forwards >= 3:
            position_counter['Striker'] = 2
            position_counter['Center Forward'] = 1

        return position_counter

    def get_relevant_feature_indices(self, position):
        max_features = [self.FEATURE_INDEX_MAP[feat] for feat in self.STATS_TO_MAXIMIZE[position] if feat in self.FEATURE_INDEX_MAP]
        min_features = [self.FEATURE_INDEX_MAP[feat] for feat in self.STATS_TO_MINIMIZE[position] if feat in self.FEATURE_INDEX_MAP]
        return max_features, min_features

    def objective(self, player_vars, player_stats, target_stats, max_features, min_features, absent_features):
        team_max_stats = [lpSum(player_vars[i] * player_stats[i][j] for i in range(len(player_vars))) for j in max_features]
        team_min_stats = [lpSum(player_vars[i] * player_stats[i][j] for i in range(len(player_vars))) for j in min_features]
        absent_stats = [lpSum(player_vars[i] * player_stats[i][j] for i in range(len(player_vars))) for j in absent_features]

        maximize_term = lpSum(lpSum(team_max_stats[j] - target_stats[max_features[j]] for j in range(len(max_features))))
        maximize_absent_term = lpSum(absent_stats)
        minimize_term = lpSum(lpSum(target_stats[min_features[j]] - team_min_stats[j] for j in range(len(min_features))))

        return maximize_term + maximize_absent_term - minimize_term

    def recommend_team(self):
        formation = self.input_row['formations']
        position_counts = self.count_players_per_position(formation)
        self.player_data = self.player_data.reset_index(drop=True)

        num_goalkeepers = 1
        num_right_backs = position_counts['Right-back']
        num_left_backs = position_counts['Left-back']
        num_center_backs = position_counts['Center-back']
        num_defensive_midfielders = position_counts['Defensive Midfielder']
        num_central_midfielders = position_counts['Central Midfielder']
        num_attacking_midfielders = position_counts['Attacking Midfielder']
        num_right_wingers = position_counts['Right Winger']
        num_left_wingers = position_counts['Left Winger']
        num_strikers = position_counts['Striker']
        num_center_forwards = position_counts['Center Forward']
        self.player_data.columns = self.player_data.columns.str.lower()

        goalkeepers = self.player_data[self.player_data['position'].str.contains('Goalkeeper')].index
        right_backs = self.player_data[self.player_data['position'].str.contains('Right-back')].index
        left_backs = self.player_data[self.player_data['position'].str.contains('Left-back')].index
        center_backs = self.player_data[self.player_data['position'].str.contains('Center-back')].index
        defensive_midfielders = self.player_data[self.player_data['position'].str.contains('Defensive Midfielder')].index
        central_midfielders = self.player_data[self.player_data['position'].str.contains('Central Midfielder')].index
        attacking_midfielders = self.player_data[self.player_data['position'].str.contains('Attacking Midfielder')].index
        right_wingers = self.player_data[self.player_data['position'].str.contains('Right Winger')].index
        left_wingers = self.player_data[self.player_data['position'].str.contains('Left Winger')].index
        strikers = self.player_data[self.player_data['position'].str.contains('Striker')].index
        center_forwards = self.player_data[self.player_data['position'].str.contains('Center Forward')].index

        player_stats = self.player_data[['goals', 'total_shots', 'shots_on_target', 'shots_off_target', 'blocked_shots',
                                          'total_passes', 'accurate_passes', 'pass_success', 'key_passes', 'dribbles_attempted',
                                          'dribbles', 'dribbles_success', 'dribbles_past', 'aerial_success',
                                          'tackles_attempted', 'tackles_success', 'clearances', 'interceptions', 'offensive_aerials', 'defensive_aerials',
                                          'injuries', 'distance_covered', 'avg_speed', 'highest_speed', 'saved_shots', 'aerial_duels',
                                          'tackles_won']].values

        target_stats = np.array([self.input_row['goals'], self.input_row['total_shots'], self.input_row['shots_on_target'],
                                 self.input_row['shots_off_target'], self.input_row['blocked_shots'], self.input_row['total_passes'],
                                 self.input_row['accurate_passes'], self.input_row['pass_success'], self.input_row['key_passes'], self.input_row['dribbles_attempted'],
                                 self.input_row['dribbles'], self.input_row['dribbles_success'], self.input_row['dribbles_past'],
                                 self.input_row['aerial_success'], self.input_row['tackles_attempted'], self.input_row['tackles_success'],
                                 self.input_row['clearances'], self.input_row['interceptions'], self.input_row['goals_past'],
                                 self.input_row['aerials_won'], self.input_row['offensive_aerials'], self.input_row['defensive_aerials'],
                                 self.input_row['tackles']])

        problem = LpProblem("Team_Selection", LpMaximize)
        player_vars = [LpVariable(f'player_{i}', lowBound=0, cat='Binary') for i in range(len(self.player_data))]

        problem += lpSum([player_vars[i] for i in goalkeepers]) == num_goalkeepers, "Goalkeeper_Constraint"
        problem += lpSum([player_vars[i] for i in right_backs]) == num_right_backs, "Right_Back_Constraint"
        problem += lpSum([player_vars[i] for i in left_backs]) == num_left_backs, "Left_Back_Constraint"
        problem += lpSum([player_vars[i] for i in center_backs]) == num_center_backs, "Center_Back_Constraint"
        problem += lpSum([player_vars[i] for i in defensive_midfielders]) == num_defensive_midfielders, "Defensive_Midfielder_Constraint"
        problem += lpSum([player_vars[i] for i in central_midfielders]) == num_central_midfielders, "Central_Midfielder_Constraint"
        problem += lpSum([player_vars[i] for i in attacking_midfielders]) == num_attacking_midfielders, "Attacking_Midfielder_Constraint"
        problem += lpSum([player_vars[i] for i in right_wingers]) == num_right_wingers, "Right_Winger_Constraint"
        problem += lpSum([player_vars[i] for i in left_wingers]) == num_left_wingers, "Left_Winger_Constraint"
        problem += lpSum([player_vars[i] for i in strikers]) == num_strikers, "Striker_Constraint"
        problem += lpSum([player_vars[i] for i in center_forwards]) == num_center_forwards, "Center_Forward_Constraint"

        injury_limit = 3
        for i in range(len(player_vars)):
            problem += player_vars[i] * self.player_data['injuries'].iloc[i] <= injury_limit, f'Injury_Constraint_{i}'

        positions = {
            'Goalkeeper': goalkeepers,
            'Right-back': right_backs,
            'Left-back': left_backs,
            'Center-back': center_backs,
            'Defensive Midfielder': defensive_midfielders,
            'Central Midfielder': central_midfielders,
            'Attacking Midfielder': attacking_midfielders,
            'Right Winger': right_wingers,
            'Left Winger': left_wingers,
            'Striker': strikers,
            'Center Forward': center_forwards
        }

        for position, indices in positions.items():
            max_features, min_features = self.get_relevant_feature_indices(position)
            absent_features = [self.FEATURE_INDEX_MAP[feat] for feat in self.ABSENT_FEATURES_TO_MAXIMIZE[position] if feat in self.FEATURE_INDEX_MAP]

            if len(indices) > 0:
                problem += self.objective(player_vars, player_stats, target_stats, max_features, min_features, absent_features)

        problem.solve()

        if LpStatus[problem.status] == 'Optimal':
            selected_players = self.player_data[[player_vars[i].varValue > 0 for i in range(len(self.player_data))]]
            team_stats = selected_players[['goals', 'total_shots', 'shots_on_target', 'shots_off_target', 'blocked_shots', 'saved_shots',
                                           'total_passes', 'accurate_passes', 'pass_success', 'key_passes', 'dribbles_attempted', 'dribbles',
                                           'dribbles_success', 'aerial_duels', 'aerial_success', 'offensive_aerials', 'defensive_aerials',
                                           'tackles_attempted', 'tackles_won', 'tackles_success', 'clearances', 'interceptions', 'injuries',
                                           'distance_covered', 'avg_speed', 'highest_speed']].sum()
            return selected_players, team_stats
        else:
            print("No optimal solution found.")
            return None, None



# Usage

mobile_data1 = pd.read_csv('/content/drive/MyDrive/CSV files/mobile_data.csv')
mobile_data2 = pd.read_csv('/content/drive/MyDrive/CSV files/mobile_data 2.csv')

correct_shirt_numbers = [str(num) for num in mobile_data1['Shirt_Number']]

player_data1 = pd.read_csv('/content/drive/MyDrive/CSV files/team_1_player_statistics (16).csv')
player_data2 = pd.read_csv('/content/drive/MyDrive/CSV files/team_2_player_statistics (19).csv')
player_data3 = pd.read_csv('/content/drive/MyDrive/CSV files/team_3_player_statistics (16).csv')
player_data4 = pd.read_csv('/content/drive/MyDrive/CSV files/team_4_player_statistics (16).csv')

teams1 = pd.read_csv('/content/drive/MyDrive/CSV files/teams_final_statistics (16).csv')
teams2 = pd.read_csv('/content/drive/MyDrive/CSV files/teams2_final_statistics (16) .csv')
combined_teams   = pd.concat([teams1, teams2], ignore_index=True)

player_data_dict = {
    'player_data1': player_data1,
    'player_data2': player_data2,
    'player_data3': player_data3,
    'player_data4': player_data4,
}


def euclidean_distance(color1, color2):
    return np.sqrt(np.sum((color1 - color2) ** 2))

# Extract the first team's color from mobile_data1
first_team_color = np.array(mobile_data1.iloc[0]['Team_Color'].strip("[]").split(), dtype=float)

# Extract the opponent's team color from mobile_data2
opponent_team_color = np.array(mobile_data2.iloc[0]['Team_Color '].strip("[]").split(), dtype=float)

# Function to clean and convert the 'team_color' string to a NumPy array
def clean_color_string(color_string):
    color_list = color_string.strip("[]").split()  # Split on spaces
    return np.array(color_list, dtype=float)

# Function to find the closest dataset in a player dataset
def find_closest_player_dataset(player_data, target_color):
    non_goalkeeper_data = player_data[player_data['position'] != 'goalkeeper']  # Exclude goalkeepers
    player_colors = non_goalkeeper_data['team_color'].apply(clean_color_string)
    distances = np.array([euclidean_distance(target_color, player_color) for player_color in player_colors])

    if len(distances) > 0:
        closest_distance = np.min(distances)
    else:
        closest_distance = np.inf  # Handle case when no players are available

    return closest_distance

# Get the closest player dataset based on color
def get_closest_player_data(mobile_color, threshold=10):
    distances = {}

    distances['player_data1'] = find_closest_player_dataset(player_data1, mobile_color)
    distances['player_data2'] = find_closest_player_dataset(player_data2, mobile_color)
    distances['player_data3'] = find_closest_player_dataset(player_data3, mobile_color)
    distances['player_data4'] = find_closest_player_dataset(player_data4, mobile_color)

    # Get the closest player data with minimum distance
    closest_player_dataset = min(distances, key=distances.get)

    return closest_player_dataset

# Find closest player datasets for mobile_data1 and mobile_data2
closest_player_data_first_team = get_closest_player_data(first_team_color)
closest_player_data_first_team = player_data_dict[closest_player_data_first_team]

# Output the final closest player datasets


position_mapping = dict(zip(mobile_data1['Shirt_Number'], mobile_data1['Position']))

# Update positions in closest_player_data_first_team based on the mapping
for index, player in closest_player_data_first_team.iterrows():
    shirt_number = player['shirtNumber']

    # Check if the shirt number exists in the mapping
    if shirt_number in position_mapping:
        closest_player_data_first_team.at[index, 'position'] = position_mapping[shirt_number]


player_data = closest_player_data_first_team

def color_distance(row_color, target_color):
    return np.linalg.norm(row_color - target_color)

# Function to find the closest match in a DataFrame
def find_closest_match(df, target_color):
    # Convert the team_color string to an array of floats
    df['team_color_array'] = df['team_color'].apply(lambda x: np.fromstring(x.strip("[]"), sep=','))

    # Calculate distances and find the closest row
    df['distance'] = df['team_color_array'].apply(lambda x: color_distance(x, target_color))

    # Return the row with the minimum distance
    closest_row = df.loc[df['distance'].idxmin()]

    # Drop the temporary columns
    df.drop(columns=['team_color_array', 'distance'], inplace=True)

    return closest_row

# Find the closest matching row in teams1 for first_team_color
closest_row_team1 = find_closest_match(combined_teams, first_team_color)

# Find the closest matching row in teams2 for opponent_team_color
closest_row_team2 = find_closest_match(combined_teams, opponent_team_color)

# Output the closest matching rows


# Optionally, you can combine the closest rows into a DataFrame
combined_closest_rows = pd.DataFrame([closest_row_team1, closest_row_team2])

teams = combined_closest_rows
data_cleaned = pd.read_csv('/content/drive/MyDrive/CSV files/data_cleaned.csv')

player_data['shirt_number'] = player_data.pop('shirtNumber')
player_data['pass_success'] = player_data.pop('%_pass_success')
player_data['dribbles_success'] = player_data.pop('%_dribbles_success')
player_data['aerial_success'] = player_data.pop('%_aerial_success')
player_data['tackles_success'] = player_data.pop('%_tackles_success')

model1 = FirstModel(teams, data_cleaned)
similar_rows = model1.find_similar_rows()
recommended_formations = model1.find_winning_rows(similar_rows)
player_stats = MyPlayerStats(player_data, correct_shirt_numbers)
processed_data = player_stats.process_data()
processed_data['shirt_number'] = processed_data['corrected_shirt_number']
processed_data.drop(columns=['corrected_shirt_number'], inplace=True)

match_data = recommended_formations
input_row = match_data.iloc[0].to_dict()
input_row['tackles_success'] = input_row.pop('tackle_success')

row = pd.DataFrame([input_row])

team_recommender = SecondModel(input_row, processed_data)
selected_players, team_stats = team_recommender.recommend_team()

if selected_players is not None:
    print("\nRecommended Team:")
    display(selected_players)
    selected_players.to_csv('/content/selected_players.csv', index=False)
    print("\nTeam Stats:")
    display(team_stats)
    player_shirt_number_to_remove = selected_players['shirt_number'].tolist()

    player_data_updated = processed_data[~processed_data['shirt_number'].isin(player_shirt_number_to_remove)]
    substitute_recommender = SecondModel(input_row, player_data_updated)
    selected_substitutes, team_stats = substitute_recommender.recommend_team()
    if selected_substitutes is not None:
        print("\nRecommended Substitutes:")
        display(selected_substitutes)
        selected_substitutes.to_csv('/content/selected_substitutes.csv', index=False)
        print("\nSubstitute Team Stats:")
        display(team_stats)


  df['team_color_array'] = df['team_color'].apply(lambda x: np.fromstring(x.strip("[]"), sep=','))
  df['team_color_array'] = df['team_color'].apply(lambda x: np.fromstring(x.strip("[]"), sep=','))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['defenders'] = formation_split.str[0].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['midfielders'] = formation_split.str[1].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: ht

Unnamed: 0,id_match,formations,score,goals,goals_past,total_shots,shots_on_target,shots_off_target,blocked_shots,total_possession,total_passes,accurate_passes,pass_success,key_passes,dribbles_attempted,dribbles_past,dribbles_success,aerials_won,aerial_success,offensive_aerials,defensive_aerials,tackles,tackle_success,tackles_attempted,clearances,interceptions,corners,win,draw,lose,dribbles,defenders,midfielders,attackers,advanced_midfielders
3444,1723,4-4-2,1 - 0,1,0,10,3,5,2,55.1,350,261,75,7,7,0,57,21,55,20,18,9,100,9,17,9,7,1,0,0,4,4,4,2,0
1171,586,3-4-3,3 - 1,3,1,22,7,6,9,55.3,360,255,71,18,18,5,61,23,40,15,42,18,78,23,21,10,9,1,0,0,11,3,4,3,0
3383,1692,4-3-3,2 - 1,2,1,12,6,3,3,58.8,367,265,72,9,17,4,59,18,56,17,15,19,83,23,17,10,6,1,0,0,10,4,3,3,0
2701,1351,4-4-2,3 - 1,3,1,14,4,7,3,59.9,375,277,74,11,15,7,47,27,59,18,28,18,72,25,19,4,7,1,0,0,7,4,4,2,0
2420,1211,4-2-3-1,4 - 1,4,1,14,9,2,3,61.1,402,313,78,9,15,8,67,21,64,16,17,16,67,24,27,6,4,1,0,0,10,4,2,1,3
3302,1652,4-4-2,3 - 2,3,2,24,9,12,3,63.1,401,333,83,19,11,6,27,26,68,11,27,8,57,14,16,7,11,1,0,0,3,4,4,2,0
2768,1385,3-5-2,1 - 0,1,0,16,3,11,2,62.1,428,307,72,5,12,9,58,24,45,19,34,18,67,27,15,5,5,1,0,0,7,3,5,2,0
2441,1221,4-3-3,2 - 1,2,1,10,3,5,2,60.5,418,323,77,7,14,6,57,21,62,17,17,9,60,15,18,2,1,1,0,0,8,4,3,3,0
3034,1518,4-2-3-1,3 - 1,3,1,26,8,10,8,60.4,418,336,80,17,18,11,67,9,50,8,10,21,66,32,15,8,5,1,0,0,12,4,2,1,3
2897,1449,4-2-3-1,2 - 0,2,0,20,5,11,4,61.5,429,326,76,15,19,16,47,27,52,27,25,23,59,39,11,0,5,1,0,0,9,4,2,1,3


No optimal solution found.


