In [50]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer

import sqlite3

conn = sqlite3.connect("database.sqlite")

tables = ['Country', 'League', 'Match', 'Player', 'Player_Attributes', 'Team', 'Team_Attributes']

df_country = pd.read_sql_query("SELECT * FROM Country", conn)
df_league = pd.read_sql_query("SELECT * FROM League", conn)
df_match = pd.read_sql_query("SELECT * FROM Match", conn)
df_player = pd.read_sql_query("SELECT * FROM Player", conn)
df_player_attributes = pd.read_sql_query("SELECT * FROM Player_Attributes", conn)
df_team = pd.read_sql_query("SELECT * FROM Team", conn)
df_team_attributes = pd.read_sql_query("SELECT * FROM Team_Attributes", conn)

conn.close()

In [74]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import warnings
from sklearn.exceptions import DataConversionWarning

# Suppress all specific warnings related to DataConversionWarning
warnings.filterwarnings(action='ignore', category=UserWarning, module='sklearn')
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

mean_values = df_player_attributes.select_dtypes(include=[np.number]).mean()
# Step 1: Analyze Team Weakness
def analyze_team_weakness(team_id, df_match, df_team_attributes):
    df_match['date'] = pd.to_datetime(df_match['date'])
    team_matches_2016 = df_match[(df_match['date'].dt.year == 2016) & 
                                 ((df_match['home_team_api_id'] == team_id) | (df_match['away_team_api_id'] == team_id))]
    
    # Calculate goals scored and conceded in 2016
    goals_scored = team_matches_2016['home_team_goal'].where(team_matches_2016['home_team_api_id'] == team_id, 
                                                             team_matches_2016['away_team_goal']).sum()
    goals_conceded = team_matches_2016['away_team_goal'].where(team_matches_2016['home_team_api_id'] == team_id, 
                                                               team_matches_2016['home_team_goal']).sum()
    
    # Get the latest team attributes before 2016
    team_attributes = df_team_attributes[(df_team_attributes['team_api_id'] == team_id) & 
                                         (pd.to_datetime(df_team_attributes['date']).dt.year < 2016)]
    
    if not team_attributes.empty:
        latest_team_attributes = team_attributes.sort_values(by='date', ascending=False).iloc[0]
        weaknesses = {
            'goals_scored': goals_scored,
            'goals_conceded': goals_conceded,
            'buildUpPlayPassing': latest_team_attributes['buildUpPlayPassing'],
            'chanceCreationPassing': latest_team_attributes['chanceCreationPassing'],
            'defenceAggression': latest_team_attributes['defenceAggression']
        }
    else:
        weaknesses = {'goals_scored': goals_scored, 'goals_conceded': goals_conceded, 'buildUpPlayPassing': 50, 
                      'chanceCreationPassing': 50, 'defenceAggression': 50}

    # Determine required role based on weaknesses
    if goals_scored < goals_conceded:
        print("Weakness identified: Defense")
        required_role = 'defender'
    elif goals_scored < 50:  
        print("Weakness identified: Attack")
        required_role = 'attacker'
    else:
        print("Weakness identified: Midfield")
        required_role = 'midfielder'
    
    return weaknesses, required_role

merged_df = pd.merge(df_team, df_match, left_on='team_api_id', right_on='home_team_api_id', how='inner')

final_df = pd.merge(merged_df, df_league, left_on='league_id', right_on='id', how='inner')

df_team_league_country = final_df[['team_long_name', 'team_api_id', 'name', 'id_x', 'country_id_y']].drop_duplicates()

df_team_league_country.columns = ['team_name', 'team_api_id', 'league_name', 'league_id', 'country_id']

df_team_league_country = pd.merge(df_team_league_country, df_country, left_on='country_id', right_on='id', how='left')

df_team_league_country = df_team_league_country[['team_name', 'team_api_id', 'league_name', 'league_id', 'name', 'country_id']]
df_team_league_country.columns = ['team_name', 'team_api_id', 'league_name', 'league_id', 'country_name', 'country_id']

df_team_league_country = df_team_league_country.sort_values(['country_name', 'league_name', 'team_name'])

df_team_league_country.to_csv('team_league_country_with_ids.csv', index=False)
teams = pd.merge(df_team_attributes,df_team, on='team_fifa_api_id', how='inner')  # Inner join

def identify_transfer_candidates(df_player_attributes, df_player, teams, required_role):
    # Define allowed country IDs
    allowed_country_ids = [1729, 4769, 7809, 10257, 21518]
    
    # Select only numeric columns for aggregation
    df_player_attributes_numeric = df_player_attributes.select_dtypes(include=[np.number])
    df_player_attributes_numeric['year'] = pd.to_datetime(df_player_attributes['date']).dt.year
    player_yearly_data = df_player_attributes_numeric.groupby(['player_api_id', 'year']).mean().reset_index()
    
    # Filter players based on allowed country IDs by merging with the teams DataFrame
    player_yearly_data = player_yearly_data.merge(
        teams[['team_api_id', 'country_id']], on='team_api_id', how='left'
    )
    player_yearly_data = player_yearly_data[player_yearly_data['country_id'].isin(allowed_country_ids)]
    
    if required_role == 'defender':
        top_performers = player_yearly_data[
            (player_yearly_data['interceptions'] > mean_values['interceptions']) &
            (player_yearly_data['standing_tackle'] > mean_values['standing_tackle']) &
            (player_yearly_data['sliding_tackle'] > mean_values['sliding_tackle']) &
            (player_yearly_data['strength'] > mean_values['strength']) &
            (player_yearly_data['marking'] > mean_values['marking']) &
            (player_yearly_data['aggression'] > mean_values['aggression'])
        ]
        high_potential = player_yearly_data[
            (player_yearly_data['overall_rating'] < 80) &
            (player_yearly_data['interceptions'] > mean_values['interceptions'] * 0.8) &
            (player_yearly_data['standing_tackle'] > mean_values['standing_tackle'] * 0.8) &
            (player_yearly_data['sliding_tackle'] > mean_values['sliding_tackle'] * 0.8) &
            (player_yearly_data['strength'] > mean_values['strength'] * 0.8) &
            (player_yearly_data['marking'] > mean_values['marking'] * 0.8) &
            (player_yearly_data['aggression'] > mean_values['aggression'] * 0.8)
        ]

    elif required_role == 'attacker':
        top_performers = player_yearly_data[
            (player_yearly_data['finishing'] > mean_values['finishing']) &
            (player_yearly_data['dribbling'] > mean_values['dribbling']) &
            (player_yearly_data['shot_power'] > mean_values['shot_power']) &
            (player_yearly_data['acceleration'] > mean_values['acceleration']) &
            (player_yearly_data['positioning'] > mean_values['positioning']) &
            (player_yearly_data['sprint_speed'] > mean_values['sprint_speed'])
        ]
        high_potential = player_yearly_data[
            (player_yearly_data['overall_rating'] < 80) &
            (player_yearly_data['finishing'] > mean_values['finishing'] * 0.8) &
            (player_yearly_data['dribbling'] > mean_values['dribbling'] * 0.8) &
            (player_yearly_data['shot_power'] > mean_values['shot_power'] * 0.8) &
            (player_yearly_data['acceleration'] > mean_values['acceleration'] * 0.8) &
            (player_yearly_data['positioning'] > mean_values['positioning'] * 0.8) &
            (player_yearly_data['sprint_speed'] > mean_values['sprint_speed'] * 0.8)
        ]

    elif required_role == 'midfielder':
        top_performers = player_yearly_data[
            (player_yearly_data['short_passing'] > mean_values['short_passing']) &
            (player_yearly_data['vision'] > mean_values['vision']) &
            (player_yearly_data['ball_control'] > mean_values['ball_control']) &
            (player_yearly_data['stamina'] > mean_values['stamina']) &
            (player_yearly_data['agility'] > mean_values['agility']) &
            (player_yearly_data['long_passing'] > mean_values['long_passing']) &
            (player_yearly_data['reactions'] > mean_values['reactions'])
        ]
        high_potential = player_yearly_data[
            (player_yearly_data['overall_rating'] < 80) &
            (player_yearly_data['short_passing'] > mean_values['short_passing'] * 0.8) &
            (player_yearly_data['vision'] > mean_values['vision'] * 0.8) &
            (player_yearly_data['ball_control'] > mean_values['ball_control'] * 0.8) &
            (player_yearly_data['stamina'] > mean_values['stamina'] * 0.8) &
            (player_yearly_data['agility'] > mean_values['agility'] * 0.8) &
            (player_yearly_data['long_passing'] > mean_values['long_passing'] * 0.8) &
            (player_yearly_data['reactions'] > mean_values['reactions'] * 0.8)
        ]

    else:  # Goalkeeper role
        top_performers = player_yearly_data[
            (player_yearly_data['gk_diving'] > 60) &
            (player_yearly_data['gk_handling'] > 60) &
            (player_yearly_data['gk_positioning'] > 60) &
            (player_yearly_data['gk_reflexes'] > 60)
        ]
        high_potential = player_yearly_data[
            (player_yearly_data['overall_rating'] < 80) &
            (player_yearly_data['gk_diving'] > 50) &
            (player_yearly_data['gk_handling'] > 50) &
            (player_yearly_data['gk_positioning'] > 50) &
            (player_yearly_data['gk_reflexes'] > 50)
        ]
        
    # Filter for players available before 2016
    top_performers = top_performers[top_performers['year'] < 2016]
    high_potential = high_potential[high_potential['year'] < 2016]

    top_performers = top_performers.sort_values(by='overall_rating', ascending=False).drop_duplicates('player_api_id')
    top_performers = top_performers.merge(df_player[['player_api_id', 'player_name']], on='player_api_id', how='left')

    # Identify high potential players and exclude those already in top performers
    high_potential = high_potential[~high_potential['player_api_id'].isin(top_performers['player_api_id'])]

    # Sort high potential players based on their potential and remove duplicates
    high_potential = high_potential.sort_values(by='potential', ascending=False).drop_duplicates('player_api_id')
    high_potential = high_potential.merge(df_player[['player_api_id', 'player_name']], on='player_api_id', how='left')

    return top_performers, high_potential

# Step 3: Predict Future Potential for High Potential Players
def predict_future_potential(df_player_attributes, high_potential):
    # Prepare for pre-2016 data filtering and feature selection
    df_player_attributes['year'] = pd.to_datetime(df_player_attributes['date']).dt.year
    pre_2016_data = df_player_attributes[df_player_attributes['year'] < 2016]
    features = ['overall_rating', 'finishing', 'interceptions', 'short_passing']

    results = []
    for player_id in high_potential['player_api_id'].unique():
        player_data = pre_2016_data[pre_2016_data['player_api_id'] == player_id]

        # Drop NaNs and aggregate across all years
        player_data = player_data[features + ['potential']].dropna()
        if not player_data.empty and len(player_data) > 1:
            X = player_data[features]
            y = player_data['potential']
            
            # Fit a linear model to predict potential
            model = LinearRegression()
            model.fit(X, y)
            # Use the most recent stats to predict future potential
            most_recent_stats = high_potential[high_potential['player_api_id'] == player_id][features].iloc[0].values.reshape(1, -1)
            predicted_potential = model.predict(most_recent_stats)[0]
        else:
            predicted_potential = player_data['potential'].mean()  # Fallback if data is insufficient

        results.append((player_id, predicted_potential))
        
    # Return predictions with player names
    predicted_df = pd.DataFrame(results, columns=['player_api_id', 'predicted_potential'])
    predicted_df = predicted_df.merge(df_player[['player_api_id', 'player_name']], on='player_api_id', how='left')
    
    return predicted_df

# Run analysis
team_id = 8650  # Example team ID, replace with the team you're analyzing
weaknesses, required_role = analyze_team_weakness(team_id, df_match, df_team_attributes)
print(f"Required role due to team weakness: {required_role}")

# Get transfer candidates
for role in ["attacker","defender","midfielder","goalies"]:
    print("@#$!@#FWEQF@$#RWEQRWEFR", role, "rfgsdfdfgdfghdfhd")
    top_performers, high_potential = identify_transfer_candidates(df_player_attributes, df_player, teams, role)

    print("\nTop Performer Transfer Candidates:")
    print(top_performers[['player_name', 'overall_rating', 'potential', 'finishing', 'interceptions', 'short_passing']].head())

    print("\nHigh Potential Transfer Candidates Before Prediction Adjustment:")
    print(high_potential[['player_name', 'overall_rating', 'potential', 'finishing', 'interceptions', 'short_passing']].head())

    # Predict future potential for high potential candidates
    high_potential_with_prediction = predict_future_potential(df_player_attributes, high_potential)
    print("\nPredicted Potentials for High Potential Transfer Candidates:")
    print(high_potential_with_prediction[['player_name', 'predicted_potential']].head())

Weakness identified: Attack
Required role due to team weakness: attacker
@#$!@#FWEQF@$#RWEQRWEFR attacker rfgsdfdfgdfghdfhd


KeyError: "None of [Index(['team_api_id', 'country_id'], dtype='object')] are in the [columns]"