In [16]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import sqlite3

conn = sqlite3.connect("database.sqlite")

tables = ['Country', 'League', 'Match', 'Player', 'Player_Attributes', 'Team', 'Team_Attributes']

df_country = pd.read_sql_query("SELECT * FROM Country", conn)
df_league = pd.read_sql_query("SELECT * FROM League", conn)
df_match = pd.read_sql_query("SELECT * FROM Match", conn)
df_player = pd.read_sql_query("SELECT * FROM Player", conn)
df_player_attributes = pd.read_sql_query("SELECT * FROM Player_Attributes", conn)
df_team = pd.read_sql_query("SELECT * FROM Team", conn)
df_team_attributes = pd.read_sql_query("SELECT * FROM Team_Attributes", conn)

conn.close()

In [17]:
# Step 1: Analyze Team Performance in 2016
def analyze_team_weakness(team_id, df_match, df_team_attributes):
    # Filter matches from 2016
    df_match['date'] = pd.to_datetime(df_match['date'])
    team_matches_2016 = df_match[(df_match['date'].dt.year == 2016) & 
                                 ((df_match['home_team_api_id'] == team_id) | (df_match['away_team_api_id'] == team_id))]
    
    # Calculate goals scored and conceded in 2016
    goals_scored = team_matches_2016['home_team_goal'].where(team_matches_2016['home_team_api_id'] == team_id, 
                                                             team_matches_2016['away_team_goal']).sum()
    goals_conceded = team_matches_2016['away_team_goal'].where(team_matches_2016['home_team_api_id'] == team_id, 
                                                               team_matches_2016['home_team_goal']).sum()
    
    # Analyze latest team attributes in 2016
    latest_team_attributes = df_team_attributes[(df_team_attributes['team_api_id'] == team_id) & 
                                                (pd.to_datetime(df_team_attributes['date']).dt.year < 2016)].sort_values(by='date', ascending=False).iloc[0]
    
    weaknesses = {
        'goals_scored': goals_scored,
        'goals_conceded': goals_conceded,
        'buildUpPlayPassing': latest_team_attributes['buildUpPlayPassing'],
        'chanceCreationPassing': latest_team_attributes['chanceCreationPassing'],
        'defenceAggression': latest_team_attributes['defenceAggression']
    }
    
    if goals_scored < goals_conceded:
        print("Weakness identified: Defense")
        required_role = 'defender'
    elif goals_scored < 50:  # Example threshold for attack weakness
        print("Weakness identified: Attack")
        required_role = 'attacker'
    else:
        print("Weakness identified: Midfield")
        required_role = 'midfielder'
    
    return weaknesses, required_role

# Step 2: Find Transfer Candidates Based on Weakness
def identify_transfer_candidates(df_player_attributes, required_role, weaknesses):
    # Filter players based on required role and attributes
    if required_role == 'defender':
        transfer_candidates = df_player_attributes[(df_player_attributes['interceptions'] > 60) & 
                                                   (df_player_attributes['strength'] > 60)]
    elif required_role == 'attacker':
        transfer_candidates = df_player_attributes[(df_player_attributes['finishing'] > 60) & 
                                                   (df_player_attributes['dribbling'] > 60)]
    else:  # Midfielder
        transfer_candidates = df_player_attributes[(df_player_attributes['short_passing'] > 60) & 
                                                   (df_player_attributes['ball_control'] > 60)]
    
    # Filter for players under 22 and high potential
    transfer_candidates = transfer_candidates[(transfer_candidates['potential'] > transfer_candidates['overall_rating']) & 
                                              (pd.to_datetime(transfer_candidates['date']).dt.year < 2016)]
    
    return transfer_candidates[['player_api_id', 'overall_rating', 'potential', 'finishing', 'interceptions', 'short_passing']]

# Calculate age based on birthday in df_player
df_player['age'] = pd.to_datetime('2016-01-01').year - pd.to_datetime(df_player['birthday']).dt.year
print("32434234243",df_player.head())
# Merge age into df_player_attributes
df_player_attributes = pd.merge(df_player_attributes, df_player[['player_api_id', 'age']], on='player_api_id', how='left')
print("2342342342342",df_player_attributes.head())

# Step 3: Predict Future Potential for Transfer Candidates
def predict_future_potential(df_player_attributes, transfer_candidates):
    df_player_attributes['year'] = pd.to_datetime(df_player_attributes['date']).dt.year
    features = ['overall_rating', 'finishing', 'interceptions', 'short_passing', 'age']
    
    results = []
    for _, player in transfer_candidates.iterrows():
        player_data = df_player_attributes[df_player_attributes['player_api_id'] == player['player_api_id']]
        print("########",player_data)
        
        if len(player_data) >= 3:
            # Use recent data to make predictions
            X = player_data[player_data['year'] < 2016][features]
            y = player_data[player_data['year'] < 2016]['potential']
            
            if len(X) > 1:
                model = LinearRegression()
                model.fit(X, y)
                predicted_potential = model.predict([player[features]])[0]
            else:
                predicted_potential = player['potential']  # Not enough data, fallback
        else:
            predicted_potential = player['potential']  # Fallback for players with insufficient data

        results.append((player['player_api_id'], predicted_potential))
        
    return pd.DataFrame(results, columns=['player_api_id', 'predicted_potential'])

# Continue with applying Steps to a Specific Team in 2016
team_id = 8650  # Replace with the specific team ID
weaknesses, required_role = analyze_team_weakness(team_id, df_match, df_team_attributes)
transfer_candidates = identify_transfer_candidates(df_player_attributes, required_role, weaknesses)

print("Transfer Candidates Before Prediction Adjustment:")
print(transfer_candidates.head())

# Predict future potential for transfer candidates
transfer_candidates_with_prediction = predict_future_potential(df_player_attributes, transfer_candidates)
print("Predicted Potentials for Transfer Candidates:")
print(transfer_candidates_with_prediction.head())

32434234243    id  player_api_id         player_name  player_fifa_api_id  \
0   1         505942  Aaron Appindangoye              218353   
1   2         155782     Aaron Cresswell              189615   
2   3         162549         Aaron Doran              186170   
3   4          30572       Aaron Galindo              140161   
4   5          23780        Aaron Hughes               17725   

              birthday  height  weight  age  
0  1992-02-29 00:00:00  182.88     187   24  
1  1989-12-15 00:00:00  170.18     146   27  
2  1991-05-13 00:00:00  170.18     163   25  
3  1982-05-08 00:00:00  182.88     198   34  
4  1979-11-08 00:00:00  182.88     154   37  
2342342342342    id  player_fifa_api_id  player_api_id                 date  overall_rating  \
0   1              218353         505942  2016-02-18 00:00:00            67.0   
1   2              218353         505942  2015-11-19 00:00:00            67.0   
2   3              218353         505942  2015-09-21 00:00:00         

KeyError: "['age'] not in index"