# Transfer Value Prediction

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the two input tables from CSV files in the current working directory:
# per-player performance statistics and per-player wages.
player_stats, player_wages = map(
    pd.read_csv, ('player_stats.csv', 'player_wages.csv')
)

In [3]:
# Show the column labels of the stats table to see which fields are available.
player_stats.columns

Index(['Rank', 'Player', 'Nationality', 'Position', 'Club', 'Age', 'BirthYear',
       'MatchesPlayed', 'MatchesStarted', 'MinutesPlayed', 'MatchesPlayed_90s',
       'Goals_Total', 'Assists_Total', 'GoalsPlusAssists', 'NonPenaltyGoals',
       'PenaltyGoals', 'PenaltyAttempts', 'YellowCards', 'RedCards',
       'xG_Total', 'NonPenalty_xG', 'xAG_Total', 'NonPenalty_xG_plus_xAG',
       'ProgressiveCarries', 'ProgressivePasses', 'ProgressivePassesReceived',
       'Goals_Total_per90', 'Assists_Total_per90', 'GoalsPlusAssists_per90',
       'NonPenaltyGoals_per90', 'G+A_ExclPK', 'xG_Total_per90',
       'xAG_Total_per90', 'xG_plus_xAG', 'NonPenalty_xG_per90',
       'NonPenalty_xG_plus_xAG_per90'],
      dtype='object')

In [4]:
# Show the column labels of the wages table; note they are named differently
# from the stats table (e.g. 'Nation'/'Pos'/'Squad'), and only 'Player' and
# 'Age' appear in both.
player_wages.columns

Index(['Rk', 'Player', 'Nation', 'Pos', 'Squad', 'Age',
       'Annual Wages in pounds', 'Weekly Wages in pounds'],
      dtype='object')

# Strip the thousands-separator commas from both wage columns and convert them
# to float. The cleanup is identical for each column, so it is written once.
# NOTE(review): `.str` assumes the columns were read from the CSV as strings;
# re-running this cell after the conversion would fail on the float column.
for wage_col in ('Annual Wages in pounds', 'Weekly Wages in pounds'):
    player_wages[wage_col] = (
        player_wages[wage_col].str.replace(',', '', regex=False).astype(float)
    )

### Merge Datasets

In [13]:
# Trim leading/trailing whitespace from the player names in both tables so that
# stray spaces do not prevent a match. Only whitespace is removed here: accents
# and other spelling differences are NOT normalised by `.str.strip()`.
for frame in (player_stats, player_wages):
    frame['Player'] = frame['Player'].str.strip()

# Inner join on the player name: only players present in both tables are kept.
# Columns that exist in both tables under the same name (e.g. 'Age') receive
# the default '_x' / '_y' suffixes.
# NOTE(review): the name is the only join key, so two different players who
# share a name, or one player listed on several rows, would produce extra
# rows - verify uniqueness of 'Player' in both inputs.
merged_df = player_stats.merge(player_wages, on='Player', how='inner')

In [14]:
# Inspect the dtype of every column in the merged table (e.g. to spot columns
# that are still stored as generic objects rather than numbers).
merged_df.dtypes

Rank                              int64
Player                           object
Nationality                      object
Position                         object
Club                             object
Age_x                           float64
BirthYear                       float64
MatchesPlayed                     int64
MatchesStarted                    int64
MinutesPlayed                     int64
MatchesPlayed_90s               float64
Goals_Total                       int64
Assists_Total                     int64
GoalsPlusAssists                  int64
NonPenaltyGoals                   int64
PenaltyGoals                      int64
PenaltyAttempts                   int64
YellowCards                       int64
RedCards                          int64
xG_Total                        float64
NonPenalty_xG                   float64
xAG_Total                       float64
NonPenalty_xG_plus_xAG          float64
ProgressiveCarries                int64
ProgressivePasses                 int64


In [15]:
# Remove the wage table's 'Rk', 'Nation' and 'Squad' columns. errors='ignore'
# keeps this safe when the columns are already gone (e.g. on a re-run).
merged_df = merged_df.drop(columns=['Rk', 'Nation', 'Squad'], errors='ignore')

# Count missing values per column *before* any rows are removed, and keep the
# result in a variable: a bare expression in the middle of a cell is evaluated
# and then discarded, so the counts would otherwise never be shown.
missing_counts = merged_df.isnull().sum()

# Discard every row that contains at least one missing value.
merged_df = merged_df.dropna()  # Or use imputation if needed

# Last expression of the cell, so the pre-drop missing-value report is rendered.
missing_counts

In [16]:
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so dependencies are visible in one place.
from sklearn.preprocessing import LabelEncoder

# Make sure the weekly wage is numeric: delete any '£' or ',' characters found
# in string values, then cast the column to float.
merged_df['Weekly Wages in pounds'] = merged_df['Weekly Wages in pounds'].replace(
    to_replace='[£,]', value='', regex=True
).astype(float)

# Replace each distinct 'Position' label with an integer code. The fitted
# encoder is kept in `le` so the codes can be mapped back to labels later.
# NOTE(review): integer codes imply an ordering between positions that may not
# be meaningful to the downstream model - confirm this is intended.
le = LabelEncoder()
le.fit(merged_df['Position'])
merged_df['Position'] = le.transform(merged_df['Position'])