## Setting up the data

### Importing the Libraries

In [1]:
import os
import pandas as pd

### Importing the Dataset

In [2]:
# Build <parent of the current working directory>/data/football_players_wage_data.csv.
csv_filepath = os.path.join(
    os.path.dirname(os.path.abspath('')),
    "data",
    "football_players_wage_data.csv",
)
# Read the file and reverse the left-to-right column order in one step.
football_wage_data = pd.read_csv(csv_filepath).iloc[:, ::-1]
football_wage_data.head()

Unnamed: 0,Caps,Apps,Position,Nation,League,Club,Age,Wage
0,57,190,Forward,FRA,Ligue 1 Uber Eats,PSG,23,46427000
1,119,324,Midfilder,BRA,Ligue 1 Uber Eats,PSG,30,42125000
2,162,585,Forward,ARG,Ligue 1 Uber Eats,PSG,35,34821000
3,120,443,Forward,BEL,La Liga,R. Madrid,31,19959000
4,45,480,Goalkeeper,ESP,Premier League,Man UFC,31,19500000


## Exploring the dataset properties

### Checking for Missing Values

In [3]:
# Count missing (NaN/None) entries in each column.
football_wage_data.isna().sum()

Caps        0
Apps        0
Position    0
Nation      0
League      0
Club        0
Age         0
Wage        0
dtype: int64

### Checking and removing the duplicates

In [4]:
# Count rows that exactly repeat an earlier row (all columns are compared;
# the first occurrence of each row is not counted).
football_wage_data.duplicated().sum()

65

In [5]:
# Display the repeated rows themselves (first occurrences are not flagged).
football_wage_data.loc[football_wage_data.duplicated()]

Unnamed: 0,Caps,Apps,Position,Nation,League,Club,Age,Wage
1488,0,0,Midfilder,ENG,Premier League,Chelsea,18,780000
2372,0,0,Defender,ENG,Premier League,Chelsea,18,182000
2433,0,2,Midfilder,CMR,Ligue 1 Uber Eats,LOSC,18,163000
2476,0,0,Midfilder,ENG,Premier League,Man City,18,156000
2584,0,0,Forward,ENG,Premier League,Liverpool,18,130000
...,...,...,...,...,...,...,...,...
3868,0,0,Midfilder,ESP,La Liga,A. Madrid,18,13000
3869,0,0,Defender,ESP,La Liga,A. Madrid,18,13000
3875,0,0,Defender,ESP,La Liga,Sevilla,18,13000
3878,0,0,Defender,ESP,La Liga,Valencia,18,13000


In [6]:
# Keep the first occurrence of each repeated row. The original row labels are
# retained, so the index has gaps afterwards (info() below reports 3842 entries
# labelled 0 to 3906).
# NOTE(review): there is no player-identifier column, so identical rows may be
# distinct players rather than true duplicates — confirm dropping is intended.
football_wage_data = football_wage_data.drop_duplicates()

In [7]:
# Sanity check: with duplicates dropped, this selection should come back empty.
football_wage_data.loc[football_wage_data.duplicated()]

Unnamed: 0,Caps,Apps,Position,Nation,League,Club,Age,Wage


### Dataset descriptions

In [8]:
# Preview the first five rows of the frame.
football_wage_data.head()

Unnamed: 0,Caps,Apps,Position,Nation,League,Club,Age,Wage
0,57,190,Forward,FRA,Ligue 1 Uber Eats,PSG,23,46427000
1,119,324,Midfilder,BRA,Ligue 1 Uber Eats,PSG,30,42125000
2,162,585,Forward,ARG,Ligue 1 Uber Eats,PSG,35,34821000
3,120,443,Forward,BEL,La Liga,R. Madrid,31,19959000
4,45,480,Goalkeeper,ESP,Premier League,Man UFC,31,19500000


In [9]:
# (number of rows, number of columns).
football_wage_data.shape

(3842, 8)

In [10]:
# Column labels in their current (reversed-from-file) order.
football_wage_data.columns

Index(['Caps', 'Apps', 'Position', 'Nation', 'League', 'Club', 'Age', 'Wage'], dtype='object')

In [11]:
# Index range, per-column non-null counts, dtypes and memory usage.
football_wage_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3842 entries, 0 to 3906
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Caps      3842 non-null   int64 
 1   Apps      3842 non-null   int64 
 2   Position  3842 non-null   object
 3   Nation    3842 non-null   object
 4   League    3842 non-null   object
 5   Club      3842 non-null   object
 6   Age       3842 non-null   int64 
 7   Wage      3842 non-null   object
dtypes: int64(3), object(5)
memory usage: 270.1+ KB


In [12]:
# Summary statistics. By default describe() covers numeric columns only, so
# Wage (stored as object dtype, per info() above) does not appear here.
# NOTE(review): Wage likely needs converting to a numeric dtype before any
# numeric analysis — check the raw string format in the CSV first.
football_wage_data.describe()

Unnamed: 0,Caps,Apps,Age
count,3842.0,3842.0,3842.0
mean,9.077564,142.42608,24.221499
std,20.657954,131.527544,4.914963
min,0.0,0.0,18.0
25%,0.0,19.0,20.0
50%,0.0,119.0,24.0
75%,7.0,226.0,28.0
max,180.0,715.0,41.0


### Unique Values of Categorical Columns

In [13]:
# Distinct position labels, in order of first appearance.
# Note: the data spells midfielder as 'Midfilder'; filters must match that spelling.
football_wage_data["Position"].unique()

array(['Forward', 'Midfilder', 'Goalkeeper', 'Defender'], dtype=object)

In [14]:
# Distinct nation codes (three-letter strings), in order of first appearance.
football_wage_data["Nation"].unique()

array(['FRA', 'BRA', 'ARG', 'BEL', 'ESP', 'AUT', 'EGY', 'ENG', 'NOR',
       'GER', 'SVN', 'SEN', 'POL', 'CRO', 'NED', 'POR', 'ITA', 'SRB',
       'ALG', 'CIV', 'DEN', 'CRC', 'KOR', 'GHA', 'URU', 'MAR', 'GAB',
       'SWE', 'SCO', 'COL', 'TUR', 'BIH', 'UKR', 'USA', 'CMR', 'CAN',
       'GUI', 'WAL', 'MLI', 'SUI', 'CTA', 'DOM', 'NIR', 'MEX', 'NGA',
       'VEN', 'CZE', 'SVK', 'JAM', 'CHI', 'ARM', 'NZL', 'ZAM', 'JPN',
       'RUS', 'PAR', 'ALB', 'IRL', 'MNE', 'KOS', 'FIN', 'GRE', 'AUS',
       'BFA', 'ECU', 'ISR', 'HUN', 'IRN', 'COD', 'TOG', 'ROU', 'UZB',
       'MKD', 'GNB', 'MOZ', 'GRN', 'GEO', 'ZIM', 'TUN', 'SUR', 'LUX',
       'GAM', 'EQG', 'HON', 'MSR', 'BUL', 'ANG', 'RSA', 'BDI', 'CPV',
       'CYP', 'PER', 'SLE', 'GLP', 'BEN', 'PHI', 'GUF', 'LBY', 'COM',
       'CGO', 'ISL', 'MAD', 'LTU', 'PAN', 'LVA', 'CUW', 'EST', 'IRQ',
       'GUY', 'PLE', 'THA', 'CHN', 'BER', 'SIN'], dtype=object)

In [15]:
# Distinct league names, in order of first appearance.
# Note: league names are used as-is from the data (e.g. 'Primiera Liga').
football_wage_data["League"].unique()

array(['Ligue 1 Uber Eats', 'La Liga', 'Premier League', 'Serie A',
       'Bundesliga', 'Primiera Liga'], dtype=object)