In [None]:
import pandas as pd
import re

# Let pandas show up to 100 rows and 100 columns before truncating the display.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 100)

# Load the raw CSV and preview its first rows.
df = pd.read_csv("files/fifa21.csv")
df.head()

In [None]:
def convert_financial(value):
    """Convert a monetary string such as '€103.5M' or '€560K' to a float.

    The euro sign is removed and the 'M' / 'K' suffixes are rewritten as the
    exponents 'e6' / 'e3', so the remaining text can be parsed as a number.

    Args:
        value: The cell value. Non-string values are returned unchanged.

    Returns:
        The amount as a float, None when the text is not a plain number, or
        the original value when it is not a string.
    """
    if not isinstance(value, str):
        return value
    numeric_text = value.replace('€', '').replace('M', 'e6').replace('K', 'e3')
    try:
        # float() parses the literal directly; unlike eval() it can never
        # execute code embedded in the CSV data.
        return float(numeric_text)
    except ValueError:
        return None

def convert_height(height):
    """Convert a height string to centimetres.

    Two formats are recognised:
      * a centimetre value such as '170cm', returned as a float;
      * a feet/inches value such as 5'7", i.e. any text containing exactly
        two digit groups, converted at 30.48 cm per foot and 2.54 cm per inch.

    Args:
        height: The cell value. Non-string values are returned unchanged.

    Returns:
        The height in centimetres as a float, None when the text matches
        neither format, or the original value when it is not a string.
    """
    if not isinstance(height, str):
        return height
    text = height.strip().lower()

    # A value that is already in centimetres needs no unit conversion.
    cm_match = re.fullmatch(r'(\d+(?:\.\d+)?)\s*cm', text)
    if cm_match:
        return float(cm_match.group(1))

    digit_groups = re.findall(r'(\d+)', text)
    if len(digit_groups) == 2:
        feet, inches = digit_groups
        return int(feet) * 30.48 + int(inches) * 2.54

    # Anything else cannot be interpreted; report it as missing instead of
    # failing on the feet/inches unpacking.
    return None

def convert_weight(weight):
    """Convert a weight string to kilograms.

    The first number in the text is used. Text ending in 'kg' is taken to be
    in kilograms already; any other text (e.g. '159lbs') is treated as pounds
    and multiplied by 0.453592.

    Args:
        weight: The cell value. Non-string values are returned unchanged.

    Returns:
        The weight in kilograms as a float, None when the text contains no
        number, or the original value when it is not a string.
    """
    if not isinstance(weight, str):
        return weight
    text = weight.strip().lower()

    number = re.search(r'\d+(?:\.\d+)?', text)
    if number is None:
        # No digits at all: report as missing rather than raising IndexError.
        return None
    amount = float(number.group())

    if text.endswith('kg'):
        return amount
    return amount * 0.453592

def remove_extradata(value):
    """Tidy a text cell; non-string values pass through unchanged.

    Star glyphs are removed, each newline becomes a space, one pass replaces
    every pair of spaces with a single space, and the ends are stripped.
    """
    if not isinstance(value, str):
        return value
    cleaned = value
    # The order matters: newlines must become spaces before pairs of spaces
    # are collapsed.
    for old, new in (('★', ''), ('\n', ' '), ('  ', ' ')):
        cleaned = cleaned.replace(old, new)
    return cleaned.strip()

# Monetary columns: parse the currency strings into numbers.
for money_column in ('Value', 'Wage', 'Release Clause'):
    df[money_column] = df[money_column].apply(convert_financial)

df['Height'] = df['Height'].apply(convert_height)

# Text columns: strip star glyphs, newlines and surrounding whitespace.
for text_column in ('IR', 'A/W', 'W/F', 'SM', 'Hits', 'Team & Contract'):
    df[text_column] = df[text_column].apply(remove_extradata)

# NOTE(review): convert_weight is defined above but is not applied to any
# column here; confirm whether a weight column should be converted too.
df

In [None]:
def convert_date(date_str):
    """Parse a date written like 'Jul 1, 2004' with the format '%b %d, %Y'.

    Returns whatever pd.to_datetime produces for the input, or None when
    pd.to_datetime raises ValueError.
    """
    try:
        parsed = pd.to_datetime(date_str, format='%b %d, %Y')
    except ValueError:
        parsed = None
    return parsed

# drop_duplicates() returns a new frame, so the result has to be assigned
# back; the bare call discarded it. The explicit copy makes the frame
# independent of the original before columns are assigned below.
df = df.drop_duplicates().copy()

df['Joined'] = df['Joined'].apply(convert_date)
df['Joined Year'] = df['Joined'].dt.year

# Split "<team> <start> ~ <end>" into the team name and the contract years.
# Rows that do not end in a "YYYY ~ YYYY" range get NaN in both columns.
df[['Team', 'Contract']] = df['Team & Contract'].str.extract(r'^(.*\D)(\d{4} ~ \d{4})$')
# The team group also captures the space in front of the start year.
df['Team'] = df['Team'].str.strip()
df[['Start Year', 'End Year']] = df['Contract'].str.split(' ~ ', expand=True)

# Remove the helper columns and the columns not needed afterwards.
df = df.drop(columns=['Contract', 'Team & Contract', 'Unnamed: 0', 'photoUrl', 'playerUrl', 'Start Year'])

# Assign the filled columns back: calling fillna(..., inplace=True) on a
# column selected from the frame is not guaranteed to update the frame.
df['Loan Date End'] = df['Loan Date End'].fillna('UnKnown')
df['Team'] = df['Team'].fillna('UnKnown')
df['End Year'] = df['End Year'].fillna('0000')
df.isnull().sum()