# In [None]:
# Import Libraries and Data:
import pandas as pd
import matplotlib.pyplot as plt

# Load the raw FIFA 21 export; every later step reads or mutates this frame.
fifa21messy = pd.read_csv(
    filepath_or_buffer="./HEALTHIT-MMUST-DATA-SCIENCE/datasets/fifa21_raw_data.csv",
)

# Clean Data: Wage and Value
# - Remove currency symbols and convert to integers
def _euro_to_int(amounts):
    """Convert amounts such as '€1.5M', '€560K' or '€0' to whole euros.

    Args:
        amounts: pandas Series of strings; each entry is an optional '€'
            sign, a decimal number, and an optional 'K' (thousand) or
            'M' (million) suffix.

    Returns:
        An integer Series aligned with ``amounts``.

    Raises:
        ValueError: if any entry does not match the expected format
            (missing values included).
    """
    text = amounts.str.strip().str.strip("€")
    # Parse with a strict pattern instead of evaluating each cell as an
    # expression: file contents are never executed, and malformed cells are
    # reported explicitly rather than failing somewhere inside an evaluator.
    parts = text.str.extract(r"^(?P<number>\d+(?:\.\d+)?)(?P<suffix>[KM]?)$")
    unparsed = parts["number"].isna()
    if unparsed.any():
        examples = amounts[unparsed].unique()[:5].tolist()
        raise ValueError(f"Unrecognised currency amounts: {examples}")
    # No suffix maps to NaN here, which means "already in euros" -> factor 1.
    scale = parts["suffix"].map({"K": 1_000, "M": 1_000_000}).fillna(1)
    # Round before casting: astype(int) truncates, so a float product landing
    # a hair below a whole number would otherwise lose one euro.
    return (parts["number"].astype(float) * scale).round().astype(int)


# Changing 'Value' and 'Wage' to int type
for money_column in ("Value", "Wage"):
    fifa21messy[money_column] = _euro_to_int(fifa21messy[money_column])

# **1. Highlight Potential Targets:**
# Value obtained per unit of wage. A zero Wage does not raise here: the
# division yields inf (or NaN when Value is zero as well).
fifa21messy["value_to_wage_ratio"] = fifa21messy["Value"].div(fifa21messy["Wage"])

# Define color scheme based on ratio
def color_by_ratio(row):
    """Return the marker colour for one player's value-to-wage ratio.

    Args:
        row: mapping (e.g. a DataFrame row) with a "value_to_wage_ratio" entry.

    Returns:
        "green" when the ratio is above 5, "yellow" when it is above 3 (up to
        and including 5), otherwise "red". A NaN ratio fails both comparisons
        and therefore also yields "red".
    """
    ratio = row["value_to_wage_ratio"]
    if ratio > 5:
        return "green"
    return "yellow" if ratio > 3 else "red"

# Tag every player with the colour of its ratio band (evaluated row by row).
fifa21messy["marker_color"] = fifa21messy.apply(func=color_by_ratio, axis="columns")

# Scatter plot: Wage on x, Value on y, one dot per player in its band colour.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(
    x=fifa21messy["Wage"],
    y=fifa21messy["Value"],
    c=fifa21messy["marker_color"],
    alpha=0.7,
)

# **2. Filter by Position:**
# Create filtered DataFrame for specific positions (e.g., midfielders).
# na=False treats a missing "Positions" entry as "not a midfielder"; without
# it the mask contains NaN and the boolean indexing below raises.
# NOTE(review): this matches the literal substring "MF". If the dataset uses
# position codes such as CM/CDM/CAM/LM/RM, nothing will match - confirm
# against the data before relying on this subset.
midfielders = fifa21messy[fifa21messy["Positions"].str.contains("MF", na=False)]

# Plot filtered data on same axes; these blue points are drawn on top of the
# ratio-coloured points for the same players.
ax.scatter(
    midfielders["Wage"], midfielders["Value"], c="blue", alpha=0.7, label="Midfielders"
)

# Legend (only labelled artists appear in it), axis labels and title
ax.legend()
ax.set(xlabel="Wage", ylabel="Value", title="Player Wage vs. Value")

# **3. Explore Correlations:**
# Pearson correlation coefficient between the cleaned Wage and Value columns
correlation = fifa21messy["Wage"].corr(other=fifa21messy["Value"], method="pearson")

# Report the coefficient rounded to two decimals
print(f"Correlation between Wage and Value: {correlation:.2f}")


# - Players with high value-to-wage ratios are highlighted in green, medium in yellow, and low in red.
# - Midfielders are plotted as a separate group for comparison.
# - The correlation coefficient between Wage and Value is calculated and printed.