In [41]:
# =======================
# DEFENDER RECOMMENDATION MODEL
# =======================

# Mount Google Drive so the dataset stored under /content/drive/MyDrive can be
# read below. `google.colab` is only importable inside a Colab runtime.
# Re-running this cell with the drive already mounted just prints a notice.
from google.colab import drive
drive.mount('/content/drive')

# Import libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# =========================
# 1. Load dataset
# =========================
file_path = "/content/drive/MyDrive/defenders_only_fixed_2023.csv"  # update if needed
df = pd.read_csv(file_path)

print("Dataset loaded successfully ✅")
print(df.head())

# =========================
# 2. Feature Engineering
# =========================
# Build one space-separated text "document" per player from the categorical
# columns below; a missing value contributes an empty string.
feature_columns = [
    'position',
    'foot',
    'country_of_citizenship',
    'current_club_domestic_competition_id',
]
feature_text = df[feature_columns[0]].fillna('')
for column in feature_columns[1:]:
    feature_text = feature_text + " " + df[column].fillna('')
df['features'] = feature_text

# =========================
# 3. Vectorization
# =========================
# TF-IDF encode the per-player documents, then precompute the full
# player-by-player cosine similarity matrix (row i / column j follow the row
# order of df).
vectorizer = TfidfVectorizer()
feature_matrix = vectorizer.fit_transform(df['features'])
cosine_sim = cosine_similarity(feature_matrix)

# =========================
# 4. Recommendation Function
# =========================
def recommend_defenders(player_name, max_value, n=5):
    """Recommend up to ``n`` defenders similar to ``player_name`` within a budget.

    Candidates are all other rows of the module-level ``df`` that have the same
    ``foot`` value as the seed player and a ``market_value_in_eur`` of at most
    ``max_value``. They are ranked by their cosine similarity to the seed
    (taken from the module-level ``cosine_sim`` matrix), most similar first;
    equally similar candidates are ordered by higher market value.

    Args:
        player_name: Exact value to look up in the ``name`` column. If several
            rows share the name, the first one is used as the seed.
        max_value: Inclusive upper bound on ``market_value_in_eur``.
        n: Maximum number of rows to return.

    Returns:
        A DataFrame (possibly empty) with the columns ``name``,
        ``current_club_name``, ``country_of_citizenship``, ``position``,
        ``foot`` and ``transfer_value`` (the renamed market value), or
        ``None`` after printing a message when the player is not in ``df``.
        If the seed's ``foot`` is missing, no row compares equal to it and
        the result is empty.
    """
    name_mask = (df['name'] == player_name).to_numpy()
    if not name_mask.any():
        print(f"❌ Player '{player_name}' not found in database.")
        return None

    # cosine_sim is addressed by row position, so work with positions
    # throughout instead of assuming the index labels are 0..len(df)-1.
    seed_pos = int(name_mask.argmax())  # first matching row
    seed_foot = df['foot'].iloc[seed_pos]

    candidates = df[['name', 'current_club_name', 'country_of_citizenship',
                     'position', 'foot', 'market_value_in_eur']].copy()
    candidates['_similarity'] = cosine_sim[seed_pos]

    # Same foot as the seed, within budget, and never the seed itself.
    # The seed is excluded by position: players with identical features tie
    # with it at similarity 1.0, so "drop the top-ranked row" is unreliable.
    keep = (
        (candidates['foot'] == seed_foot)
        & (candidates['market_value_in_eur'] <= max_value)
    ).to_numpy().copy()
    keep[seed_pos] = False
    candidates = candidates[keep]

    # Rank by similarity first, then by transfer value among equally similar
    # players; the helper column is dropped to keep the returned shape stable.
    ranked = candidates.sort_values(
        by=['_similarity', 'market_value_in_eur'],
        ascending=[False, False],
    ).head(n)
    ranked = ranked.drop(columns='_similarity')

    return ranked.rename(columns={'market_value_in_eur': 'transfer_value'})

# =========================
# 5. Example Usage
# =========================
# Read the query interactively. Surrounding whitespace is stripped from the
# name because the lookup in recommend_defenders is an exact string match.
seed_player = input("⚽ Enter the name of the defender you want recommendations for: ").strip()

# Accept budgets typed with thousands separators or a euro sign
# (e.g. "€25,000,000") instead of crashing with a traceback.
raw_budget = input("💰 Enter your maximum transfer value (in EUR): ")
try:
    budget = int(raw_budget.replace(",", "").replace("€", "").replace(" ", ""))
except ValueError:
    budget = None
    recs = None
    print(f"❌ '{raw_budget}' is not a valid whole-number amount in EUR.")
else:
    print(f"\n🔎 Recommendations for: {seed_player} (same foot, Budget ≤ €{budget:,})\n")
    recs = recommend_defenders(seed_player, max_value=budget, n=5)

    if recs is None:
        # Unknown player: recommend_defenders has already printed the reason,
        # so do not follow it with a misleading "within your budget" message.
        pass
    elif recs.empty:
        print("⚠️ No recommendations found within your budget and criteria.")
    else:
        print(recs.to_string(index=False))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Dataset loaded successfully ✅
                     name country_of_citizenship  position   foot  \
0          William Saliba                 France  Defender  right   
1              Rúben Dias               Portugal  Defender  right   
2          Josko Gvardiol                Croatia  Defender   left   
3  Trent Alexander-Arnold                England  Defender  right   
4      Alessandro Bastoni                  Italy  Defender   left   

   height_in_cm current_club_domestic_competition_id current_club_name  \
0         192.0                                  GB1        Arsenal FC   
1         187.0                                  GB1   Manchester City   
2         185.0                                  GB1   Manchester City   
3         180.0                                  GB1      Liverpool FC   
4         190.0                                  IT1    