In [1]:
!pip install lightfm


Collecting lightfm
  Downloading lightfm-1.17.tar.gz (316 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/316.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m316.4/316.4 kB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: lightfm
  Building wheel for lightfm (setup.py) ... [?25l[?25hdone
  Created wheel for lightfm: filename=lightfm-1.17-cp311-cp311-linux_x86_64.whl size=831159 sha256=88001f1063e34d5dfa5388353e7461e276452f0d0762c930e2d403bba7544252
  Stored in directory: /root/.cache/pip/wheels/b9/0d/8a/0729d2e6e3ca2a898ba55201f905da7db3f838a33df5b3fcdd
Successfully built lightfm
Installing collected packages: lightfm
Successfully installed lightfm-1.17


In [2]:
pip install fuzzywuzzy


Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


In [12]:
#test
import numpy as np
import pandas as pd
from lightfm import LightFM
from lightfm.data import Dataset
import unicodedata
import random
from fuzzywuzzy import process

from lightfm.evaluation import precision_at_k, auc_score, recall_at_k

# Enhanced normalization functions
def normalize_text(text):
    """Return a trimmed, lowercase, ASCII-only form of ``text``.

    The input is converted with ``str()`` first, so non-string values are
    accepted. NFKD decomposition splits accented characters into a base letter
    plus combining marks; encoding to ASCII with ``'ignore'`` then drops every
    character that has no ASCII form.
    """
    trimmed = str(text).strip()
    decomposed = unicodedata.normalize('NFKD', trimmed)
    ascii_only = decomposed.encode('ascii', 'ignore').decode('utf-8')
    return ascii_only.lower()

# Data loading: three CSV files read from the current working directory.
transfer_df = pd.read_csv("Player_Contracts_Cleaned.csv")
staff_club = pd.read_csv("cleaned_staff.csv", encoding="cp1252")
agents = pd.read_csv('player_agents.csv')

# Create normalized versions: each *_norm column holds normalize_text() applied
# element-wise to the source column, and is used as the entity key further down.
transfer_df['Agency_norm'] = transfer_df['Agency Id'].map(normalize_text)
transfer_df['Player_norm'] = transfer_df['Player Id'].map(normalize_text)
transfer_df['Club_norm'] = transfer_df['Club Id'].map(normalize_text)

staff_club['Staff_norm'] = staff_club['Name'].map(normalize_text)
staff_club['Club_norm'] = staff_club['Club'].map(normalize_text)

agents['Agents_norm'] = agents['Agent Name'].map(normalize_text)
agents['Agency_norm'] = agents['Agency'].map(normalize_text)

# Create name mappings for display: normalized key -> original display value.
# When a normalized key occurs on several rows, the last row's value wins.
player_id_to_name = {
    norm: name for norm, name in zip(transfer_df['Player_norm'], transfer_df['Player'])
}
club_id_to_name = {
    norm: name for norm, name in zip(transfer_df['Club_norm'], transfer_df['Club Name'])
}
agency_id_to_name = {
    norm: name for norm, name in zip(transfer_df['Agency_norm'], transfer_df['Agency Name'])
}

staff_name_map = {
    norm: name for norm, name in zip(staff_club['Staff_norm'], staff_club['Name'])
}
club_name_map_staff = {
    norm: name for norm, name in zip(staff_club['Club_norm'], staff_club['Club'])
}

agents_to_name = {
    norm: name for norm, name in zip(agents['Agents_norm'], agents['Agent Name'])
}
agency_to_name = {
    norm: name for norm, name in zip(agents['Agency_norm'], agents['Agency'])
}


# Enhanced model building
def build_model(interactions, epochs=200, random_state=None):
    """Configure and train a WARP-loss LightFM model on ``interactions``.

    Parameters
    ----------
    interactions : the interaction matrix passed to ``LightFM.fit``.
    epochs : int, default 200
        Number of training epochs.
    random_state : optional
        Forwarded to the ``LightFM`` constructor. Pass an integer seed to make
        training repeatable; ``None`` (the default) keeps the previous,
        unseeded behaviour.

    Returns
    -------
    The fitted ``LightFM`` instance (``fit`` returns the model itself).
    """
    model = LightFM(
        loss='warp',
        no_components=100,
        learning_rate=0.02,
        max_sampled=100,
        user_alpha=1e-6,
        item_alpha=1e-6,
        random_state=random_state
    )
    return model.fit(interactions, epochs=epochs, verbose=True)

# Agency-Club Model
# Users are normalized agencies, items are normalized clubs; every row of
# transfer_df contributes one (agency, club) interaction with weight 1.0.
club_dataset = Dataset()
club_dataset.fit(
    users=transfer_df['Agency_norm'].unique(),
    items=transfer_df['Club_norm'].unique()
)
# zip over the two columns yields the same tuples as iterating with
# DataFrame.iterrows(), without constructing a Series for every row.
club_interactions, _ = club_dataset.build_interactions(
    [
        (agency, club, 1.0)
        for agency, club in zip(transfer_df['Agency_norm'], transfer_df['Club_norm'])
    ]
)
club_model = build_model(club_interactions)

# Agency-Player Model
# Users are normalized agencies, items are normalized players; every row of
# transfer_df contributes one (agency, player) interaction with weight 1.0.
player_dataset = Dataset()
player_dataset.fit(
    users=transfer_df['Agency_norm'].unique(),
    items=transfer_df['Player_norm'].unique()
)
# zip over the two columns yields the same tuples as iterating with
# DataFrame.iterrows(), without constructing a Series for every row.
player_interactions, _ = player_dataset.build_interactions(
    [
        (agency, player, 1.0)
        for agency, player in zip(transfer_df['Agency_norm'], transfer_df['Player_norm'])
    ]
)
player_model = build_model(player_interactions)


# === Staff-Club Model ===
# Users are normalized staff names, items are normalized clubs; every row of
# staff_club contributes one (staff, club) interaction with weight 1.0.
staff_dataset = Dataset()
staff_dataset.fit(
    users=staff_club['Staff_norm'].unique(),
    items=staff_club['Club_norm'].unique()
)
# zip over the two columns yields the same tuples as iterating with
# DataFrame.iterrows(), without constructing a Series for every row.
staff_interactions, _ = staff_dataset.build_interactions(
    [
        (staff, club, 1.0)
        for staff, club in zip(staff_club['Staff_norm'], staff_club['Club_norm'])
    ]
)
staff_model = build_model(staff_interactions)

# Agency-Agent Model
# Users are normalized agent names, items are normalized agencies; every row
# of agents contributes one (agent, agency) interaction with weight 1.0.
agents_dataset = Dataset()
agents_dataset.fit(
    users=agents['Agents_norm'].unique(),
    items=agents['Agency_norm'].unique()
)
# zip over the two columns yields the same tuples as iterating with
# DataFrame.iterrows(), without constructing a Series for every row.
agents_interactions, _ = agents_dataset.build_interactions(
    [
        (agent, agency, 1.0)
        for agent, agency in zip(agents['Agents_norm'], agents['Agency_norm'])
    ]
)
agents_model = build_model(agents_interactions)



#MODEL EVALUATION
def evaluate_existing_model(model, interactions, k=5, train_interactions=None):
    """
    Evaluate an already-trained model.

    Parameters
    ----------
    model : fitted LightFM model.
    interactions : interaction matrix to score the model against.
    k : int, default 5
        Cut-off used for precision@k and recall@k.
    train_interactions : optional
        Forwarded to the LightFM metric functions as ``train_interactions``.
        When ``interactions`` is a held-out split, pass the training matrix
        here so known training pairs are left out of the ranking. ``None``
        (the default) keeps the previous behaviour.

    Returns: Dictionary of metrics with keys 'precision@k', 'auc' and
    'recall@k', each the mean of the per-user values.
    """
    return {
        'precision@k': precision_at_k(
            model, interactions, train_interactions=train_interactions, k=k
        ).mean(),
        'auc': auc_score(
            model, interactions, train_interactions=train_interactions
        ).mean(),
        'recall@k': recall_at_k(
            model, interactions, train_interactions=train_interactions, k=k
        ).mean()
    }


# Evaluate all models
# NOTE: each model is scored against the same interaction matrix it was fitted
# on, so the figures below describe fit to the training data, not held-out
# performance.
def _report_model_metrics(title, model, interactions):
    """Print precision/AUC/recall for one model and return the metrics dict."""
    print(f"\n=== {title} Model Evaluation ===")
    metrics = evaluate_existing_model(model, interactions)
    print(f"Precision: {metrics['precision@k']:.4f}")
    print(f"AUC: {metrics['auc']:.4f}")
    print(f"Recall: {metrics['recall@k']:.4f}")
    return metrics


club_metrics = _report_model_metrics("Agency-Club", club_model, club_interactions)
player_metrics = _report_model_metrics("Agency-Player", player_model, player_interactions)
staff_metrics = _report_model_metrics("Staff-Club", staff_model, staff_interactions)
agents_metrics = _report_model_metrics("Agents-Agencies", agents_model, agents_interactions)

#=== Interpretation Guide ===
#Precision@5 > 0.5 → Good (>50% of top-5 recommendations are relevant)
#AUC > 0.9 → Excellent ranking
#Recall@5 > 0.5 → Good coverage of relevant items

# Get mapping dictionaries
# Forward lookups: the first and third values returned by Dataset.mapping()
# are kept (normalized user key -> index, normalized item key -> index).
agency_id_map_club, _, club_id_map, _ = club_dataset.mapping()
agency_id_map_player, _, player_id_map, _ = player_dataset.mapping()
user_id_map_s, _, item_id_map_s, _ = staff_dataset.mapping()
agent_id_map_agency, _, agency_id_map_agent, _ = agents_dataset.mapping()

# Reverse lookups: index -> normalized key, one per forward map.
id_to_agency_club = dict(zip(agency_id_map_club.values(), agency_id_map_club.keys()))
id_to_club = dict(zip(club_id_map.values(), club_id_map.keys()))
id_to_agency_player = dict(zip(agency_id_map_player.values(), agency_id_map_player.keys()))
id_to_player = dict(zip(player_id_map.values(), player_id_map.keys()))

id_to_user_s = dict(zip(user_id_map_s.values(), user_id_map_s.keys()))  # staff index -> staff_norm
id_to_item_s = dict(zip(item_id_map_s.values(), item_id_map_s.keys()))  # club index -> club_norm

id_to_agent_agency = dict(zip(agent_id_map_agency.values(), agent_id_map_agency.keys()))  # agent index -> agent_norm
id_to_agency_agent = dict(zip(agency_id_map_agent.values(), agency_id_map_agent.keys()))  # agency index -> agency_norm



#RECOMMENDED CLUBS TO AGENCY
def recommend_clubs_to_agency(agency_identifier, top_n=5):
    """Get club recommendations for an agency.

    Parameters
    ----------
    agency_identifier : value identifying the agency; it is passed through
        ``normalize_text`` before lookup.
    top_n : int, default 5
        Number of highest-scoring clubs to return.

    Returns
    -------
    pandas.DataFrame with columns 'Club' and 'Score', best score first.
    An agency unknown to the agency-club model yields an empty frame.
    """
    agency_norm = normalize_text(str(agency_identifier))

    # Unknown agencies return an empty frame, as the other recommend_*
    # functions do, instead of raising KeyError on the lookup below.
    if agency_norm not in agency_id_map_club:
        return pd.DataFrame(columns=['Club', 'Score'])

    agency_idx = agency_id_map_club[agency_norm]
    scores = club_model.predict(agency_idx, np.arange(len(club_id_map)))

    results = []
    # argsort of the negated scores ranks clubs from highest to lowest score.
    for idx in np.argsort(-scores)[:top_n]:
        club_norm = id_to_club[idx]
        results.append({
            # Fall back to the normalized key when no display name is known.
            'Club': club_id_to_name.get(club_norm, club_norm),
            'Score': scores[idx]
        })

    return pd.DataFrame(results)

#RECOMMENDED PLAYERS TO AGENCY
def recommend_players_to_agency(agency_identifier, top_n=5):
    """Get player recommendations for an agency.

    Parameters
    ----------
    agency_identifier : value identifying the agency; it is passed through
        ``normalize_text`` before lookup.
    top_n : int, default 5
        Number of highest-scoring players to return.

    Returns
    -------
    pandas.DataFrame with columns 'Player' and 'Score', best score first.
    An agency unknown to the agency-player model yields an empty frame.
    """
    agency_norm = normalize_text(str(agency_identifier))

    # Unknown agencies return an empty frame, as the other recommend_*
    # functions do, instead of raising KeyError on the lookup below.
    if agency_norm not in agency_id_map_player:
        return pd.DataFrame(columns=['Player', 'Score'])

    agency_idx = agency_id_map_player[agency_norm]
    scores = player_model.predict(agency_idx, np.arange(len(player_id_map)))

    results = []
    # argsort of the negated scores ranks players from highest to lowest score.
    for idx in np.argsort(-scores):
        player_norm = id_to_player[idx]
        results.append({
            # Fall back to the normalized key when no display name is known.
            'Player': player_id_to_name.get(player_norm, player_norm),
            'Score': scores[idx]
        })
        if len(results) >= top_n:
            break

    return pd.DataFrame(results)

#RECOMMENDED AGENCIES TO CLUB
def recommend_agencies_to_club(club_identifier, top_n=5):
    """Rank agencies for a club with the agency-club model.

    The identifier is normalized before lookup. Returns a DataFrame with
    'Agency' and 'Score' columns ordered by descending score; agencies whose
    display name is NaN are left out. A club the model does not know yields an
    empty frame.
    """
    club_key = normalize_text(str(club_identifier))
    if club_key not in club_id_map:
        return pd.DataFrame(columns=['Agency', 'Score'])

    # Score every agency against this single club.
    n_agencies = len(agency_id_map_club)
    agency_indices = np.arange(n_agencies)
    club_indices = np.repeat(club_id_map[club_key], n_agencies)
    scores = club_model.predict(agency_indices, club_indices)

    ranked = []
    for agency_idx in np.argsort(-scores):
        agency_key = id_to_agency_club[agency_idx]
        display_name = agency_id_to_name.get(agency_key, agency_key)
        if not pd.isna(display_name):
            ranked.append({'Agency': display_name, 'Score': scores[agency_idx]})
            # Stop as soon as enough named agencies have been collected.
            if len(ranked) >= top_n:
                break

    return pd.DataFrame(ranked)

#RECOMMENDED AGENCIES TO PLAYERS
def recommend_agencies_to_player(player_identifier, top_n=5):
    """Rank agencies for a player with the agency-player model.

    The identifier is normalized before lookup. Returns a DataFrame with
    'Agency' and 'Score' columns ordered by descending score; agencies whose
    display name is NaN are left out. A player the model does not know yields
    an empty frame.
    """
    player_key = normalize_text(str(player_identifier))
    if player_key not in player_id_map:
        return pd.DataFrame(columns=['Agency', 'Score'])

    # Score every agency against this single player.
    n_agencies = len(agency_id_map_player)
    agency_indices = np.arange(n_agencies)
    player_indices = np.repeat(player_id_map[player_key], n_agencies)
    scores = player_model.predict(agency_indices, player_indices)

    ranked = []
    for agency_idx in np.argsort(-scores):
        agency_key = id_to_agency_player[agency_idx]
        display_name = agency_id_to_name.get(agency_key, agency_key)
        if not pd.isna(display_name):
            ranked.append({'Agency': display_name, 'Score': scores[agency_idx]})
            # Stop as soon as enough named agencies have been collected.
            if len(ranked) >= top_n:
                break

    return pd.DataFrame(ranked)


#RECOMMENDED CLUBS TO PLAYER
def recommend_clubs_to_player(player_identifier, top_n=5):
    """Get club recommendations for a player based on agency patterns.

    The player's agencies are read from ``transfer_df``; each agency's club
    scores from the agency-club model are averaged, and the clubs with the
    highest mean score are returned.

    Parameters
    ----------
    player_identifier : value identifying the player; it is passed through
        ``normalize_text`` before lookup.
    top_n : int, default 5
        Maximum number of clubs to return.

    Returns
    -------
    pandas.DataFrame with columns 'Club' and 'Score', best score first. Empty
    when the player is unknown or none of the player's agencies can be scored.
    """
    player_norm = normalize_text(str(player_identifier))

    if player_norm not in player_id_map:
        return pd.DataFrame(columns=['Club', 'Score'])

    # Get all agencies that represent this player
    player_agencies = set(
        transfer_df.loc[transfer_df['Player_norm'] == player_norm, 'Agency_norm']
    )

    # Predict clubs for each agency and aggregate scores
    club_indices = np.arange(len(club_id_map))
    all_scores = np.zeros(len(club_id_map))
    n_scored = 0
    for agency in player_agencies:
        if agency in agency_id_map_club:
            all_scores += club_model.predict(agency_id_map_club[agency], club_indices)
            n_scored += 1

    if n_scored == 0:
        return pd.DataFrame(columns=['Club', 'Score'])

    # Average over the agencies that actually contributed a score.
    all_scores /= n_scored

    results = []
    for idx in np.argsort(-all_scores):
        if len(results) >= top_n:
            break
        club_key = id_to_club[idx]
        club_name = club_id_to_name.get(club_key, club_key)
        # Skip unnamed clubs before counting towards top_n, so a NaN name
        # does not shrink the result below top_n.
        if pd.isna(club_name):
            continue
        results.append({
            'Club': club_name,
            'Score': all_scores[idx]
        })

    return pd.DataFrame(results, columns=['Club', 'Score'])


#RECOMMENDED PLAYERS TO CLUB
def recommend_players_to_club(club_identifier, top_n=5):
    """Get player recommendations for a club based on agency patterns.

    The agencies linked to the club are read from ``transfer_df``; each
    agency's player scores from the agency-player model are averaged, and the
    players with the highest mean score are returned.

    Parameters
    ----------
    club_identifier : value identifying the club; it is passed through
        ``normalize_text`` before lookup.
    top_n : int, default 5
        Maximum number of players to return.

    Returns
    -------
    pandas.DataFrame with columns 'Player' and 'Score', best score first.
    Empty when the club is unknown or none of its agencies can be scored.
    """
    club_norm = normalize_text(str(club_identifier))

    if club_norm not in club_id_map:
        return pd.DataFrame(columns=['Player', 'Score'])

    # Get all agencies that work with this club
    club_agencies = set(
        transfer_df.loc[transfer_df['Club_norm'] == club_norm, 'Agency_norm']
    )

    # Predict players for each agency and aggregate scores
    player_indices = np.arange(len(player_id_map))
    all_scores = np.zeros(len(player_id_map))
    n_scored = 0
    for agency in club_agencies:
        if agency in agency_id_map_player:
            all_scores += player_model.predict(agency_id_map_player[agency], player_indices)
            n_scored += 1

    if n_scored == 0:
        return pd.DataFrame(columns=['Player', 'Score'])

    # Average over the agencies that actually contributed a score.
    all_scores /= n_scored

    results = []
    for idx in np.argsort(-all_scores):
        if len(results) >= top_n:
            break
        player_key = id_to_player[idx]
        player_name = player_id_to_name.get(player_key, player_key)
        # Skip unnamed players before counting towards top_n, so a NaN name
        # does not shrink the result below top_n.
        if pd.isna(player_name):
            continue
        results.append({
            'Player': player_name,
            'Score': all_scores[idx]
        })

    return pd.DataFrame(results, columns=['Player', 'Score'])

#RECOMMENDED STAFF MEMBERS TO CLUB
def recommend_staff_to_club(club_name, top_n=5):
    """Rank staff members for a club with the staff-club model.

    ``club_name`` is normalized before lookup. Returns a DataFrame with
    'Staff' and 'Score' columns ordered by descending score, or an empty frame
    when the club is not known to the staff model.
    """
    club_key = normalize_text(club_name)
    if club_key not in item_id_map_s:
        return pd.DataFrame(columns=['Staff', 'Score'])

    # Score every staff member against this single club.
    n_staff = len(user_id_map_s)
    scores = staff_model.predict(
        np.arange(n_staff),
        np.repeat(item_id_map_s[club_key], n_staff)
    )

    best_indices = np.argsort(-scores)[:top_n]
    return pd.DataFrame([
        {
            # Display name if known, otherwise the normalized key.
            'Staff': staff_name_map.get(id_to_user_s[i], id_to_user_s[i]),
            'Score': scores[i]
        }
        for i in best_indices
    ])


#RECOMMENDED CLUBS TO STAFF
def recommend_clubs_to_staff(staff_name, top_n=5):
    """Rank clubs for a staff member with the staff-club model.

    ``staff_name`` is normalized before lookup. Returns a DataFrame with
    'Club' and 'Score' columns ordered by descending score, or an empty frame
    when the staff member is not known to the staff model.
    """
    staff_key = normalize_text(staff_name)
    if staff_key not in user_id_map_s:
        return pd.DataFrame(columns=['Club', 'Score'])

    # Score every club for this single staff member.
    all_clubs = np.arange(len(item_id_map_s))
    scores = staff_model.predict(user_id_map_s[staff_key], all_clubs)

    best_indices = np.argsort(-scores)[:top_n]
    return pd.DataFrame([
        {
            # Display name if known, otherwise the normalized key.
            'Club': club_name_map_staff.get(id_to_item_s[i], id_to_item_s[i]),
            'Score': scores[i]
        }
        for i in best_indices
    ])


#RECOMMENDED AGENCIES TO AGENTS
def recommend_agencies_to_agent(agent_identifier, top_n=5):
    """Get agency recommendations for an agent.

    Parameters
    ----------
    agent_identifier : value identifying the agent; it is passed through
        ``normalize_text`` before lookup.
    top_n : int, default 5
        Number of highest-scoring agencies to return.

    Returns
    -------
    pandas.DataFrame with columns 'Agency' and 'Score', best score first.
    'Agency' holds the display name from ``agency_to_name`` (the normalized
    key when no display name is recorded). An unknown agent yields an empty
    frame.
    """
    agent_norm = normalize_text(str(agent_identifier))

    if agent_norm not in agent_id_map_agency:
        return pd.DataFrame(columns=['Agency', 'Score'])

    agent_idx = agent_id_map_agency[agent_norm]

    scores = agents_model.predict(
        np.repeat(agent_idx, len(agency_id_map_agent)),
        np.arange(len(agency_id_map_agent))
    )

    results = []
    for idx in np.argsort(-scores):
        # Index -> normalized key -> display name, like the other recommenders;
        # previously the normalized key itself was returned.
        agency_key = id_to_agency_agent[idx]
        agency_name = agency_to_name.get(agency_key, agency_key)
        # Skip agencies whose display name is missing (NaN in the source data).
        if pd.isna(agency_name):
            continue
        results.append({
            'Agency': agency_name,
            'Score': scores[idx]
        })
        if len(results) >= top_n:
            break

    return pd.DataFrame(results)


##RECOMMENDED AGENTS TO AGENCY
def recommend_agents_to_agency(agency_identifier, top_n=5):
    """Get agent recommendations for an agency.

    Parameters
    ----------
    agency_identifier : value identifying the agency; it is passed through
        ``normalize_text`` before lookup.
    top_n : int, default 5
        Number of highest-scoring agents to return.

    Returns
    -------
    pandas.DataFrame with columns 'Agent' and 'Score', best score first.
    'Agent' holds the display name from ``agents_to_name`` (the normalized
    key when no display name is recorded). An unknown agency yields an empty
    frame.
    """
    agency_norm = normalize_text(str(agency_identifier))

    if agency_norm not in agency_id_map_agent:
        return pd.DataFrame(columns=['Agent', 'Score'])

    agency_idx = agency_id_map_agent[agency_norm]

    scores = agents_model.predict(
        np.arange(len(agent_id_map_agency)),
        np.repeat(agency_idx, len(agent_id_map_agency))
    )

    results = []
    for idx in np.argsort(-scores):
        # Index -> normalized key -> display name, like the other recommenders;
        # previously the normalized key itself was returned.
        agent_key = id_to_agent_agency[idx]
        agent_name = agents_to_name.get(agent_key, agent_key)
        # Skip agents whose display name is missing (NaN in the source data).
        if pd.isna(agent_name):
            continue
        results.append({
            'Agent': agent_name,
            'Score': scores[idx]
        })
        if len(results) >= top_n:
            break

    return pd.DataFrame(results)






# Updated testing function
def test_recommendations():
    """Smoke-test every recommender on randomly chosen entities.

    Picks one random agency (known to both the club and the player model),
    club, player, staff member, staff-side club, agent and agent-side agency,
    calls the matching recommend_* function for each and prints the result
    table, or "No recommendations" when the table is empty.

    Returns None. Prints a message and returns early when no agency is shared
    by the club and player models.
    """
    def _show(frame):
        """Print a result frame without its index, or a placeholder if empty."""
        print(frame.to_string(index=False) if not frame.empty else "No recommendations")

    # Get agencies that exist in both models
    valid_agencies = list(set(agency_id_map_club.keys()) & set(agency_id_map_player.keys()))

    if not valid_agencies:
        print("No valid agencies found for testing")
        return

    test_agency = random.choice(valid_agencies)
    test_club = random.choice(list(club_id_map.keys()))
    test_player = random.choice(list(player_id_map.keys()))

    test_staff = random.choice(staff_club['Staff_norm'].values)
    test_club_for_staff = random.choice(staff_club['Club_norm'].values)

    test_agent = random.choice(list(agent_id_map_agency.keys()))
    test_agency_for_agent = random.choice(list(agency_id_map_agent.keys()))

    print("\n=== TEST RESULTS ===")
    print(f"\nTesting with Agency: {agency_id_to_name.get(test_agency, test_agency)}")

    # Club recommendations
    clubs = recommend_clubs_to_agency(test_agency)
    print("\nTop recommended clubs for agency:")
    _show(clubs)

    # Player recommendations
    players = recommend_players_to_agency(test_agency)
    print("\nTop recommended players for agency:")
    _show(players)

    # Agencies recommended for club
    print(f"\nTesting with Club: {club_id_to_name.get(test_club, test_club)}")
    print(f"\nAgencies recommended for club: ")
    agencies_club = recommend_agencies_to_club(test_club)
    _show(agencies_club)

    # Players recommendations for club
    print(f"\nPlayers recommended for club: ")
    club_players = recommend_players_to_club(test_club)
    _show(club_players)

    # Agency recommendations for player
    print(f"\nTesting with Player: {player_id_to_name.get(test_player, test_player)}")
    print(f"\nAgencies recommended for player: ")
    agencies_player = recommend_agencies_to_player(test_player)
    _show(agencies_player)

    # Club recommendations for player
    print(f"\nClubs recommended for player: ")
    player_clubs = recommend_clubs_to_player(test_player)
    _show(player_clubs)

    # Staff recommendation
    print(f"\nTesting with Staff: {staff_name_map.get(test_staff, test_staff)}")
    staff_recs = recommend_clubs_to_staff(test_staff)
    print("\nRecommended clubs for staff:")
    _show(staff_recs)

    print(f"\nTesting with Club: {club_name_map_staff.get(test_club_for_staff, test_club_for_staff)}")
    club_recs = recommend_staff_to_club(test_club_for_staff)
    print("\nRecommended staff for club:")
    _show(club_recs)

    # Agents recommended for agency
    # test_agency_for_agent is a normalized key, so the display name comes from
    # agency_to_name (id_to_agency_agent is keyed by integer index).
    print(f"\nTesting with Agency : {agency_to_name.get(test_agency_for_agent, test_agency_for_agent)}")
    agents_for_agency = recommend_agents_to_agency(test_agency_for_agent)
    print("\nTop recommended agents for agency:")
    _show(agents_for_agency)

    # Agencies recommended for agent
    # test_agent is a normalized key, so the display name comes from
    # agents_to_name (id_to_agent_agency is keyed by integer index).
    print(f"\nTesting with Agent: {agents_to_name.get(test_agent, test_agent)}")
    agencies_for_agent = recommend_agencies_to_agent(test_agent)
    print("\nTop recommended agencies for agent:")
    _show(agencies_for_agent)




# Run tests
# Runs the smoke test when the cell executes. The entities are drawn with the
# `random` module and no seed is set in the visible code, so the printed
# recommendations differ from run to run.
test_recommendations()



import pickle

# Save models and mappings
# The bundle holds the agency-club and agency-player models, their datasets,
# the display-name and index lookups, and transfer_df. The staff and agent
# models are not part of it.
_recommendation_artifacts = {
    'club_model': club_model,
    'player_model': player_model,
    'club_dataset': club_dataset,
    'player_dataset': player_dataset,
    'player_id_to_name': player_id_to_name,
    'club_id_to_name': club_id_to_name,
    'agency_id_to_name': agency_id_to_name,
    'agency_id_map_club': agency_id_map_club,
    'club_id_map': club_id_map,
    'agency_id_map_player': agency_id_map_player,
    'player_id_map': player_id_map,
    'id_to_agency_club': id_to_agency_club,
    'id_to_club': id_to_club,
    'id_to_agency_player': id_to_agency_player,
    'id_to_player': id_to_player,
    'transfer_df': transfer_df
}

with open('recommendation_models.pkl', 'wb') as f:
    pickle.dump(_recommendation_artifacts, f)


# Download the file
#from google.colab import files
#files.download('recommendation_models.pkl')

Epoch: 100%|██████████| 200/200 [02:20<00:00,  1.42it/s]
Epoch: 100%|██████████| 200/200 [02:48<00:00,  1.19it/s]
Epoch: 100%|██████████| 200/200 [00:11<00:00, 17.25it/s]
Epoch: 100%|██████████| 200/200 [00:26<00:00,  7.59it/s]



=== Agency-Club Model Evaluation ===
Precision: 0.5161
AUC: 1.0000
Recall: 0.8513

=== Agency-Player Model Evaluation ===
Precision: 0.5386
AUC: 1.0000
Recall: 0.8799

=== Staff-Club Model Evaluation ===
Precision: 0.2322
AUC: 1.0000
Recall: 0.9093

=== Agents-Agencies Model Evaluation ===
Precision: 0.2036
AUC: 1.0000
Recall: 0.9994

=== TEST RESULTS ===

Testing with Agency: Professional Football Network D.O.O.

Top recommended clubs for agency:
                   Club    Score
               FC Koper 1.928941
    FK Borac Banja Luka 1.891144
           FK Cukaricki 1.884497
            FK Sarajevo 1.882754
FK Zeljeznicar Sarajevo 0.860345

Top recommended players for agency:
         Player    Score
Renato Gojkovic 1.929158
   Gregor Bajde 1.927927
  Denis Popovic 1.912068
Luka Djordjevic 1.876645
     Jens Grahl 0.927265

Testing with Club: Paksi FC

Agencies recommended for club: 
                         Agency    Score
     The Footballers Group Ltd. 3.288459
           Team Ti