# Combining Both Rankings Using Reciprocal Rank Fusion

### Import

In [67]:
import pandas as pd
import numpy as np

### Load datasets
Rename the collaborative-filtering columns so that both DataFrames share the same column names for merging.

In [None]:
# Read the collaborative-filtering export and normalise its headers to the
# lowercase names (playerid / rank / gameid / title) used for merging.
collaborative_df = pd.read_csv('all_players_top10_recommendations.csv').rename(
    columns={
        'Player ID': 'playerid',
        'Rank': 'rank',
        'Game ID': 'gameid',
        'Title': 'title',
    }
)
# Content-based recommendations are loaded without renaming.
content_df = pd.read_csv('recommendations_for_all_players.csv')
display(collaborative_df.head())

### Convert similarity scores to ranks
Convert each player's content-based similarity scores into ranks (rank 1 = highest similarity score).

In [73]:
# Coerce the similarity scores to a numeric dtype before ranking.
numeric_scores = pd.to_numeric(content_df['similarity_score'])
content_df['similarity_score'] = numeric_scores

# Rank within each player: the highest score gets rank 1, and ties are
# resolved by order of appearance (method='first').
scores_by_player = content_df.groupby('playerid')['similarity_score']
content_df['rank'] = scores_by_player.rank(method='first', ascending=False).astype(int)

# Keep only the columns needed for the fusion step.
ranked_content_df = content_df.loc[:, ['playerid', 'gameid', 'title', 'rank']]
print(ranked_content_df.head())


            playerid  gameid                        title  rank
0  76561198060698936  239200  Amnesia: A Machine for Pigs     1
1  76561198060698936  231160                  The Swapper     2
2  76561198060698936  365590   Tom Clancy’s The Division™     5
3  76561198060698936  285900                  Gang Beasts     6
4  76561198060698936  481110                   The Bunker     3


In [57]:
# Distinct player ids present in each recommendation source
player_ids_collab = collaborative_df['playerid'].unique().tolist()
player_ids_content = ranked_content_df['playerid'].unique().tolist()

collab_id_set = set(player_ids_collab)
content_id_set = set(player_ids_content)

# Overlap between the two sources, and the ids exclusive to each one
common_player_ids = collab_id_set & content_id_set
only_in_collab = collab_id_set - common_player_ids
only_in_content = content_id_set - common_player_ids

print(f"Number of common player ids: {len(common_player_ids)}")
print(f"Number of player ids only in collaborative df: {len(only_in_collab)}")
print(f"Number of player ids only in content df: {len(only_in_content)}")


Number of common player ids: 9894
Number of player ids only in collaborative df: 55763
Number of player ids only in content df: 37038


## Reciprocal Rank Fusion (RRF) Explanation

**Purpose:** This function combines two ranked recommendation lists—one from a *collaborative filtering* model and another from a *content-based* model—into a single unified ranking for each user.

**How it works:** RRF (Reciprocal Rank Fusion) assigns a *score* to each recommended item based on its rank position in each model’s list.  
Higher-ranked items receive higher scores, and items appearing in both lists get their scores summed—rewarding agreement between models.

**Formula:**
$$
\text{RRF\_score}(d) = \sum_{s \in \{collab, content\}} \frac{1}{k + \text{rank}_{s}(d)}
$$

Where:
- $d$ = a recommended game
- $s$ = source system (collaborative or content-based)
- $\text{rank}_s(d)$ = the position of game $d$ in source $s$’s ranked list
- $k$ = a smoothing constant (default 60 in `get_fused_ranks`) that reduces the effect of rank differences

A game that appears in only one of the two lists contributes 0 for the missing source. After computing these scores, items are re-ranked **within each user** by descending `RRF_score`, producing a unified `combinedrank`.

In [None]:
def get_fused_ranks(collaborative_df, ranked_content_df, k=60):
    """Fuse two per-player ranked recommendation lists with Reciprocal Rank Fusion.

    Each (playerid, gameid) pair receives ``1 / (k + rank)`` from every list it
    appears in; a list that does not contain the pair contributes 0. Pairs are
    then re-ranked within each player by descending summed score.

    Parameters
    ----------
    collaborative_df : pd.DataFrame
        Collaborative-filtering recommendations with columns ``playerid``,
        ``gameid``, ``rank`` and, optionally, ``title``.
    ranked_content_df : pd.DataFrame
        Content-based recommendations with the same columns.
    k : int or float, default 60
        Non-negative smoothing constant added to every rank.

    Returns
    -------
    pd.DataFrame
        Columns ``playerid``, ``gameid``, ``title``, ``collabrank``,
        ``contentrank``, ``combinedrank``, sorted by player and combined rank.
        ``collabrank`` / ``contentrank`` are NaN where a game is missing from
        that source. Neither input frame is modified.

    Raises
    ------
    ValueError
        If ``k`` is negative (``k + rank`` could then be zero or negative,
        producing infinite or negative scores).
    """
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    collab_df = collaborative_df.rename(columns={'rank': 'collabrank'})
    content_df = ranked_content_df.rename(columns={'rank': 'contentrank'})

    # Outer join keeps games recommended by only one of the two models.
    merged_df = pd.merge(
        collab_df,
        content_df,
        on=['playerid', 'gameid'],
        how='outer',
        suffixes=('_collab', '_content')
    )

    # Coalesce the title. The suffixed columns only exist when BOTH inputs
    # carry a 'title' column; otherwise the merge leaves a single unsuffixed
    # 'title' (or none at all), which must not raise a KeyError.
    if 'title_collab' in merged_df.columns and 'title_content' in merged_df.columns:
        merged_df['title'] = merged_df['title_collab'].fillna(merged_df['title_content'])
    elif 'title' not in merged_df.columns:
        merged_df['title'] = None

    # Reciprocal-rank contribution of each source; NaN rank (game absent
    # from that source) contributes 0.
    recip_collab = (1 / (k + merged_df['collabrank'])).fillna(0)
    recip_content = (1 / (k + merged_df['contentrank'])).fillna(0)

    # The final score is the sum of the individual reciprocal scores
    merged_df['rrf_score'] = recip_collab + recip_content

    # Convert the fused score into a per-player rank (1 = best). With
    # method='first', equal scores are ranked in merged-row order.
    merged_df['combinedrank'] = (
        merged_df.groupby('playerid')['rrf_score']
        .rank(method='first', ascending=False)
        .astype(int)
    )

    final_cols = ['playerid', 'gameid', 'title', 'collabrank', 'contentrank', 'combinedrank']

    # Sort for readability
    final_df = merged_df[final_cols].sort_values(by=['playerid', 'combinedrank'])

    return final_df



In [77]:
# --- Run the function ---
# Use k=60, the smoothing constant described in the RRF explanation and the
# function's default. With k=0 the score degenerates to a plain sum of 1/rank,
# so a game ranked first by a single model (score 1.0) ties or outranks games
# that both models agree on (e.g. rank 2 in both: 0.5 + 0.5).
fused_results = get_fused_ranks(collaborative_df, ranked_content_df, k=60)

# Export to CSV
fused_results.to_csv('fused_recommendations_all_players.csv', index=False)

In [None]:
# Group by playerid and show recommendations for players that have
# recommendations from both models.
# Only a handful of players are printed: emitting one table per qualifying
# player would flood the notebook output. Raise the cap to inspect more.
MAX_PLAYERS_TO_PRINT = 5

players_printed = 0
for playerid, group in fused_results.groupby('playerid'):
    if group['collabrank'].notna().any() and group['contentrank'].notna().any():
        print(f"Recommendations for Player ID: {playerid}")
        print(group.to_markdown(index=False))
        print("\n")
        players_printed += 1
        if players_printed >= MAX_PLAYERS_TO_PRINT:
            break

In [None]:

# Per player, count the rows that carry a rank from each model.
# count() skips NaN, so 0 means the player has no rows from that model.
player_stats = fused_results.groupby('playerid')[['collabrank', 'contentrank']].count()

# Get the index (playerid) of players who have at least one row from each model
players_in_both = player_stats[(player_stats['collabrank'] > 0) &
                               (player_stats['contentrank'] > 0)].index

# Filter the fused_results to only keep these players
players_with_both_df = fused_results[fused_results['playerid'].isin(players_in_both)].copy()

# Report the number of players and a short preview rather than every id,
# which would print thousands of ids into the notebook output.
preview_ids = players_in_both[:10].to_list()
print(f"--- Filtered for Players in Both Lists ({len(players_in_both)} players, e.g. {preview_ids}) ---")

# Takes the first 5 rows of each player's group, in the existing row order.
top_5_df = players_with_both_df.groupby('playerid').head(5)


#output_filename = 'top_5_fused_ranks_per_player.csv'
#top_5_df.to_csv(output_filename, index=False)
#print(f"Successfully saved to {output_filename}")



--- Filtered for Players in Both Lists (Players [76561197960272112, 76561197960272169, 76561197960273069, 76561197960278782, 76561197960283501, 76561197960283914, 76561197960285031, 76561197960288251, 76561197960292462, 76561197960294884, 76561197960305054, 76561197960315996, 76561197960323682, 76561197960333814, 76561197960344431, 76561197960365241, 76561197960367819, 76561197960371132, 76561197960376165, 76561197960399877, 76561197960400971, 76561197960409453, 76561197960417919, 76561197960424448, 76561197960431837, 76561197960444395, 76561197960453399, 76561197960460699, 76561197960463278, 76561197960469767, 76561197960477022, 76561197960501870, 76561197960507446, 76561197960532616, 76561197960541294, 76561197960556024, 76561197960558959, 76561197960565916, 76561197960576978, 76561197960597021, 76561197960647831, 76561197960701412, 76561197960703595, 76561197960707530, 76561197960759094, 76561197960772584, 76561197960798014, 76561197960833481, 76561197960859880, 76561197960874101, 7