In [1]:
import numpy as np
import pandas as pd

from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the sparse matrix keyed by match_id and turn every 0 into NaN.
drafts = pd.read_csv('./data/sparse_matrix.csv', index_col='match_id').replace(0, np.nan)

# Add one all-NaN row at the end, indexed one past the largest existing index.
max_index = drafts.index.max()
new_index = max_index + 1
new_row = pd.DataFrame(np.nan, index=[new_index], columns=drafts.columns)
drafts = pd.concat([drafts, new_row])

drafts

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,129,130,131,132,133,134,135,136,137,138
7750912161,,18.0,,,,,,,4.0,,...,,,,,,,,,,
7750914469,,,,,,,,,4.0,18.0,...,,,,,,,,22.0,,
7750915644,,,,,,,,,,,...,,,,,,,,,6.0,
7750937564,24.0,,9.0,,,,,,,,...,,,,,,,,,,
7750968496,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7881664207,,,,,,,,,,,...,,,,,,,,,,
7881677439,,,,,,,,,,13.0,...,,,,,,,,,,
7881696382,,,,,,,,,,,...,,,,,,,,6.0,,
7881723710,,,,,,,23.0,,,,...,,,,,,,,,,


## Item-Based Filtering

In [3]:
def raw_cos(a, b):
    """
    Cosine similarity of two Series, restricted to the labels where both are non-null.

    Parameters:
    - a: pd.Series, first vector (may contain NaN).
    - b: pd.Series, second vector (may contain NaN).

    Returns:
    - 0 when the two Series share no non-null label; otherwise the scalar
      produced by ``cosine_similarity`` on the shared entries.
    """
    # Labels that carry a value in both Series.
    shared_labels = a.dropna().index.intersection(b.dropna().index)
    left = a.loc[shared_labels]
    right = b.loc[shared_labels]

    # Nothing to compare: treat the pair as having zero similarity.
    if not (len(left) and len(right)):
        return 0

    # cosine_similarity works on 2-D inputs, so pass each vector as a single row
    # and unwrap the 1x1 result.
    pairwise = cosine_similarity(
        left.values.reshape(1, -1),
        right.values.reshape(1, -1),
    )
    return pairwise[0][0]


def item_complete(df_utility, k):
    """
    Fill the missing entries of a utility matrix with item-based collaborative filtering.

    Every row is mean-centred, item-item similarities are computed with
    ``raw_cos`` on the centred columns, and each missing (row, item) entry is
    predicted as

        row_mean + sum(sim * (rating - row_mean)) / sum(sim)

    where the sums run over those of the item's ``k`` most similar items that
    the row actually has a value for.

    Parameters:
    - df_utility: pd.DataFrame, utility matrix (one row per user/draft, one
      column per item) with NaN for missing values. It is not modified.
    - k: int, number of most similar items considered for each prediction.

    Returns:
    - pd.DataFrame with the same index and columns as ``df_utility`` and float
      values: known entries are kept, predictable entries are filled in, and
      entries whose net neighbour weight is not positive stay NaN.
    """
    items = df_utility.columns
    n_items = len(items)

    # Row means (NaN skipped) are needed both for centring and for prediction.
    user_means = df_utility.mean(axis=1)
    mean_centered = df_utility.sub(user_means, axis=0)

    # Build the similarity matrix in a float ndarray that this function owns.
    # Writing through `DataFrame.values` is not safe: under pandas
    # Copy-on-Write that array is read-only. The diagonal is left as NaN so an
    # item is never its own neighbour.
    similarity = np.full((n_items, n_items), np.nan, dtype=float)
    centered_columns = [mean_centered.iloc[:, pos] for pos in range(n_items)]
    for i in range(n_items):
        for j in range(i + 1, n_items):
            # Cosine similarity is symmetric: compute each pair once.
            score = raw_cos(centered_columns[i], centered_columns[j])
            similarity[i, j] = score
            similarity[j, i] = score

    # The k nearest items of each item do not depend on the row being
    # completed, so rank them once. NaN sorts last; the stable sort breaks ties
    # by column order, which makes the result deterministic.
    neighbours = [
        np.argsort(-similarity[i], kind="stable")[:k] for i in range(n_items)
    ]

    ratings = df_utility.to_numpy(dtype=float)
    means = user_means.to_numpy(dtype=float)
    rated = ~np.isnan(ratings)
    completed = ratings.copy()

    for u in range(ratings.shape[0]):
        for i in range(n_items):
            if rated[u, i]:
                continue

            # Keep only the neighbours this row has a value for.
            candidates = neighbours[i]
            usable = candidates[rated[u, candidates]]
            if usable.size == 0:
                continue  # entry stays NaN

            weights = similarity[i, usable]
            # NOTE(review): the denominator is the signed sum of similarities,
            # as in the original implementation; negative similarities can
            # therefore shrink or cancel it. Confirm whether sum(|sim|) is wanted.
            total_weight = weights.sum()
            if total_weight > 0:
                deviation = (weights * (ratings[u, usable] - means[u])).sum()
                completed[u, i] = means[u] + deviation / total_weight

    return pd.DataFrame(completed, index=df_utility.index, columns=df_utility.columns)
    

In [4]:
# Context-based filtering: given the current pick/ban order, blank out every
# value greater than that order so recommendations only see the draft so far.

def limit_game_state(order, drafts):
    """
    Return a copy of ``drafts`` in which every value greater than ``order`` is NaN.

    Parameters:
    - order: number, the current pick/ban order; values above it are blanked.
    - drafts: pd.DataFrame, matrix of values to filter. It is not modified.

    Returns:
    - pd.DataFrame with the same index and columns as ``drafts``; values
      ``<= order`` and existing NaNs are kept, values ``> order`` become NaN.
    """
    # DataFrame.mask inserts NaN where the condition holds and returns a new
    # frame. This avoids the `np.NaN` alias, which was removed in NumPy 2.0.
    return drafts.mask(drafts > order)

In [5]:

def recommend(draft, current_draft, df_completed, n):
    """
    Recommends up to n unrated items for a draft based on the completed utility matrix.

    Parameters:
    - draft: int, the ID (index label) of the draft for which to generate recommendations.
    - current_draft: pd.DataFrame, the original utility matrix with NaN for unrated items
      (the limited game state).
    - df_completed: pd.DataFrame, the completed utility matrix with predicted ratings.
    - n: int, the maximum number of recommendations to return.

    Returns:
    - recommendations: list, recommended item IDs sorted by predicted rating
      (descending); ties keep the column order of the matrix. Items without a
      predicted rating (NaN) are never recommended, so the list can be shorter
      than n.
    """
    # Step 1: Identify unrated heroes for the draft in the original matrix
    draft_row = current_draft.loc[draft]
    unrated_heroes = draft_row[draft_row.isna()].index

    # Step 2: Retrieve predicted ratings for these heroes; drop the ones the
    # completion step could not predict, so they do not pad the result.
    predicted_ratings = df_completed.loc[draft, unrated_heroes].dropna()

    # Step 3: Sort by predicted rating (descending). A stable sort keeps tied
    # heroes in column order, which makes the output deterministic.
    ranking = np.argsort(-predicted_ratings.to_numpy(dtype=float), kind="stable")
    sorted_recommendations = predicted_ratings.index[ranking].tolist()

    # Step 4: Return the top n recommendations
    return sorted_recommendations[:n]


In [13]:


# Simulate a draft that has reached pick/ban order 7.
# Two values are written into the last row of `drafts` by column position:
# 1 at position 78 and 4 at position 9 (presumably the pick orders of two
# already-chosen heroes in the empty row appended at load time; confirm).
for column_position, value in ((78, 1), (9, 4)):
    drafts.iloc[-1, column_position] = value

# Hide everything after order 7, then complete the matrix using the 5 most
# similar items per prediction.
current_draft = limit_game_state(7, drafts)
item_based = item_complete(current_draft, 5)
item_based



Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,129,130,131,132,133,134,135,136,137,138
7750912161,,,,,,,,,4.0,,...,,,,,,,,,,
7750914469,,,,,,,1.0,,4.0,,...,,,,,,,,,,
7750915644,,,,,3.0,,,5.5,,,...,,,,,,,5.0,,6.0,5.0
7750937564,5.0,,,,,,5.5,,,,...,,,,,,,,,,3.0
7750968496,,,,,,,,4.0,1.0,,...,,,,,,,4.0,,,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7881664207,,,,,,,5.0,,,,...,,,,,,,6.0,,,
7881677439,,,3.0,,,,,,3.0,,...,,,,,,,,,,
7881696382,,,5.0,,,,,,,,...,,,,,,,,6.0,5.0,
7881723710,,,,,,,,,1.0,,...,,,,,,,,,,


In [14]:
# Top-10 recommendations for the draft stored in the last row of the matrix.
recommended = recommend(
    draft=current_draft.index[-1],
    current_draft=current_draft,
    df_completed=item_based,
    n=10,
)

In [15]:
# Display the list returned by recommend(): column labels of the utility matrix.
recommended

['19', '41', '48', '81', '97', '64', '9', '13', '25', '42']