In [18]:
import numpy as np
import pandas as pd
from scipy import sparse
import pickle
import re


def recommend():
    """Score the games in the catalogue for the current user.

    Reads four files under ``Data/``: the user's own games
    (``mygames.xlsx``), the game information table (``clean_sg.csv``), a
    sparse ownership matrix of other users (``User_Data_tr.npz``) and the
    pickled list of game ids (``g_list.txt``). The ownership matrix is
    assumed to have one row per user and one column per id in ``g_list``,
    in the same order -- TODO confirm against the code that writes it.

    Each other user gets a similarity ``shared**2 / ((mine+1) * (theirs+1))``
    where ``shared`` is the number of games both own. The 30 most similar
    users' libraries are summed, weighted by similarity, to give a per-game
    score, which is then multiplied by the game's ``%pos`` column.

    Returns:
        DataFrame indexed by the ids from ``g_list`` holding ``mg`` (1 when
        the current user owns the game, else 0), ``score`` and the joined
        columns of ``clean_sg.csv``, limited to the 1000 highest scores.
    """
    # Import user games, steam game data and other user data
    myg = pd.read_excel("Data/mygames.xlsx")
    sg = pd.read_csv("Data/clean_sg.csv", index_col="appid")
    # Load from sparse file and column headers
    ug = sparse.load_npz("Data/User_Data_tr.npz")
    # NOTE: pickle.load runs arbitrary code from the file; only load a
    # g_list.txt that comes from a trusted source.
    with open("Data/g_list.txt", 'rb') as fp:
        mygam = pickle.load(fp)
    # Build the set of owned ids once so each membership test is O(1)
    # instead of rebuilding and scanning a list for every catalogue entry.
    owned = set(myg.appid)
    # Create dataframe flagging which catalogue games the user owns
    mygames = pd.DataFrame(index=mygam)
    mygames["mg"] = [int(int(col) in owned) for col in mygam]
    nmg = mygames["mg"].sum()
    # Convert dataframe to a sparse column vector
    mg = sparse.coo_matrix(mygames.astype("int32").values).tocsr()
    # Dot product of matrices to count number of games both parties own
    prod = np.array(ug.dot(mg).sum(1))
    # Count number of games a user has
    ct = np.array(ug.sum(1))
    # Calc similarity score; the +1 terms keep the denominator non-zero
    sim = prod**2 / ((nmg + 1) * (ct + 1))
    # Extract indices of the 30 best matches
    tt = pd.DataFrame(sim).nlargest(30, 0).sort_index()
    # Weight scores in matrix by similarity and then sum to get total score by game
    weights = tt[0].to_numpy().reshape(-1, 1)
    s = ug[tt.index.to_numpy()].multiply(weights).sum(0)
    mygames["score"] = s.tolist()[0]
    # Join results with information frame
    mygames = mygames.join(sg)
    # Adjust score to reflect review positivity
    mygames["score"] = mygames["score"] * mygames["%pos"]
    return mygames.nlargest(1000, "score")


def filter_results(r, c):
    """Apply the user's config filters to the scored games and return one page.

    Args:
        r: DataFrame of scored games. The filters read its
            ``required_age``, ``languages``, ``price``, ``mg`` and
            ``score`` columns.
        c: Config mapping with the keys ``max_age``, ``language``,
            ``free_only``, ``test_mode``, ``per_page`` and ``page``.
            A falsy ``max_age``, ``language`` or ``free_only`` disables
            that filter (so ``max_age=0`` means "no age limit"). Unless
            ``test_mode`` is truthy, games flagged as already owned
            (``mg != 0``) are removed.

    Returns:
        The rows of page ``c["page"]`` (1-based, ``c["per_page"]`` rows per
        page) ordered by descending ``score``. A partial last page holds
        only its remaining rows and a page past the end is empty.
    """
    # Filter data based on user config settings
    if c["max_age"]:
        r = r.loc[r["required_age"] <= c["max_age"]]
    if c["language"]:
        # Case-insensitive substring match; lower-case the needle once.
        wanted = c["language"].lower()
        r = r.loc[r["languages"].apply(lambda x: wanted in str(x).lower())]
    if c["free_only"]:
        r = r.loc[r["price"] == 0]
    if not c["test_mode"]:
        r = r.loc[r["mg"] == 0]
    # Slice the requested page explicitly. Taking the last per_page rows of
    # the top per_page*page would repeat rows from earlier pages whenever
    # fewer than per_page*page rows survive the filters.
    stop = c["per_page"] * c["page"]
    start = max(stop - c["per_page"], 0)
    return r.nlargest(stop, "score").iloc[start:stop]


def format_output(r, c):
    """Select and order the columns of ``r`` that are shown to the user.

    Args:
        r: DataFrame containing at least the display columns (and ``mg`` /
            ``score`` when test mode is on).
        c: Config mapping; only ``test_mode`` is read.

    Returns:
        ``r`` restricted to name, %pos, genre, price and discount, with
        ``mg`` and ``score`` placed in front when ``c["test_mode"]`` is
        truthy.
    """
    display_cols = ['name', '%pos', 'genre', 'price', 'discount']
    # Test mode exposes the ownership flag and raw score for inspection.
    debug_cols = ["mg", "score"] if c["test_mode"] else []
    return r[debug_cols + display_cols]

    
def main(c):
    """Run the full pipeline: score games, filter by config, format for display.

    Args:
        c: Config mapping passed on to ``filter_results`` and
            ``format_output``.

    Returns:
        The formatted DataFrame produced by ``format_output``.
    """
    scored = recommend()
    page = filter_results(scored, c)
    return format_output(page, c)


# Run configuration; a falsy max_age, language or free_only leaves that
# filter switched off.
configs = dict(
    max_age=False,
    language="English",
    free_only=False,
    per_page=10,
    page=1,
    test_mode=False,
)

r = main(configs)
# Bare expression: presumably left last so the notebook displays the table.
r

Unnamed: 0,name,%pos,genre,price,discount
220,Half-Life 2,97.2,Action,1.99,80
240,Counter-Strike: Source,96.0,Action,1.99,80
340,Half-Life 2: Lost Coast,86.2,Action,0.0,0
8930,Sid Meier's Civilization® V,96.1,Strategy,29.99,0
320,Half-Life 2: Deathmatch,89.6,Action,0.99,80
210770,Sanctum 2,90.1,"Action, Indie, Strategy",3.74,75
200510,XCOM: Enemy Unknown,94.6,Strategy,7.49,75
49520,Borderlands 2,93.3,"Action, RPG",4.99,75
8190,Just Cause 2,91.2,"Action, Adventure",1.49,90
20920,The Witcher 2: Assassins of Kings Enhanced Edi...,89.8,RPG,2.99,85
