In [81]:
import numpy as np
import pandas as pd
from scipy import sparse
import pickle
import ast


def recommend():
    # Import user games, steam game data and other user data
    myg = pd.read_excel("Data/mygames.xlsx")
    sg = pd.read_csv("Data/clean_sg.csv", index_col="appid")
    # Load from sparse file and column headers
    ug = sparse.load_npz("Data/User_Data_tr.npz")
    with open ("Data/g_list.txt", 'rb') as fp:
        mygam = pickle.load(fp)
    # Create dataframe with user games in
    mygames = pd.DataFrame(index=mygam)
    mygames["mg"] = [(int(col) in list(myg.appid))*1 for col in mygam]
    nmg = sum(mygames["mg"])
    # Convert dataframe to sparse matrix
    mg = sparse.coo_matrix(mygames.astype("int32").values).tocsr()
    # Dot product of matricies to count number of games both parties own
    prod = np.array(ug.dot(mg).sum(1))
    # Count number of games a user has
    ct = np.array(ug.sum(1))
    # Calc similarity score
    sim = prod**2 / ((nmg+1)*(ct+1))
    # Extract indicies of 10 best matches. 
    tt = pd.DataFrame(sim).nlargest(30, 0).sort_index()
    # Weight scores in matrix by similarity and then sum to get total score by game
    s = ug[tt.index].multiply(np.array([[i] for i in tt[0]])).sum(0)
    mygames["score"] = s.tolist()[0]
    # Join results with information frame
    mygames = mygames.join(sg)
    # Adjust score to reflect review positivity
    mygames["score"] = mygames["score"]*mygames["%pos"]
    return mygames.nlargest(1000, "score")


def filter_results(r,c):
    # Filter data based on user config settings
    if c["max_age"]:
        r = r.loc[r["required_age"]<=c["max_age"]]
    if c["language"]:
        r = r.loc[r.apply(lambda x: c["language"] in ast.literal_eval(x))]
    if c["free_only"]:
        r = r.loc[r["price"]==0]
    if c["test_mode"] != True:
        r = r.loc[r["mg"]==0]
    return r.nlargest(c["per_page"]*c["page"], "score").iloc[-c["per_page"]:]


def format_output(r, c):
    # Set dataframe to desired format for display
    cols = ['name', '%pos', 'genre', 'price', 'discount']
    if c["test_mode"]:
        cols = ["mg", "score"]+cols  
    return r[cols]

    
def main(c):
    # Create recommendation scores
    recommendations = recommend()
    # Filter to match configs
    r = filter_results(recommendations, c)
    # Create output
    r = format_output(r, c)
    return r


# Configs
configs = {
    "max_age"   : False,
    "language"  : "English",
    "free_only" : False,
    "per_page"  : 20,
    "page"      : 1,
    "test_mode" : True
}

r = main(configs)
r

ValueError: malformed node or string: 212680    1
220       0
72850     1
240       0
620       1
         ..
8400      0
8970      0
9000      0
9010      0
9030      0
Name: mg, Length: 1000, dtype: object