In [1]:
import pandas as pd
import numpy as np
from math import floor

To use this program, you'll need an account on completionist.me. Open your list of games and select the "Flat" display, then sort by playtime to see your top 100 games. For now these steps are manual; if I continue with this project, I might try to automate them. 

On the page for your top 100 games, choose "View Source" and save the resulting HTML file to this repository. Then enter the name of that file between the quotation marks in the following cell:

In [12]:
# Name (or path) of the saved completionist.me HTML page; it is handed to
# pd.read_html when the games table is loaded further down.
file = ''

In [3]:
def Cleanup(games):
    '''Cleans up a games dataframe taken from completionist.me.

    Parameters
    ----------
    games : pandas.DataFrame
        Raw table with (at least) the columns 'Game', 'Playtime',
        'Achievements' and 'Rating'. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by game title and sorted by playtime (descending), with:
        - 'Playtime' converted to total minutes (int); games without an
          hours component are removed,
        - 'Achv_Completed', 'Achv_Possible' and 'Achv_%' derived from the
          'Achievements' column, which is then dropped,
        - 'Rating' reduced to its first number as a float.
    '''

    #Set index to title
    games = games.set_index('Game')

    #Drops null or unneeded columns
    games = games.dropna(axis=1, how='all')
    #errors='ignore': an unneeded column that was entirely null has already
    #been removed by the dropna above and must not raise a KeyError here
    games = games.drop(['Last Unlock', 'Type / Systems', 'Achievements Change', 'Stats'],
                       axis=1, errors='ignore')

    #Drops unplayed games (explicit copy so the column assignments below
    #write to an independent frame rather than a slice of the input)
    games = games[games['Playtime'].notna()].copy()

    #Gets rid of recent playtime (everything after the '+')
    games['Playtime'] = games['Playtime'].str.split('+').str[0].str.strip()

    #Keeps only games played for more than an hour
    games = games[games['Playtime'].str.contains('h')].copy()

    #Converts playtime from hours/minutes to total minutes
    #The minutes part is optional so an hour-only value such as "10h" is
    #read as 10h 0m instead of raising an IndexError
    parts = games['Playtime'].str.extract(
        r'(?P<hours>[\d,]+)\s*h(?:\s*(?P<minutes>\d+)\s*m)?')
    hours = parts['hours'].str.replace(',', '', regex=False).astype(int)
    minutes = parts['minutes'].fillna('0').astype(int)
    games['Playtime'] = hours * 60 + minutes

    #Creates a game completion column based on achievements
    #This isn't used for the chooseGames function, but satisfies curiosity
    achievements = games['Achievements']
    #na=False keeps the mask strictly boolean for rows without achievements
    has_slash = achievements.str.contains('/', regex=False, na=False)
    pieces = achievements.str.split('/')
    #Completed is only known when the value has the "done / total" form
    games['Achv_Completed'] = pieces.str[0].where(has_slash).astype(float)
    games['Achv_Possible'] = pieces.str[-1].astype(float)
    games['Achv_%'] = (games['Achv_Completed'] / games['Achv_Possible'] * 100).round(1)
    #Achievements exist but no percentage could be computed -> count as 0%
    games.loc[games['Achv_%'].isna() & games['Achv_Possible'].notna(), 'Achv_%'] = 0
    games = games.drop(['Achievements'], axis=1)

    #Fixes the rating to be just one number instead of two.
    #Also not used by randomization function
    games['Rating'] = games['Rating'].apply(lambda x: float(str(x).split()[0]))

    #Sorts by time played
    games = games.sort_values('Playtime', ascending=False)

    return games
    

In [4]:
def chooseGames(df, choices):
    '''Takes in a games dataframe and how many options the user wants.
    Returns the rows of the chosen games, randomly picked but weighted by playtime.

    Parameters
    ----------
    df : pandas.DataFrame
        Games dataframe with a numeric 'Playtime' column; the index labels
        identify the games.
    choices : int
        Number of random draws to make.

    Returns
    -------
    pandas.DataFrame
        The rows of `df` whose index label was drawn at least once, in the
        order they appear in `df`. Draws are made with replacement, so a game
        drawn more than once appears only once and the result can have fewer
        than `choices` rows.
    '''
    # Sample from the dataframe that was passed in rather than from a
    # notebook-level `games` variable, so the function honours its argument.
    weights = df['Playtime'] / df['Playtime'].sum()
    winners = np.random.choice(df.index, size=choices, p=weights)
    return df.loc[df.index.isin(winners)]

In [5]:
# Read the saved HTML page; pd.read_html returns a list of tables, of which the
# first is kept, with header=0 taking the column names from the first row.
games = pd.read_html(file, header=0)[0]

In [6]:
# Replace the raw table with its cleaned version. Cleanup calls
# set_index('Game'), so this cell expects the freshly loaded table: re-run the
# loading cell before running this one a second time.
games = Cleanup(games)

Here is a sample function call. To try it with your own games, replace the 3 with the number of choices you want to give yourself and run the cell. If you get fewer games than you asked for, the same game came up more than once, which is a sign that you should definitely play one of them. 

In [11]:
# Number of random draws to make. chooseGames returns each drawn game once, so
# the table shown can have fewer rows than this.
choices = 3
chooseGames(games, choices)

Unnamed: 0_level_0,Playtime,Last Played,Rating,Developer / Publisher,Achv_Completed,Achv_Possible,Achv_%
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Tropico 4,5302,2022-12-26 03:36:57,89.23,Haemimont Games Kalypso Media Digital,24.0,70.0,34.3
Planet Zoo,1405,2023-02-18 09:45:11,89.08,Frontier Developments Frontier Developments,15.0,38.0,39.5
Littlewood,419,2023-02-20 05:42:15,90.8,Sean Young SmashGames,11.0,60.0,18.3
