In [7]:
'''
I'm gonna use my database of TFT matches to generate a dataset
I can work with to make the training programme more realistic
'''

from pymongo import MongoClient

In [8]:
client = MongoClient()
db = client['tfstacticsDB']

In [9]:
game_versions = db.matches.distinct('info.game_version')
releases = sorted(list(set([float(x.split()[-1].split('/')[-1][:-1]) for x in game_versions])))
releases


[13.18, 13.23, 13.24, 14.1]

In [10]:
release_counts = {}
for release in releases:
    release_counts[release] = db.matches.count_documents({'info.game_version': {'$regex': f'{release}>'}})
release_counts

{13.18: 98168, 13.23: 165417, 13.24: 58633, 14.1: 62730}

This is one of the simplest datasets I could create, the goal would be to predict the placement based on those features. The model wouldn't be very useful, but it serves for testing purposes. After I get comfortable with the workflows I'll add more features

In [5]:
columns = [
    'placement',
    'level',
    'gold_left',
    'total_damage_to_players',
    'num_units'
]


latest_release = releases[-1]

pipeline = [
    {
        '$match': {
            'info.game_version': {'$regex': f'{latest_release}>'}
        }
    }
]

query = db.matches.aggregate(pipeline, allowDiskUse=False)

with open('tft_matches.csv', 'w') as f:
    f.write(','.join(columns) + '\n')
    for match in query:
        participants = match['info']['participants']
        for participant in match['info']['participants']:
            features = {
                "placement":participant['placement'],
                "level":participant['level'],
                "gold_left":participant['gold_left'],
                "total_damage_to_players":participant['total_damage_to_players'],
                "num_units":len(participant.get('units', [])),
            }
            f.write(','.join([str(features.get(col, '')) for col in columns]) + '\n')