In [1]:
import pymongo
import requests
from bs4 import BeautifulSoup
import time
import json

In [3]:
mc = pymongo.MongoClient()  # Connect to the MongoDB server using default settings
db = mc['chess_predictions']  # Use (or create) a database called 'election_predictions'
players = db['players']  # Use (or create) a collection called 'profiles'

In [2]:
def get_player_profile(username):
    try:
        response = requests.get(f'https://api.chess.com/pub/player/{username}')
        return json.loads(response.content.decode('utf-8'))
    except:
        return [] 
        
def get_player_stats(username):
    try:
        response = requests.get(f'https://api.chess.com/pub/player/{username}/stats')
        return json.loads(response.content.decode('utf-8'))
    except:
        return []
        
def get_player_games(username):
    '''return a list of lists where each list contains the games played each the months of January through May 2018'''
    
    months = ['01', '02', '03', '04', '05']
    month_games = []
    for month in months:
        try:
            response = requests.get(f'https://api.chess.com/pub/player/{username}/games/2018/{month}')
            month_games.append([json.loads(response.content.decode('utf-8'))])
        except:
            continue
    return month_games

def player_data_to_mongoDB(username, mongoDB_connection, database, collection):
    
    mc = mongoDB_connection
    
    #use/create a database
    db = mc[database]
    
    #use/create a collection
    collection = db[collection]
    
    #query Chess.com api for data
    profile = get_player_profile(username)
    stats = get_player_stats(username)
    games = get_player_games(username)
    
    #insert player data into database
    collection.insert_one({**profile,
                           **stats,
                           'games': games
                            })

def all_player_data_to_mongoDB(players, mongoDB_connection, database, collection, verbose=False):
    '''insert all player data into a mongoDB
       Params:
          players: list of player names
          mongoDB_connection: open connection to database ex. pymongo.MongoClient()
          database: name of database
          collection: name of collection
       Keyword Args:
          verbose: if True print player name after each insert, default is False
    '''
    for player in players:
        player_data_to_mongoDB(player, mongoDB_connection, database, collection)
        if verbose:
            print(player)
    return pd.DataFrame(list(db[collection].find()))

In [224]:
player_data_to_mongoDB('babu500',pymongo.MongoClient(), 'chess_predictions', 'players')

In [4]:
df = pd.DataFrame(list(db['players'].find()))
df.head()

Unnamed: 0,@id,_id,avatar,chess960_daily,chess_blitz,chess_bullet,chess_daily,chess_rapid,country,followers,games,joined,last_online,location,name,player_id,status,url,username
0,https://api.chess.com/pub/player/babu500,5afa039f098388ad5ff6a3a1,,,"{'last': {'rating': 777, 'date': 1525820389, '...",,"{'last': {'rating': 1200, 'date': 1516320601, ...","{'last': {'rating': 974, 'date': 1522794278, '...",https://api.chess.com/pub/country/US,1,[[{'games': [{'url': 'https://www.chess.com/li...,1516059527,1526310229,,,42361082,premium,https://www.chess.com/member/babu500,babu500
1,https://api.chess.com/pub/player/0rlandomagic,5afa05ce098388c83c3f08fa,,"{'last': {'rating': 1257, 'date': 1526259679, ...","{'last': {'rating': 1800, 'date': 1526089345, ...","{'last': {'rating': 2117, 'date': 1526323151, ...","{'last': {'rating': 1425, 'date': 1525869836, ...",,https://api.chess.com/pub/country/US,13,[[{'games': [{'url': 'https://www.chess.com/da...,1515424234,1526322679,,,42121838,basic,https://www.chess.com/member/0rlandoMagic,0rlandomagic
2,https://api.chess.com/pub/player/0ldtower,5afa063e098388c83c3f08fc,,,,,,,https://api.chess.com/pub/country/US,0,"[[{'games': []}], [{'games': []}], [{'games': ...",1515958780,1526117639,,Nick Chamberlain,42324950,basic,https://www.chess.com/member/0ldTower,0ldtower
3,https://api.chess.com/pub/player/0rlandomagic,5afa0640098388c83c3f08fd,,"{'last': {'rating': 1257, 'date': 1526259679, ...","{'last': {'rating': 1800, 'date': 1526089345, ...","{'last': {'rating': 2117, 'date': 1526323151, ...","{'last': {'rating': 1425, 'date': 1525869836, ...",,https://api.chess.com/pub/country/US,13,[[{'games': [{'url': 'https://www.chess.com/da...,1515424234,1526322679,,,42121838,basic,https://www.chess.com/member/0rlandoMagic,0rlandomagic
4,https://api.chess.com/pub/player/0xvyper,5afa0641098388c83c3f08fe,,,"{'last': {'rating': 705, 'date': 1526217305, '...",,,"{'last': {'rating': 870, 'date': 1516238541, '...",https://api.chess.com/pub/country/US,1,[[{'games': [{'url': 'https://www.chess.com/li...,1515610811,1526216631,,Vyper Dev,42192576,basic,https://www.chess.com/member/0xVyper,0xvyper


In [39]:
games = df['games']

In [54]:
len(games[0][0][0]['games'])

10

In [55]:
month_games = {}
for i in range(4):
    labels = ['January', 'February', 'March', 'April', 'May']
    month_games[labels[i]] = len(games[0][i][0]['games'])
    
print(month_games)

{'January': 10, 'February': 0, 'March': 0, 'April': 23}


In [60]:
my_dict = {'a': [1], 'b': [2], 'c': [3]}
my_dict['a'].append(2)


In [62]:
print(my_dict)

{'a': [1, 2], 'b': [2], 'c': [3]}


In [None]:
df['games'][player][month][0]['games'][one game from the month]['url',
                                                                'pgn',
                                                                'time_control',
                                                                'end_time',
                                                                'rated',
                                                                'fen',
                                                                'time_class',
                                                                'rules',
                                                                'white',
                                                                'black']


In [7]:
#grab only important columns
df_chess = df[['avatar',
               'followers',
               'games',
               'joined',
               'last_online',
               'location',
               'name',
               'player_id',
               'status',
               'username'
               ]]



In [11]:
df_chess['january_games'] = df_chess['games'].apply(lambda x: get_january(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [31]:
df_chess.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1533 entries, 0 to 1532
Data columns (total 12 columns):
avatar           666 non-null object
followers        1533 non-null int64
games            1533 non-null object
joined           1533 non-null int64
last_online      1533 non-null int64
location         75 non-null object
name             952 non-null object
player_id        1533 non-null int64
status           1533 non-null object
username         1533 non-null object
january_games    1533 non-null object
feb_games        1533 non-null object
dtypes: int64(4), object(8)
memory usage: 143.8+ KB


In [30]:
len(df_chess['january_games'][0]['games'])

10

In [34]:
games = df['games'].values

In [35]:
january = games[0]
february = games[1]
march = games[2]
april = games[3]
may = games[4]

In [38]:
january

[[{'games': [{'black': {'@id': 'https://api.chess.com/pub/player/smal10',
      'rating': 1065,
      'result': 'win',
      'username': 'smal10'},
     'eco': 'https://www.chess.com/openings/C50-Giuoco-Piano-Game-Four-Knights-Game',
     'end_time': 1516154262,
     'fen': '4K1q1/8/3k1P2/1B1b4/8/8/8/8 w - -',
     'pgn': '[Event "Live Chess"]\n[Site "Chess.com"]\n[Date "2018.01.17"]\n[Round "-"]\n[White "babu500"]\n[Black "smal10"]\n[Result "0-1"]\n[ECO "https://www.chess.com/openings/C50-Giuoco-Piano-Game-Four-Knights-Game"]\n[WhiteElo "953"]\n[BlackElo "1065"]\n[TimeControl "900+10"]\n[Termination "smal10 won by checkmate"]\n[StartTime "01:57:42"]\n[EndDate "2018.01.17"]\n[EndTime "01:57:42"]\n[Link "https://www.chess.com/live/game/2556567984"]\n\n1. e4 {[%clk 0:15:08]} 1... e5 {[%clk 0:15:08]} 2. Nf3 {[%clk 0:15:11]} 2... Nc6 {[%clk 0:15:17]} 3. Nc3 {[%clk 0:15:19]} 3... Nf6 {[%clk 0:15:26]} 4. Bc4 {[%clk 0:15:28]} 4... Bc5 {[%clk 0:15:33]} 5. O-O {[%clk 0:15:34]} 5... d6 {[%clk 0:

In [47]:
january_games = []
february_games = []
march_games = []
april_games = []
may_games = []
for game in games:
    january_games.append(game[0])
    february_games.append(game[1])
    march_games.append(game[2])
    april_games.append(game[3])
    may_games.append(game[4])

In [None]:
january_games