In [2]:
'''
Notebook to generate graphs for player skill over time at different puzzles
'''

import pandas as pd
import numpy as np
from tqdm import tqdm_notebook as tqdm

# Hook tqdm into pandas via tqdm's `pandas()` integration. Instantiating
# tqdm() with no iterable is what renders the empty progress-bar widget that
# appears in this cell's output.
tqdm().pandas()

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [3]:
# Load the tactics attempts table from its CSV export.
attempts = pd.read_csv(
    '/w/225/1/chess/tactics/glicko_user_tactics_problem.csv_00'
)

In [4]:
# For every puzzle, take the ratingProblem value from its last row (in file
# order) and expose it as `rating`; the per-attempt ratingProblem column is
# dropped and replaced by this single per-puzzle value on every attempt.
puzzle_ratings = (
    attempts.groupby('tactics_problem_id')
    .tail(1)[['tactics_problem_id', 'ratingProblem']]
    .rename(columns={'ratingProblem': 'rating'})
)
attempts = attempts.drop('ratingProblem', axis=1).merge(
    puzzle_ratings, on='tactics_problem_id'
)

In [5]:
# Keep only the attempts whose userGamesPlayed value is above 1000.
# Alternative filter (disabled):
#attempts=attempts.loc[attempts.userGamesPlayed.between(2500,4000)]
attempts = attempts.loc[attempts['userGamesPlayed'].gt(1000)]

In [8]:
#Function to return string version of interval
def range_to_string(range_list):
    """Render each interval as the text '<left> to <right>'.

    Args:
        range_list: iterable of objects exposing ``left`` and ``right``
            attributes (e.g. ``pandas.Interval``).

    Returns:
        A list of strings, one per interval, in the same order.
    """
    labels = []
    for interval in range_list:
        labels.append('{} to {}'.format(interval.left, interval.right))
    return labels

In [27]:
# Split players into k quantile buckets of ratingUser, then keep both the
# interval objects and their '<left> to <right>' text form for later use.
k = 5
attempts['player_rating_labels'] = pd.qcut(attempts['ratingUser'], q=k)
player_rating_labels = (
    attempts['player_rating_labels'].unique().categories.to_list()
)
player_string_labels = range_to_string(player_rating_labels)

In [29]:
# Narrow the frame to the four columns the plotting loop reads.
trimmed_data = attempts[[
    'seconds',
    'is_passed',
    'player_rating_labels',
    'rating',
]]

In [25]:
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import seaborn as sns

def plot_graph_2(data,puzzle_ranges,player_index,times=None):
    """Plot and save success-probability curves for one player rating bucket.

    One line is drawn per entry of ``data`` together with a shaded band of
    +/- 1.96 standard errors, and the figure is written as a PNG under
    ``../plots/timeplots/``. The figure is closed afterwards, so nothing is
    left open (or rendered inline) once the file has been saved.

    Args:
        data: sequence of equal-length numeric sequences, one per puzzle
            rating bucket; element ``i`` of a series is the success rate at
            the ``i``-th time threshold.
        puzzle_ranges: legend labels, parallel to ``data``.
        player_index: index into the notebook-level ``player_string_labels``
            list, used for the title and the output file name.
        times: x-axis values shared by every series. Defaults to the
            notebook-level ``time_range``.
    """
    if times is None:
        times = time_range

    fig, ax = plt.subplots(figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
    colors = sns.color_palette("hls", len(data))
    # Iterate the `data` argument itself (not a notebook global) and do not
    # rebind `data`, so the function plots exactly what the caller passed in.
    for index, series in enumerate(data):
        series = np.asarray(series, dtype=float)
        color = colors[index]
        # Standard error as computed by the original notebook: spread of the
        # plotted values across thresholds divided by their count.
        # NOTE(review): this is not a per-threshold binomial standard error;
        # confirm this is the intended uncertainty measure.
        stderror = np.sqrt(np.var(series)/len(series))
        ax.plot(times, series, color=color, label=puzzle_ranges[index])
        # Add 1.96 times the standard error to get the 95% confidence interval
        band = 1.96*stderror
        ax.fill_between(times, series-band, series+band, alpha=0.4, color=color)

    player_label = player_string_labels[player_index]
    ax.set_ylabel('Probability of success')
    ax.set_xlabel('Time waited >= x')
    ax.set_title('Success at puzzles for players in range: {}'.format(player_label))
    ax.legend(loc=2)
    # Spaces are replaced across the whole path, matching the original naming.
    fig.savefig('../plots/timeplots/puzzle_time_success_{}_players.png'.format(player_label).replace(' ','_'))
    # Close rather than clear: clearing keeps one open, empty figure per call.
    plt.close(fig)

In [28]:
# Shell out to delete every file in ../plots/timeplots/, the directory that
# plot_graph_2 saves its PNGs into.
!rm -f ../plots/timeplots/*

In [31]:
#QCut attempt
# Minimum-wait thresholds x (in the units of the `seconds` column) to evaluate.
time_range = list(range(0,40))
# Let A = "the attempt was passed" and B = "the attempt took >= x seconds",
# both restricted to attempts whose player and puzzle ratings fall inside the
# current pair of buckets. Each plotted value estimates
# P(A|B) = P(A&B)/P(B) as count(A&B)/count(B).
pbar = tqdm(total=k)
for index,player_label in enumerate(player_rating_labels):
    probs = []
    player_problems = trimmed_data.loc[trimmed_data.player_rating_labels == player_label].copy()
    # Puzzle buckets are quantiles of the puzzles seen by THIS player bucket,
    # so the bucket edges differ from one player bucket to the next.
    player_problems['problem_rating_labels'] = pd.qcut(player_problems['rating'],k)
    problem_rating_labels = player_problems['problem_rating_labels'].unique().categories.to_list()
    print('Generating plot for players in range: {}'.format(player_string_labels[index]))
    for prob_label in problem_rating_labels:
        print('Generating plot for puzzles between: {} and {}'.format(prob_label.left,prob_label.right))
        #Get problems that have both the correct problem rating label and player rating label
        rating_trimmed = player_problems.loc[player_problems.problem_rating_labels == prob_label]
        print('Num of attempts: ' + str(len(rating_trimmed)))
        # Evaluate the pass flag once; each threshold then only needs a mask count
        # instead of building a filtered copy of the frame.
        seconds = rating_trimmed['seconds']
        passed = rating_trimmed['is_passed'] == 1
        rating_probs = []
        #Count the problems from rating_trimmed that were attempted after at least min_seconds
        for min_seconds in time_range:
            waited = seconds >= min_seconds
            n_waited = int(waited.sum())
            if n_waited == 0:
                # No attempt reached this threshold: keep the original
                # placeholder of 0 rather than dividing by zero.
                rating_probs.append(0)
            else:
                rating_probs.append(int((passed & waited).sum())/n_waited)
        probs.append(rating_probs)
    #Making plot
    plot_graph_2(probs,range_to_string(problem_rating_labels),index)
    pbar.update(1)
pbar.close()

HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

Generating plot for players in range: 426.999 to 1778.0
Generating plot for puzzles between: 711.999 and 1417.0
Num of attempts: 2058354
Generating plot for puzzles between: 1417.0 and 1532.0
Num of attempts: 2032932
Generating plot for puzzles between: 1532.0 and 1626.0
Num of attempts: 2035731
Generating plot for puzzles between: 1626.0 and 1733.0
Num of attempts: 2059658
Generating plot for puzzles between: 1733.0 and 2981.0
Num of attempts: 2024310
Generating plot for players in range: 1778.0 to 1900.0
Generating plot for puzzles between: 726.999 and 1627.0
Num of attempts: 2035696
Generating plot for puzzles between: 1627.0 and 1716.0
Num of attempts: 2033010
Generating plot for puzzles between: 1716.0 and 1791.0
Num of attempts: 2040928
Generating plot for puzzles between: 1791.0 and 1887.0
Num of attempts: 2024209
Generating plot for puzzles between: 1887.0 and 2994.0
Num of attempts: 2021722
Generating plot for players in range: 1900.0 to 2009.0
Generating plot for puzzles betw

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

In [18]:
# Display the player rating bucket labels that plot_graph_2 uses in plot
# titles and output file names.
player_string_labels

['426.999 to 1778.0',
 '1778.0 to 1900.0',
 '1900.0 to 2009.0',
 '2009.0 to 2147.0',
 '2147.0 to 3774.0']