# Data import
This notebook imports the game data: the players, their URLs, and the guesses that players made about each other's facts.


In [11]:
import pandas as pd
from pathlib import Path
from utils.aux_func import create_leaderboard

NB: Run all of the preceding scripts before executing this notebook.

In [12]:
# Create three independent, empty placeholder frames in one statement.
alldf_raw, df_raw, df2_raw = (pd.DataFrame() for _ in range(3))

In [13]:
# Load the raw answers table from the CSV input.

ans_file = Path('../../data/input-lie-gt.csv')

# Fail early with an actionable message. Previously a missing file surfaced
# as a misleading KeyError: 'Timestamp' raised on an empty frame.
if not ans_file.is_file():
    raise FileNotFoundError(
        f"Input file not found: {ans_file}. "
        "Run the preparatory scripts before executing this notebook."
    )

# `on_bad_lines='warn'` is the replacement for `error_bad_lines=False`
# (deprecated in pandas 1.3, removed in pandas 2.0): malformed rows are
# skipped and reported with a warning, as before. Requires pandas >= 1.3.
# The 'Timestamp' column is dropped without mutating the parsed frame in place;
# a missing 'Timestamp' column still raises KeyError.
alldf_raw = pd.read_csv(ans_file, on_bad_lines='warn').drop(columns='Timestamp')
alldf_raw.head()


Unnamed: 0,Чьи факты?,Кто угадывает?,Какой из фактов правда? Отмечать только один ответ.
0,Player1,Player1,Fact 1
1,Player1,Player2,Fact 2
2,Player1,Player3,Fact 1
3,Player1,Player4,Fact 3
4,Player2,Player1,Fact 1


In [14]:
# Give the first three columns short English titles, one position at a time
# (the original headers are long survey questions).
for position, new_title in enumerate(('Owner', 'Player', 'Guess')):
    alldf_raw.rename(columns={alldf_raw.columns[position]: new_title}, inplace=True)

alldf_raw.head()

Unnamed: 0,Owner,Player,Guess
0,Player1,Player1,Fact 1
1,Player1,Player2,Fact 2
2,Player1,Player3,Fact 1
3,Player1,Player4,Fact 3
4,Player2,Player1,Fact 1


In [15]:
# Keep only the rows in which a player guesses about somebody else's facts,
# i.e. drop the rows where Owner and Player are the same person.
df_raw = alldf_raw[alldf_raw['Owner'].ne(alldf_raw['Player'])]
df_raw

Unnamed: 0,Owner,Player,Guess
1,Player1,Player2,Fact 2
2,Player1,Player3,Fact 1
3,Player1,Player4,Fact 3
4,Player2,Player1,Fact 1
6,Player2,Player3,Fact 3
7,Player2,Player4,Fact 3
8,Player3,Player1,Fact 1
9,Player3,Player2,Fact 1
11,Player3,Player4,Fact 2
12,Player4,Player1,Fact 3


In [16]:
# Rows where a player answers about their own facts (Owner == Player).
# 'Owner' would duplicate 'Player' in these rows, so it is dropped.
df2_raw = (
    alldf_raw[alldf_raw['Owner'].eq(alldf_raw['Player'])]
    .drop(columns='Owner')
)
df2_raw

Unnamed: 0,Player,Guess
0,Player1,Fact 1
5,Player2,Fact 2
10,Player3,Fact 1
15,Player4,Fact 1


In [17]:
# Distinct values present in the cross-player guesses: whose facts were
# guessed (owners), who did the guessing (players), and which facts were
# picked (guess).
owners, players, guess = (
    df_raw[column].unique() for column in ('Owner', 'Player', 'Guess')
)

In [18]:
# Tally how often each fact was voted for in the cross-player guesses,
# most-voted fact first.
# NOTE(review): create_leaderboard comes from utils.aux_func and is not visible
# here; this presumably returns a frame with a 'votes' column indexed by the
# fact label — confirm. If 'votes' can come back non-numeric, convert it with
# pd.to_numeric before sorting.
total_votes = (
    create_leaderboard(df_raw, 'Guess', 'votes')
    .sort_values("votes", ascending=False)
    .reset_index()
    .rename(columns={'index': 'Guess'})
)
total_votes

Unnamed: 0,Guess,votes
0,Fact 1,5
1,Fact 3,5
2,Fact 2,2


In [19]:
# Tally how often each fact was picked in the rows where a player answers
# about their own facts (df2_raw), most frequent first.
# NOTE(review): create_leaderboard comes from utils.aux_func and is not visible
# here; this presumably returns a frame with a 'votes' column indexed by the
# fact label — confirm.
fact_choice = (
    create_leaderboard(df2_raw, 'Guess', 'votes')
    .sort_values("votes", ascending=False)
    .reset_index()
    .rename(columns={'index': 'Guess'})
)
fact_choice

Unnamed: 0,Guess,votes
0,Fact 1,3
1,Fact 2,1


In [20]:
# Per owner: count how many players voted for each of that owner's facts,
# then list the (owner, fact) pairs from most to fewest votes.
guess_stats = df_raw.groupby(['Owner', 'Guess'])['Player'].count()
s = guess_stats.rename("votes")
df_guess = (
    s.to_frame()
    .sort_values("votes", ascending=False)
    .reset_index()
)
df_guess


Unnamed: 0,Owner,Guess,votes
0,Player2,Fact 3,2
1,Player3,Fact 1,2
2,Player4,Fact 3,2
3,Player1,Fact 1,1
4,Player1,Fact 2,1
5,Player1,Fact 3,1
6,Player2,Fact 1,1
7,Player3,Fact 2,1
8,Player4,Fact 1,1
