# run tests

In [1]:
import os
import sys
import inspect
import pandas as pd

from chandra_bot import ChandraBot as cbot

In [2]:
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
moduledir = os.path.join(parentdir, "chandra_bot")
sys.path.insert(0, moduledir) 

import chandra_bot_data_model_pb2 as dm

In [3]:
example_dir = "../examples"

In [4]:
bot = cbot.create_bot(
        paper_file=os.path.join(example_dir, "small_fake_paper_series.csv"),
        review_file=os.path.join(example_dir, "small_fake_review_series.csv"),
        human_file=os.path.join(example_dir, "small_fake_human.csv"),
)

In [5]:
print("Assembling the paper book")
bot.assemble_paper_book()

Assembling the paper book


In [None]:
# do we have paper.authors here?
# no. so the problem is the creation of the bot does not have the authors
# so the problem is when the paper book gets assembled
# walk through the logic of the `assemble_paper_book' method

In [10]:
paper_id = bot.paper_df.index[0]
paper_id

'2015/1'

In [11]:
paper = bot.paper_book.paper.add()

In [12]:
paper.number = paper_id

In [13]:
paper_row = bot.paper_df.loc[paper_id]

In [14]:
bot._attribute_paper(paper, paper_row)

In [15]:
"author_ids" in bot.paper_df.columns

True

In [17]:
author_id = paper_row.author_ids.split(",")[0]
author_id

'1'

In [18]:
author_id in bot.human_df["author_id"]

False

In [20]:
int (author_id) in bot.human_df["author_id"]

True

In [7]:
# okay, so here's the problem. author_id in papers is a string, but it's stored
# as an int in human_df. 
# it shouldn't be though, as HUMAN_DICT calls for it to be a string

[]

In [22]:
bot.human_df.dtypes

name                       string
aliases                    string
hash_id                    string
current_affiliation        string
previous_affiliation       string
last_degree_affiliation    string
orcid_url                  string
orcid                      string
author_id                  string
verified                     bool
dtype: object

In [23]:
type(author_id)

str

In [24]:
type(bot.human_df["author_id"][0])

str

In [25]:
author_id == bot.human_df["author_id"][0]

True

In [27]:
author_id in bot.human_df["author_id"]

False

In [28]:
author_id in bot.human_df["author_id"][0]

True

In [29]:
# df['company_name'].eq('ABC').any()
bot.human_df["author_id"].eq(author_id).any()

True

In [None]:
# okay. made this change. see if tests run.

In [9]:
df.head()

Unnamed: 0_level_0,authors,author_ids,title,year,committee_presentation_decision,committee_publication_decision,abstract,body
paper_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015/1,"Francesca Geisinsky,Raymon Gilpatrick,Victoria...",123456,Plurinacionalidad y Vivir Bien/Buen Vivir: Dos...,2015,Reject,Reject,Each country is in the image of its inhabitant...,
2015/2,"Elfrieda Loxton,Dina Dage,Sibyl Locatelli,Gena...",7891011,Disciplining Interdisciplinarity: Integration ...,2015,Accept,Reject,There have been hitches. I sent Tom a couple o...,
2015/3,"Asia Smeathers,Ashli Kluemper,Katelynn Dangerf...",121314,Fact and Fiction: Literary and Scientific Cult...,2015,Reject,Reject,Avoid the temptation to skip lunch. Clean your...,
2015/4,"Dustin Barben,Janeth Zierenberg,Ryann Babicke,...",15161718,"La nación y su historia, independencias, relat...",2015,Reject,Reject,"""Who is there?"" she asked without raising her ...",
2015/5,"Felica Layva,Theda Headly,Deborah Tavolacci,Wi...",1920212223,Intersex Narratives: Shifts in the Representat...,2015,Reject,Reject,I have a long vacation coming up. Tom is lucky...,


In [None]:
print("Computing normalized scores")
bot.compute_normalized_scores()

In [None]:
print("Computing normalized scores (via dataframes)")
bot.compute_normalized_scores(dataframe_only=True)

In [None]:
print("Writing paper book to disk")
book_file = os.path.join(example_dir, "fake_serialized_paper_book.text")
bot.write_paper_book(output_file=book_file)

In [None]:
print("Read paper book from disk")
bot = cbot.read_paper_book(book_file)

In [None]:
input_file = book_file

In [None]:
paper_book = dm.PaperBook()
try:
    with open(input_file, "rb") as f:
        paper_book.ParseFromString(f.read())
except IOError:
    print(input_file + ": File not found.")

In [None]:
bot = cbot(input_paper_book=paper_book)
bot.paper_df = bot.make_dataframe(dataframe_name="paper")
bot.review_df = bot.make_dataframe(dataframe_name="review")

In [None]:
bot.human_df = bot.make_dataframe(dataframe_name="human")

In [None]:
# start here and walk through the make_dataframe method to find the bug

In [None]:
output_df = pd.DataFrame()

In [None]:
author_id_df = bot._make_author_id_df()

In [None]:
author_id_df.head()

In [None]:
paper = bot.paper_book.paper[1]

In [None]:
authors_df = pd.DataFrame()

In [None]:
author = paper.authors

In [None]:
df = bot.paper_df

In [None]:
df.head()

In [None]:
print("Make dataframes")
paper_out_df = bot.make_dataframe("paper")
review_out_df = bot.make_dataframe("review")
human_out_df = bot.make_dataframe("human")

In [None]:
print("Count former co-authors")
bot.count_former_coauthors()

In [None]:
print("Count former co-authors (via dataframes)")
bot.count_former_coauthors(dataframe_only=True)

In [None]:
print("Mean verified review score")
bot.append_verified_reviewer(min_count=2, dataframe_only=False)

In [None]:
print("Mean verified review score (via dataframes)")
    bot.append_verified_reviewer(min_count=2, dataframe_only=True)