In [2]:
import pandas as pd

In [3]:
import numpy as np

In [4]:
import importlib.util
import sys
from importlib.machinery import SourceFileLoader

# Load the project's helper module directly from its file path.
# Loader.load_module() is deprecated, so the module is built through the
# spec-based importlib API instead.
tools_loader = SourceFileLoader("tools", "../tools.py")
tools_spec = importlib.util.spec_from_loader("tools", tools_loader)
tools = importlib.util.module_from_spec(tools_spec)
# Register the module before executing it so the `from tools import ...`
# statement below resolves to this object rather than searching sys.path.
sys.modules["tools"] = tools
tools_loader.exec_module(tools)

from tools import write_to_file

In [5]:
# Load the cleaned article metadata; its `date` column is what the period
# boundaries are derived from in the following cells.
meta_data_path = '../data/prisoners_dilemma_articles_meta_data_clean.csv'
df = pd.read_csv(meta_data_path)

In [6]:
# Distinct values of the `date` column, in ascending order.
years = sorted(df['date'].unique())

In [7]:
# Ten evenly spaced cut-off points spanning the smallest to the largest `date`
# value; the points are floats and generally not whole years.
first_year, last_year = min(years), max(years)
periods = np.linspace(first_year, last_year, num=10)

In [8]:
# Build one summary table per cumulative period. Each table lists, for every
# dominant topic found in that period's topic file, the number of documents,
# the share of documents, and the topic's keywords.
# The first two cut-off points in `periods` are skipped.
tables = []
for year in periods[2:]:
    # `year` is a float cut-off from np.linspace; the file name embeds it as-is.
    topic_in_year = pd.read_csv(f'../data/topics_up_to_{year}.csv')

    # value_counts() returns topics ordered by descending document count.
    topic_counts = topic_in_year['Dominant_Topic'].value_counts()
    number_of_topics = len(topic_counts)

    topic_contribution = round(topic_counts / topic_counts.sum(), 4)

    # Look the keywords up by topic id and align them to the count order.
    # Taking `Topic_Keywords.unique()` directly would list keywords in order of
    # first appearance in the file, which need not match the frequency order
    # of `topic_counts` and would pair counts with the wrong keywords.
    topic_keywords = (topic_in_year.drop_duplicates('Dominant_Topic')
                                   .set_index('Dominant_Topic')['Topic_Keywords']
                                   .reindex(topic_counts.index)
                                   .to_numpy())

    table = pd.DataFrame([topic_counts.index,
                          topic_counts,
                          topic_contribution,
                          topic_keywords]).T
    table.columns = ['Dominant_Topic', 'Num of Documents', 'Percentage of Documents', 'Topic_Keywords']
    table['Period'] = f'1951-{int(year)}'
    # Relabel topics by rank within the period: 0 is the topic with the most
    # documents. The original topic ids from the file are not kept.
    table['Dominant_Topic'] = range(number_of_topics)

    tables.append(table)

In [9]:
# Stack the per-period tables into a single frame with a fresh 0..n-1 index.
table = pd.concat(tables, ignore_index=True)

In [11]:
# Sanity check: list the distinct period labels present in the combined table.
table.loc[:, 'Period'].unique()

array(['1951-1965', '1951-1973', '1951-1980', '1951-1988', '1951-1995',
       '1951-2003', '1951-2010', '1951-2018'], dtype=object)

In [174]:
# Preview the combined table with the keywords column first.
preview_columns = ['Topic_Keywords', 'Dominant_Topic', 'Num of Documents',
                   'Percentage of Documents', 'Period']
table.loc[:, preview_columns]

Unnamed: 0,Topic_Keywords,Dominant_Topic,Num of Documents,Percentage of Documents,Period
0,"problem, technology, divert, euler, subsystem,...",0,3,0.375,1951-1965
1,"interpret, requirement, programme, evolution, ...",1,2,0.25,1951-1965
2,"equipment, agency, conjecture, development, un...",2,1,0.125,1951-1965
3,"variation, celebrated, trend, untried, change,...",3,1,0.125,1951-1965
4,"give, good, modern, trace, technique, ambiguit...",4,1,0.125,1951-1965
...,...,...,...,...,...
67,"base, model, information, time, paper, propose...",2,418,0.1693,1951-2018
68,"strategy, player, game, agent, play, payoff, s...",3,323,0.1308,1951-2018
69,"cooperation, individual, group, cooperative, l...",4,308,0.1247,1951-2018
70,"population, model, evolutionary, evolution, re...",5,279,0.113,1951-2018


In [175]:
# Export the combined table as LaTeX through the project's write_to_file helper.
latex_columns = ['Period', 'Dominant_Topic', 'Topic_Keywords', 'Num of Documents',
                 'Percentage of Documents']
# The column width limit is raised while rendering, presumably so that long
# keyword strings are not truncated in the LaTeX output.
with pd.option_context("max_colwidth", 1000):
    latex_table = table[latex_columns].to_latex(index=False)
    write_to_file(metric=latex_table, filename="topics_per_year_table.tex")