In [1]:
import pandas as pd


# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# --- Papers: one row per paper, per-author fields stored as ';'-joined strings ---
data = pd.read_csv("../data/ai_research_papers.csv")
# Manual patch of a single record's author list.
# NOTE(review): presumably needed so this row's ';'-separated names line up with
# its ids/afids (a multi-column explode needs equal-length lists) -- confirm.
data.at[222653, 'author_names'] = 'Dudík, Miroslav;Haghtalab, Nika;Luo, Haipeng;Schapire, Robert E.;Syrgkanis, Vasilis;Vaughan, Jennifer Wortman'

# Split the three per-author fields into lists, then give every author its own row.
author_list_cols = ['author_afids', 'author_names', 'author_ids']
split_columns = {col: data[col].str.split(';') for col in author_list_cols}
data_exploded = data.assign(**split_columns).explode(author_list_cols)
# 1-based position of each author within its paper (rows keep their listed order).
data_exploded['author_rank'] = data_exploded.groupby('eid').cumcount() + 1
kept_cols = ['eid', 'year', 'origin_ref', 'author_count', 'author_names',
             'author_ids', 'author_afids', 'author_rank', 'citedby_count']
data_exploded = data_exploded[kept_cols]

# --- Affiliations: two lookup files stacked, one row kept per afid ---
aff_ids_1 = pd.read_csv("../data/ai_papers_affiliations.csv")
aff_ids_2 = pd.read_csv("../data/ai_papers_affiliations_append.csv")
aff_cols = ['afid', 'affiliation_name', 'org_type', 'city', 'state', 'country']
aff_ids_2 = aff_ids_2[aff_cols].drop_duplicates()
aff_ids = pd.concat([aff_ids_1, aff_ids_2])
# Only affiliations with a known country are usable; the first row per afid wins.
has_country = aff_ids['country'].notna()
aff_ids = aff_ids[has_country].drop_duplicates(subset='afid')

# --- Join authors to affiliations ---
# Inner join: author rows whose afid has no match in aff_ids are dropped here.
data_fin = data_exploded.merge(aff_ids, how='inner', left_on='author_afids', right_on='afid')
data_fin = data_fin.sort_values(["year", "eid", "author_rank"])

# Collapse data_fin (one row per matched author) into one record per paper:
#   data_dict[eid] = {'year', 'origin_ref', 'author_count', 'citedby_count',
#                     'aff_country': [country of each row, in data_fin row order]}
# The scalar fields are taken from the first row seen for each eid.
#
# itertuples over just the needed columns replaces iterrows: iterrows builds a
# full Series for every row, which is far slower and only useful when the row
# is needed as a Series.  Both yield plain Python scalars for these columns.
data_dict = {}
paper_columns = ['eid', 'year', 'origin_ref', 'author_count', 'citedby_count', 'country']
for eid, year, origin_ref, author_count, citedby_count, country in data_fin[paper_columns].itertuples(index=False, name=None):
    paper = data_dict.get(eid)
    if paper is None:
        paper = {
            'year': year,
            'origin_ref': origin_ref,
            'author_count': author_count,
            'citedby_count': citedby_count,
            'aff_country': [],
        }
        data_dict[eid] = paper
    paper['aff_country'].append(country)


  exec(code_obj, self.user_global_ns, self.user_ns)


In [2]:
import json

def save_dict_to_json(dictionary, filename):
    """Serialize ``dictionary`` as JSON into the file at ``filename``.

    The file is opened in text write mode, so an existing file is overwritten.
    ``json.dump`` is called with its default settings.

    Args:
        dictionary: JSON-serializable object to write.
        filename: Path of the output file.
    """
    with open(filename, mode='w') as out_handle:
        json.dump(dictionary, out_handle)

        
### Number of co-authored papers between each pair of countries
# result_dict[country]["self"]  -> number of data_dict entries (papers) whose
#                                  aff_country list contains `country`
# result_dict[country][partner] -> number of those papers that also list `partner`
# Each paper counts once per country, however many of its authors are from there.
result_dict = {}
for paper in data_dict.values():
    # Distinct countries of this paper in first-appearance order.  dict.fromkeys
    # is used instead of a set so that keys are inserted in a reproducible order
    # (set iteration order for strings varies between interpreter runs), which
    # keeps the saved JSON stable; it also avoids re-filtering the whole author
    # list once per author.
    countries = list(dict.fromkeys(paper['aff_country']))
    for country in countries:
        counts = result_dict.setdefault(country, {"self": 0})
        counts["self"] += 1
        for partner in countries:
            if partner != country:
                counts[partner] = counts.get(partner, 0) + 1
        
### Citations shared between each pair of countries
# result_dict_cite[country]["self"]  -> summed citedby_count of the papers whose
#                                       aff_country list contains `country`
# result_dict_cite[country][partner] -> summed citedby_count of those papers
#                                       that also list `partner`
# A paper's full citation count is credited once per country, regardless of how
# many of its authors come from that country.
result_dict_cite = {}
for paper in data_dict.values():
    citations = paper['citedby_count']
    # Distinct countries in first-appearance order.  dict.fromkeys (not a set)
    # gives a reproducible key insertion order, so the saved JSON is stable
    # across runs, and avoids re-filtering the author list once per author.
    countries = list(dict.fromkeys(paper['aff_country']))
    for country in countries:
        totals = result_dict_cite.setdefault(country, {"self": 0})
        totals["self"] += citations
        for partner in countries:
            if partner != country:
                totals[partner] = totals.get(partner, 0) + citations

### Author-order score between countries (weights 1, 1/2, 1/3, ...)
# result_dict_auth_rank[country]["self"]  -> sum over papers of the country's own score
# result_dict_auth_rank[country][partner] -> sum, over papers shared with `partner`,
#                                            of the partner's score on that paper
# NOTE(review): the weight uses the position inside aff_country, not an explicit
# author rank -- confirm that this list holds every author of the paper.
result_dict_auth_rank = {}
for paper in data_dict.values():
    # Per-paper score of a country: sum of 1/position over its entries.
    paper_score = {}
    for position, country in enumerate(paper['aff_country'], start=1):
        paper_score[country] = paper_score.get(country, 0) + 1/position

    for country, own_score in paper_score.items():
        totals = result_dict_auth_rank.setdefault(country, {"self": 0})
        totals["self"] += own_score
        for partner, partner_score in paper_score.items():
            if partner == country:
                continue
            totals[partner] = totals.get(partner, 0) + partner_score


### Author-order score between countries (only the best position per paper)
# Same layout as the summed variant, but a country's per-paper score is
# 1/position of its FIRST entry in aff_country; later entries are ignored.
# NOTE(review): the weight uses the position inside aff_country, not an explicit
# author rank -- confirm that this list holds every author of the paper.
result_dict_auth_rank_2 = {}
for paper in data_dict.values():
    paper_score = {}
    for position, country in enumerate(paper['aff_country'], start=1):
        # setdefault keeps the value stored at the first (lowest) position.
        paper_score.setdefault(country, 1/position)

    for country, own_score in paper_score.items():
        totals = result_dict_auth_rank_2.setdefault(country, {"self": 0})
        totals["self"] += own_score
        for partner, partner_score in paper_score.items():
            if partner == country:
                continue
            totals[partner] = totals.get(partner, 0) + partner_score


### Author-order score between countries (citations split in proportion to the score)
# A paper's citedby_count is divided among its countries in proportion to each
# country's summed 1/position weight.
# result_dict_auth_rank_3[country]["self"]  -> sum of the country's citation share
# result_dict_auth_rank_3[country][partner] -> sum of the country's OWN share over
#                                              the papers it shares with `partner`
# NOTE(review): the two sibling author-rank tables add the partner's score to the
# partner entry, whereas this one adds the country's own share -- confirm which
# of the two is intended.
result_dict_auth_rank_3 = {}
for paper in data_dict.values():
    citations = paper['citedby_count']
    paper_score = {}
    score_sum = 0
    for position, country in enumerate(paper['aff_country'], start=1):
        weight = 1/position
        paper_score[country] = paper_score.get(country, 0) + weight
        score_sum += weight

    for country, own_score in paper_score.items():
        totals = result_dict_auth_rank_3.setdefault(country, {"self": 0})
        own_share = citations * (own_score/score_sum)
        totals["self"] += own_share
        for partner in paper_score:
            if partner != country:
                totals[partner] = totals.get(partner, 0) + own_share

In [14]:
# Write each country-level table to its own JSON file (relative paths, so the
# files land in the current working directory).
for output_name, country_table in (
    ("number_cowork.json", result_dict),
    ("number_cocite.json", result_dict_cite),
    ("number_cowork_authrank_sum.json", result_dict_auth_rank),
    ("number_cowork_authrank_max.json", result_dict_auth_rank_2),
    ("number_cowork_propCite.json", result_dict_auth_rank_3),
):
    save_dict_to_json(country_table, output_name)


In [9]:
import operator


# Japan's row of the co-authorship table, largest value first.
dict(sorted(result_dict['Japan'].items(), key=lambda entry: entry[1], reverse=True))

{'self': 8746,
 'United States': 1071,
 'China': 604,
 'United Kingdom': 345,
 'Germany': 254,
 'Canada': 238,
 'France': 221,
 'Australia': 190,
 'Singapore': 135,
 'Hong Kong': 99,
 'Switzerland': 83,
 'South Korea': 74,
 'Netherlands': 66,
 'Italy': 65,
 'Austria': 63,
 'Finland': 60,
 'Taiwan': 58,
 'India': 50,
 'Denmark': 43,
 'Sweden': 41,
 'Israel': 40,
 'Czech Republic': 36,
 'Spain': 36,
 'Thailand': 29,
 'Belgium': 27,
 'Portugal': 23,
 'Poland': 20,
 'Viet Nam': 17,
 'Norway': 17,
 'New Zealand': 17,
 'Malaysia': 15,
 'Ireland': 15,
 'Indonesia': 14,
 'United Arab Emirates': 13,
 'Egypt': 12,
 'Brazil': 12,
 'Chile': 9,
 'Hungary': 9,
 'Saudi Arabia': 9,
 'Greece': 9,
 'Turkey': 8,
 'Russian Federation': 7,
 'Slovenia': 6,
 'Mexico': 6,
 'Qatar': 5,
 'Philippines': 5,
 'Argentina': 4,
 'Bangladesh': 4,
 'Latvia': 4,
 'Luxembourg': 4,
 'Namibia': 4,
 'Iran': 3,
 'Pakistan': 3,
 'Estonia': 2,
 'Cameroon': 2,
 'Serbia': 2,
 'Myanmar': 2,
 'Croatia': 2,
 'South Africa': 2,
 'Ec

In [10]:
# Display the complete co-authorship table (every country's row).
result_dict

{'United States': {'self': 123102,
  'France': 1977,
  'Germany': 3312,
  'Canada': 3995,
  'United Kingdom': 4303,
  'Israel': 2721,
  'Netherlands': 913,
  'Japan': 1071,
  'Brazil': 455,
  'Italy': 1193,
  'India': 1515,
  'Austria': 550,
  'Taiwan': 460,
  'Hungary': 71,
  'Slovenia': 42,
  'Singapore': 1464,
  'Cyprus': 35,
  'Switzerland': 1946,
  'Hong Kong': 1778,
  'Australia': 1564,
  'Egypt': 63,
  'Norway': 189,
  'Nigeria': 6,
  'Denmark': 519,
  'Poland': 174,
  'Finland': 380,
  'Greece': 294,
  'South Korea': 1232,
  'Spain': 724,
  'Belgium': 359,
  'Sweden': 560,
  'China': 9766,
  'Portugal': 224,
  'South Africa': 56,
  'Ireland': 253,
  'Bulgaria': 17,
  'Russian Federation': 150,
  'Czech Republic': 200,
  'Estonia': 22,
  'Qatar': 224,
  'New Zealand': 139,
  'Mexico': 83,
  'Serbia': 17,
  'Iceland': 26,
  'Uruguay': 16,
  'Turkey': 175,
  'Grenada': 1,
  'Venezuela': 10,
  'Saudi Arabia': 257,
  'Chile': 82,
  'Latvia': 12,
  'Slovakia': 10,
  'Macao': 51,
  'T

In [30]:
# Japan's row of the shared-citation table, largest value first.
dict(sorted(result_dict_cite['Japan'].items(), key=lambda entry: entry[1], reverse=True))

{'total_citation': 205161,
 'United States': 48230,
 'China': 25138,
 'United Kingdom': 11567,
 'Germany': 8997,
 'France': 8490,
 'Australia': 7734,
 'Singapore': 7289,
 'Canada': 6386,
 'Switzerland': 4756,
 'Czech Republic': 4742,
 'Hong Kong': 4276,
 'Italy': 3862,
 'Sweden': 2307,
 'South Korea': 2084,
 'Austria': 1756,
 'Taiwan': 1722,
 'Spain': 1624,
 'Denmark': 1235,
 'India': 1068,
 'Israel': 911,
 'Netherlands': 887,
 'Brazil': 863,
 'Belgium': 771,
 'Finland': 769,
 'Saudi Arabia': 557,
 'Slovenia': 533,
 'Thailand': 501,
 'Turkey': 462,
 'Norway': 379,
 'New Zealand': 344,
 'Iran': 314,
 'Greece': 309,
 'Egypt': 291,
 'Malaysia': 270,
 'Ukraine': 269,
 'Qatar': 217,
 'Portugal': 207,
 'United Arab Emirates': 189,
 'Russian Federation': 182,
 'Chile': 176,
 'Colombia': 164,
 'Poland': 144,
 'Philippines': 115,
 'Ireland': 112,
 'Viet Nam': 104,
 'Luxembourg': 94,
 'Hungary': 91,
 'Indonesia': 64,
 'Mexico': 56,
 'Latvia': 55,
 'Argentina': 50,
 'Serbia': 26,
 'Pakistan': 16,

In [31]:
# Japan's row of the summed author-order score table, largest value first.
dict(sorted(result_dict_auth_rank['Japan'].items(), key=lambda entry: entry[1], reverse=True))


{'total_score': 12926.966555580208,
 'United States': 1019.8022591820512,
 'China': 723.2514603458749,
 'United Kingdom': 256.76522562684005,
 'Germany': 220.36879807707123,
 'France': 194.22989476497003,
 'Canada': 178.2653821296703,
 'Australia': 148.33119092974133,
 'Singapore': 111.56459316422128,
 'Hong Kong': 75.32108324596248,
 'Switzerland': 67.12660827292702,
 'South Korea': 59.76612425795496,
 'Taiwan': 58.67015940797889,
 'Finland': 47.16245039682541,
 'Italy': 46.56745434315557,
 'Netherlands': 43.75666910703677,
 'Austria': 43.50016944195335,
 'India': 43.34226952781697,
 'Thailand': 36.42132352941176,
 'Israel': 34.58485958485958,
 'Belgium': 24.44988034493666,
 'Denmark': 24.239189596093617,
 'Spain': 23.774554502742493,
 'Sweden': 20.931851919245503,
 'Czech Republic': 20.413500826658716,
 'Viet Nam': 17.192857142857143,
 'Indonesia': 14.686751318304712,
 'New Zealand': 14.02738095238095,
 'Brazil': 13.561111111111108,
 'Portugal': 12.84058068440091,
 'Ireland': 12.6261

In [32]:
# Japan's row of the best-position author-order score table, largest value first.
dict(sorted(result_dict_auth_rank_2['Japan'].items(), key=lambda entry: entry[1], reverse=True))


{'total_score': 7657.859081749032,
 'United States': 673.0944455054754,
 'China': 447.4255487291062,
 'United Kingdom': 194.0919043374926,
 'Germany': 154.8847500063028,
 'France': 141.73120300751881,
 'Canada': 136.69024650666276,
 'Australia': 106.7359113300493,
 'Singapore': 77.74636046638336,
 'Hong Kong': 58.08839335329278,
 'Switzerland': 51.105357142857144,
 'South Korea': 41.67874691378528,
 'Taiwan': 39.63061840120663,
 'Italy': 34.81122994652406,
 'Netherlands': 34.06222943722943,
 'Austria': 33.73710317460318,
 'Finland': 32.28625992063491,
 'India': 29.428927203065133,
 'Israel': 24.55589133089133,
 'Thailand': 23.4546568627451,
 'Denmark': 21.563224683812912,
 'Spain': 19.171668512366182,
 'Belgium': 18.51834733893557,
 'Sweden': 17.8364676312257,
 'Czech Republic': 16.573934837092732,
 'Viet Nam': 12.866666666666665,
 'Poland': 10.683333333333332,
 'Portugal': 10.637426431246654,
 'Norway': 10.136446886446887,
 'New Zealand': 9.385714285714286,
 'Ireland': 9.2166666666666

In [34]:
# Japan's row of the citation-share table, largest value first.
dict(sorted(result_dict_auth_rank_3['Japan'].items(), key=lambda entry: entry[1], reverse=True))


{'total_score': 143652.53904148727,
 'United States': 17234.540243217245,
 'China': 7267.861694860035,
 'United Kingdom': 4053.293090211,
 'Germany': 2932.6997620763914,
 'Australia': 2523.1947950556055,
 'France': 2443.501154963467,
 'Canada': 2384.2638425876607,
 'Singapore': 1799.8805197546158,
 'Switzerland': 1105.120523226237,
 'Czech Republic': 1025.8949362844737,
 'Hong Kong': 1013.9273147213837,
 'Taiwan': 729.7071589961972,
 'Italy': 640.888390500238,
 'South Korea': 489.2768568258758,
 'Sweden': 458.8220897797935,
 'Denmark': 363.6236007672266,
 'Israel': 356.48493922727397,
 'India': 337.6564367777424,
 'Netherlands': 331.6092669051479,
 'Austria': 328.542244867898,
 'Finland': 300.5738369293447,
 'Spain': 256.3291858702343,
 'Belgium': 170.58640851357364,
 'Thailand': 151.23409890633408,
 'Turkey': 135.68082707435676,
 'Brazil': 89.38571463747148,
 'Chile': 86.40094003187826,
 'Portugal': 78.94964559807192,
 'Norway': 69.09584086884446,
 'Qatar': 61.03887405051535,
 'Egypt'