In [1]:
import csv
import os
from collections import defaultdict
from itertools import combinations

import pandas as pd

In [2]:
# Load the raw CSV and normalise every record to exactly N_COLUMNS fields
# (Year, Title, Conference, Authors) before building a DataFrame.
file_path = 'filtered_authors.csv'
N_COLUMNS = 4
processed_lines = []

# newline='' is what the csv module asks for so that it can handle line
# endings (including ones embedded in quoted fields) itself.
with open(file_path, 'r', encoding='utf-8', newline='') as file:
    for line in csv.reader(file):
        if not line:
            # Blank line in the file: skip it instead of storing an all-empty row.
            continue
        if len(line) > N_COLUMNS:
            # Overflow fields are presumably author names that were split on
            # unquoted commas (TODO confirm against the raw file). Re-join them
            # with ',' because the Authors column is later split on ','; joining
            # with spaces would merge all authors of the row into one name.
            new_line = line[:N_COLUMNS - 1] + [','.join(line[N_COLUMNS - 1:])]
        else:
            # Short rows are right-padded with empty strings.
            new_line = line + [''] * (N_COLUMNS - len(line))
        processed_lines.append(new_line)

data = pd.DataFrame(processed_lines)
data

Unnamed: 0,0,1,2,3
0,2017,Public Transportation Mode Detection from Cell...,CIKM,"Guanyao Li,Chun-Jie Chen,Sheng-Yun Huang,Ai-Jo..."
1,2019,DSANet: Dual Self-Attention Network for Multiv...,CIKM,"Siteng Huang,Donglin Wang,Xuehan Wu,Ao Tang"
2,2020,Multi-task Adversarial Spatial-Temporal Networ...,CIKM,"Senzhang Wang,Hao Miao,Hao Chen,Zhiqiu Huang"
3,2020,A Framework for Analyzing the Impact of Missin...,CIKM,"Fabiola Santore,Eduardo Cunha de Almeida,Wagne..."
4,2020,Auxiliary-task Based Deep Reinforcement Learni...,CIKM,"Wei Shen 0005,Xiaonan He,Chuheng Zhang,Qiang N..."
...,...,...,...,...
8443,2020,GeoSiteSearch: A Tool to Map Vietnamese Diaspo...,ICWSM,"Madison G. Masten,Thien-Huong Ninh,Nicholas Tran"
8444,2020,Auditing Race and Gender Discrimination in Onl...,ICWSM,"Joshua Asplund,Motahhare Eslami,Hari Sundaram,..."
8445,2020,Driving the Last Mile: Characterizing and Unde...,ICWSM,"Hemank Lamba,Shashank Srikanth,Dheeraj Reddy P..."
8446,2018,Automatically Conceptualizing Social Media Ana...,ICWSM,"Soon-Gyo Jung,Joni O. Salminen,Jisun An,Haewoo..."


In [3]:
# Rebuild the frame from the normalised rows, this time with named columns.
data = pd.DataFrame(
    processed_lines,
    columns=["Year", "Title", "Conference", "Authors"],
)

In [4]:
# Preview the first five rows (DataFrame.head defaults to n=5).
data.head()

Unnamed: 0,Year,Title,Conference,Authors
0,2017,Public Transportation Mode Detection from Cell...,CIKM,"Guanyao Li,Chun-Jie Chen,Sheng-Yun Huang,Ai-Jo..."
1,2019,DSANet: Dual Self-Attention Network for Multiv...,CIKM,"Siteng Huang,Donglin Wang,Xuehan Wu,Ao Tang"
2,2020,Multi-task Adversarial Spatial-Temporal Networ...,CIKM,"Senzhang Wang,Hao Miao,Hao Chen,Zhiqiu Huang"
3,2020,A Framework for Analyzing the Impact of Missin...,CIKM,"Fabiola Santore,Eduardo Cunha de Almeida,Wagne..."
4,2020,Auxiliary-task Based Deep Reinforcement Learni...,CIKM,"Wei Shen 0005,Xiaonan He,Chuheng Zhang,Qiang N..."


In [5]:
# Distinct values of the Year column. NOTE(review): nothing here restricts the
# result to five years; the name only holds if the input file itself covers
# exactly five distinct years.
last_five_years = data["Year"].unique()

In [6]:
# Build one weighted co-authorship edge list per year and write each to
# Coauthorships/coauthorship_<year>.csv with columns from,to,weight,Year.
output_dir = "Coauthorships"
# exist_ok=True keeps this cell re-runnable; a bare makedirs() raises
# FileExistsError as soon as the directory already exists.
os.makedirs(output_dir, exist_ok=True)

for year in last_five_years:
    year_authors = data.loc[data['Year'] == year, 'Authors']

    # (author_a, author_b) with the names in sorted order -> number of papers
    # the two authors share in this year.
    coauthorship_graph = defaultdict(int)

    for author_field in year_authors:
        stripped = (name.strip() for name in author_field.split(','))
        # Drop empty tokens (e.g. from a trailing comma) and collapse repeated
        # names so one paper never produces a self-loop or counts a pair twice.
        # dict.fromkeys dedupes while keeping the original author order.
        authors = list(dict.fromkeys(name for name in stripped if name))

        for pair in combinations(authors, 2):
            # Sort the pair so (A, B) and (B, A) hit the same undirected edge.
            coauthorship_graph[tuple(sorted(pair))] += 1

    out_path = os.path.join(output_dir, f'coauthorship_{year}.csv')
    # csv.writer quotes fields when needed; newline='' plus an explicit
    # lineterminator gives '\n' line endings regardless of platform.
    with open(out_path, 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(["from", "to", "weight", "Year"])
        writer.writerows(
            (first, second, weight, year)
            for (first, second), weight in coauthorship_graph.items()
        )

    print(f"Co-authorship graph for {year} saved.")

Co-authorship graph for 2017 saved.
Co-authorship graph for 2019 saved.
Co-authorship graph for 2020 saved.
Co-authorship graph for 2016 saved.
Co-authorship graph for 2018 saved.
