In [118]:
import os, json
from dotenv import load_dotenv
import random
load_dotenv()
import datetime
from tqdm import tqdm

# Using OpenAI directly

In [129]:
import openai

def extract_topics(title, description):
    system_msg = "I am an AI assistant designed to help you extract the related field of computer science from a given text. \
    I understand that the text contains a title and a description of an academic paper and I am capable of identifying the field of computer science the paper is addressing. \
    I understand that I am only allowed to extract topics related to in the field of computer science and the topics must be generic. \
    Examples of generic fields of computer science are 'Distributed Systems', 'Computer Vision', 'Graph Mining', 'Erasure Coding', 'Bioinformatics', 'Operating Systems', 'Information Retrieval'. \
    The topics must be related to computer science. \
    I also understand that after extracting the topic, \
    My response must be a must be wrapped in `<result>`, eg: `<result>topic<result>` replace topic with the topic that sums up the academic paper."

    prompt = f"""
    Based on the title and description below, label the academic paper with the main field of computer science that it is addressing. 
    Your responsemust be a must be wrapped in `<result>`, eg: `<result>topic<result>` replace topic with the topic that sums up the academic paper.

    Title:

    {title}

    Description:

    {description}
    """
    while True:
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "system", "content": system_msg },
                        {"role": "user", "content": prompt}
                ],
                request_timeout=10
            ) 
            output = response['choices'][0]['message']['content']
            if output.startswith('<result>') and output.endswith('<result>'):
                output = output.strip('<result>')
                return output
            else:
                print(output)
        except Exception as e:
            print('Trying Again!')

In [58]:
output_dir = './research_interest'
os.makedirs(output_dir, exist_ok=True)

cur_year = datetime.datetime.now().year
min_year = cur_year - 1

dir = './processed_publications'
files = [os.path.join(dir, f) for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))]

for i, file in enumerate(files[53:]):
    with open(file, 'r') as f:
        pubs = json.load(f)
    print(i, file)

    file_name = file.split('/')[2]
    for pub in tqdm(pubs):
        if pub['description'] is not None and pub['publication_year'] is not None and int(pub['publication_year'])>=min_year:
            topic = extract_topics(pub['title'], pub['description'])
            pub['topic'] = topic
            pass
        else:
            pub['topic'] = None
    
        with open(f"{output_dir}/{file_name}", 'w') as f:
            json.dump(pubs, f)
    

0 ./processed_publications/chen_change_loy.json


 71%|███████   | 197/277 [01:16<01:40,  1.26s/it]

The main field of computer science that this academic paper is addressing is "Computer Vision".
Trying Again!


 73%|███████▎  | 201/277 [01:38<03:47,  2.99s/it]

Trying Again!


 75%|███████▌  | 209/277 [01:58<01:54,  1.68s/it]

Trying Again!


 78%|███████▊  | 215/277 [02:14<01:53,  1.82s/it]

Trying Again!


 80%|███████▉  | 221/277 [02:31<01:37,  1.75s/it]

Trying Again!
Trying Again!


 83%|████████▎ | 231/277 [03:03<01:06,  1.45s/it]

Trying Again!


 86%|████████▌ | 238/277 [03:27<01:26,  2.21s/it]

Trying Again!


 89%|████████▉ | 247/277 [03:56<01:00,  2.02s/it]

Trying Again!


100%|██████████| 277/277 [05:00<00:00,  1.09s/it]


1 ./processed_publications/anupam_chattopadhyay.json


 10%|█         | 39/381 [00:00<00:01, 192.61it/s]

Trying Again!


 56%|█████▋    | 215/381 [00:37<00:25,  6.53it/s]

Trying Again!
<result>Hardware Security</result>


 80%|███████▉  | 303/381 [01:17<00:58,  1.33it/s]

Trying Again!


 81%|████████  | 307/381 [01:34<01:45,  1.43s/it]

Trying Again!


 81%|████████▏ | 310/381 [01:56<03:14,  2.75s/it]

<result>Hardware Security</result>


100%|██████████| 381/381 [02:12<00:00,  2.87it/s]


2 ./processed_publications/bo_an.json


 42%|████▏     | 120/286 [00:11<00:23,  7.21it/s]

Trying Again!


 47%|████▋     | 133/286 [00:26<00:58,  2.62it/s]

Trying Again!


 71%|███████   | 202/286 [00:58<00:49,  1.68it/s]

Trying Again!


 76%|███████▌  | 216/286 [01:24<01:40,  1.43s/it]

Trying Again!


 81%|████████  | 232/286 [01:45<00:38,  1.40it/s]

Trying Again!


 83%|████████▎ | 236/286 [02:06<01:47,  2.15s/it]

Trying Again!


 84%|████████▎ | 239/286 [02:29<03:10,  4.06s/it]

Trying Again!
Trying Again!


 90%|████████▉ | 257/286 [03:10<00:52,  1.80s/it]

Trying Again!
Trying Again!


 91%|█████████▏| 261/286 [03:39<01:33,  3.74s/it]

Trying Again!


 92%|█████████▏| 262/286 [03:51<02:22,  5.95s/it]

Trying Again!


 92%|█████████▏| 264/286 [04:04<02:09,  5.87s/it]

Trying Again!


100%|██████████| 286/286 [04:31<00:00,  1.05it/s]


3 ./processed_publications/erik_cambria.json


  8%|▊         | 36/458 [00:01<00:13, 32.12it/s]

Trying Again!


 18%|█▊        | 82/458 [00:17<01:07,  5.61it/s]

Trying Again!


 26%|██▌       | 119/458 [00:30<01:21,  4.18it/s]

Trying Again!


 33%|███▎      | 151/458 [00:47<01:50,  2.78it/s]

Trying Again!


 35%|███▌      | 162/458 [00:59<03:01,  1.63it/s]

Trying Again!


 38%|███▊      | 174/458 [01:16<03:48,  1.24it/s]

Trying Again!


 42%|████▏     | 191/458 [01:34<02:35,  1.72it/s]

<result>Affective Computing</result>
Trying Again!


 42%|████▏     | 192/458 [01:52<08:54,  2.01s/it]

<result>Affective Computing</result>
<result>Affective Computing</result>
Trying Again!
<result>Affective Computing</result>
<result>Affective Computing</result>
<result>Affective Computing</result>
<result>Affective Computing</result>
<result>Affective Computing</result>
<result>Affective Computing</result>
<result>Affective Computing</result>
<result>Affective Computing</result>
<result>Affective Computing</result>


 50%|█████     | 229/458 [02:32<02:14,  1.71it/s]

<result>Multimodal Sentiment Analysis</result>
<result>Affective Computing</result>
<result>Affective Computing</result>
<result>Affective Computing</result>


 51%|█████     | 232/458 [02:42<04:18,  1.14s/it]

Trying Again!
<result>Natural Language Processing</result>
Trying Again!


 52%|█████▏    | 236/458 [03:06<10:14,  2.77s/it]

The main field of computer science that this academic paper is addressing is Natural Language Processing (NLP).


 57%|█████▋    | 260/458 [03:24<02:11,  1.51it/s]

Trying Again!


 78%|███████▊  | 358/458 [04:36<01:07,  1.48it/s]

Trying Again!


 78%|███████▊  | 359/458 [04:47<03:49,  2.32s/it]

Trying Again!


 83%|████████▎ | 378/458 [05:17<01:52,  1.40s/it]

Trying Again!


 88%|████████▊ | 404/458 [06:13<01:10,  1.30s/it]

Trying Again!


 95%|█████████▍| 435/458 [06:47<00:36,  1.58s/it]

Trying Again!


 96%|█████████▌| 439/458 [07:02<00:42,  2.26s/it]

Trying Again!


 98%|█████████▊| 448/458 [07:26<00:16,  1.62s/it]

<result>Social Media Analytics</result>


100%|██████████| 458/458 [07:29<00:00,  1.02it/s]


4 ./processed_publications/chng_eng_siong.json


 47%|████▋     | 163/344 [00:41<00:52,  3.46it/s]

Trying Again!
Trying Again!


 57%|█████▋    | 197/344 [01:10<00:48,  3.06it/s]

Trying Again!


 74%|███████▎  | 253/344 [01:54<00:18,  4.85it/s]

Trying Again!


 74%|███████▎  | 253/344 [02:14<00:18,  4.85it/s]

Trying Again!


 79%|███████▉  | 271/344 [02:32<00:56,  1.29it/s]

Trying Again!


 79%|███████▉  | 271/344 [02:44<00:56,  1.29it/s]

Trying Again!
Trying Again!


 80%|███████▉  | 275/344 [03:19<03:44,  3.25s/it]

Trying Again!


 85%|████████▌ | 294/344 [03:44<00:35,  1.42it/s]

Trying Again!


 87%|████████▋ | 300/344 [03:47<00:51,  1.16s/it]

Trying Again!


 89%|████████▊ | 305/344 [04:13<01:38,  2.54s/it]

Trying Again!


100%|██████████| 344/344 [04:39<00:00,  1.23it/s]


5 ./processed_publications/guan_cuntai.json


 57%|█████▋    | 143/249 [00:12<00:11,  8.89it/s]

Trying Again!


 57%|█████▋    | 143/249 [00:25<00:11,  8.89it/s]

Trying Again!


 90%|████████▉ | 223/249 [00:45<00:05,  4.42it/s]

<result>Biometrics</result>


100%|██████████| 249/249 [00:57<00:00,  4.35it/s]


6 ./processed_publications/owen_noel_newton_fernando.json


 41%|████      | 52/128 [00:00<00:00, 513.35it/s]

Trying Again!


 75%|███████▌  | 96/128 [00:20<00:08,  3.83it/s] 

Trying Again!
Trying Again!


100%|██████████| 128/128 [00:45<00:00,  2.78it/s]


7 ./processed_publications/alexei_sourin.json


100%|██████████| 237/237 [00:07<00:00, 30.38it/s]


8 ./processed_publications/long_cheng.json


 46%|████▋     | 52/112 [00:20<00:52,  1.14it/s]

Trying Again!


 69%|██████▉   | 77/112 [00:47<00:52,  1.51s/it]

<result>Data Mining</result>
<result>Data Mining</result>
<result>Data Mining</result>
<result>Data Mining</result>
<result>Data Mining</result>


 74%|███████▍  | 83/112 [00:58<00:43,  1.51s/it]

Trying Again!


 77%|███████▋  | 86/112 [01:13<01:04,  2.47s/it]

<result>Data Imputation</result>


 79%|███████▊  | 88/112 [01:19<01:02,  2.62s/it]

<result>Data Mining</result>
<result>Data Mining</result>
<result>Data Mining</result>
<result>Data Mining</result>
<result>Data Mining</result>
<result>Data Mining</result>
<result>Data Mining</result>
<result>Data Mining</result>
<result>Data Mining</result>
<result>Data Mining</result>
<result>Data Mining</result>
<result>Data Mining</result>
<result>Data Mining</result>
<result>Data Mining</result>
<result>Data Mining</result>
<result>Data Mining</result>


 96%|█████████▋| 108/112 [02:11<00:08,  2.08s/it]

<result>Data Analytics</result>


100%|██████████| 112/112 [02:13<00:00,  1.20s/it]


9 ./processed_publications/yeo_chai_kiat.json


 34%|███▍      | 109/317 [00:01<00:04, 49.79it/s]

Trying Again!


 40%|███▉      | 126/317 [00:14<00:44,  4.28it/s]

Trying Again!


 46%|████▌     | 145/317 [00:28<01:01,  2.78it/s]

Trying Again!


 46%|████▌     | 145/317 [00:40<01:01,  2.78it/s]

Trying Again!


 74%|███████▍  | 235/317 [01:20<00:16,  4.93it/s]

Trying Again!


 76%|███████▋  | 242/317 [01:34<01:00,  1.23it/s]

<result>Graph Mining</result>


100%|██████████| 317/317 [01:46<00:00,  2.98it/s]


10 ./processed_publications/lim_wei_yang_bryan.json


 14%|█▎        | 9/66 [00:04<00:25,  2.27it/s]

<result>Networking and Communication</result>
<result>Networking</result>
Trying Again!


 33%|███▎      | 22/66 [00:23<00:39,  1.11it/s]

Trying Again!


 45%|████▌     | 30/66 [00:39<00:42,  1.19s/it]

Trying Again!


 83%|████████▎ | 55/66 [01:16<00:17,  1.55s/it]

Trying Again!


 85%|████████▍ | 56/66 [01:32<00:55,  5.58s/it]

Trying Again!


 88%|████████▊ | 58/66 [01:45<00:45,  5.67s/it]

<result>Data Recovery</result>
<result>Data Recovery</result>


100%|██████████| 66/66 [01:53<00:00,  1.72s/it]


11 ./processed_publications/cham_tat_jen.json


 66%|██████▌   | 88/133 [00:16<00:13,  3.34it/s]

<result>Computer Vision</result>


100%|██████████| 133/133 [00:30<00:00,  4.41it/s]


12 ./processed_publications/tan_rui.json


 56%|█████▌    | 93/167 [00:02<00:01, 62.88it/s]

<result>Data Center Management</result>
Trying Again!


 85%|████████▌ | 142/167 [00:51<00:17,  1.43it/s]

Trying Again!
<result>Data Center Cooling</result>


 86%|████████▌ | 143/167 [01:07<00:31,  1.30s/it]

Trying Again!


 87%|████████▋ | 145/167 [01:20<00:39,  1.80s/it]

<result>Wireless Communication</result>


 87%|████████▋ | 145/167 [01:40<00:39,  1.80s/it]

The main field of computer science that this academic paper is addressing is 'Localization and Mapping'.


100%|██████████| 167/167 [01:49<00:00,  1.53it/s]


# Grouping Topics

In [61]:
dir = './research_interest'
files = [os.path.join(dir, f) for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))]

interests = {}
for file in files:
    with open(file, 'r') as f:
        pubs = json.load(f)
    
    for pub in pubs:
        interests[pub['topic']] = interests.get(pub['topic'], 0) + 1

len(interests)

534

In [190]:
import openai

def group_topics(keywords):
    system_msg = "I am an AI assistant designed to help you group related keywords together. \
    I understand that I will receive a list of computer science realted keywords and I am able to understand each keyword. \
    My response be a must a list of lists eg: `[1,2,3,4],[q,w,e]` where within each list consist of keywords that are related to each other "

    prompt = f"""
    Based on the list of keywords provided below, help me to group them. 
    Your response must be a list of list where within each list are keywords that are related to each other.
    Wrap the list of list in `<result>`. eg `<result>[[1,2,3,4],[q,w,e]]<result>`.

    Keywords:

    {keywords}
    """
    while True:
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4",
                messages=[{"role": "system", "content": system_msg },
                        {"role": "user", "content": prompt}
                ],
                request_timeout=60*15,
                temperature=0.2
            ) 
            return response['choices'][0]['message']['content']
        except Exception as e:
            print(e)
            print('Trying Again!')

In [None]:
all_outputs = []
for i in tqdm(range(0, len(sorted_keys)-50, 50)):
    output = group_topics(sorted_keys[i:i+50])
    all_outputs.append(output)

all_outputs.append(group_topics(sorted_keys[len(sorted_keys)-50-1:]))

In [116]:
all_outputs.append(group_topics(sorted_keys[len(sorted_keys)-50-1:]))

In [162]:
all_outputs = ["<result>[\n    ['Computer Vision', 'Image Processing', 'Video Processing', 'Video Quality Assessmen'],\n    ['Natural Language Processing', 'Natural Language Processing (NLP)', 'Speech Processing', 'Speech Recognition', 'Speech Emotion Recognition', 'Information Extraction'],\n    ['Distributed System', 'Edge Computing', 'Mobile Edge Computing', 'Federated Learning', 'Privacy-Preserving Federated Learning'],\n    ['Machine Learning', 'Reinforcement Learning', 'Artificial Intelligenc', 'Transfer Learning', 'Domain Adaptation', 'Domain Generalization', 'Privacy-Preserving Machine Learning'],\n    ['Wireless Communication', 'Wireless Network', 'Wireless Networking', 'Wireless Communication Network', 'Networking and Communication', 'Networking', 'Network Slicing'],\n    ['Graph Mining', 'Graph Neural Network', 'Knowledge Graph'],\n    ['Information Retrieva', 'Recommender System', 'Recommendation System'],\n    ['Quantum Computing'],\n    ['Computer Graphic'],\n    ['Software Engineering', 'Software Security'],\n    ['Bioinformatic'],\n    ['Data Mining'],\n    ['Network Security', 'Cybersecurity', 'Security', 'Cryptography'],\n    ['Internet of Things (IoT)'],\n    ['Optimization'],\n    ['Blockchain'],\n    ['Database System'],\n    ['Robotic']\n]<result>",
 "<result>[['Knowledge Graph Completion', 'Adversarial Machine Learning', 'Multi-Agent Reinforcement Learning', 'Game Theory', 'Collaborative Filtering', 'Anomaly Detection', 'Deep Learning', 'Multimodal Learning', 'Meta-Learning', 'Sequential Recommendation', 'Generative Adversarial Networks (GANs)', 'Artificial Intelligence and Machine Learning', 'Knowledge Distillation', 'Adversarial Attack', 'Continual Learning', 'Neural Architecture Search (NAS)', 'Evolutionary Computation', 'Neural Network'], ['Wireless Communication System', 'Wireless Sensor Network', 'Mobile Edge Computing (MEC)', 'Mobile Computing', 'Wireless System'], ['Information Security', 'Privacy-Preserving Data Analysi', 'Communication Network', 'Hardware Security', 'Side-Channel Attack', 'Secure Communication', 'Data Privacy and Security', 'Network Analysi', 'Security and Privacy', 'Privacy-preserving Machine Learning'], ['Cloud Computing', 'Hardware Acceleration', 'Computer Architec', 'Operating System', 'Embedded System'], ['Signal Processing', 'Audio Processing', 'Speech Enhancemen', 'Geometry Processing'], ['Software Testing', 'Code Search', 'Formal Method'], ['Emotion Recognition', 'Human-Computer Interaction', 'Human-Computer Interaction (HCI)', 'Biometric']]<result>",
 "<result>[['Data Structures and Algorithm', 'Geometric Algorithm', 'Graph Algorithm'], ['Image Compression', 'Image Generation', 'Image Quality Assessmen'], ['Social Media Analysi', 'Sentiment Analysi', 'Topic Modeling'], ['Neural Network Pruning', 'Neural Architecture Search', 'Adversarial Attacks on Deep Neural Network', 'Deep Reinforcement Learning'], ['Cryptanalysi', 'Blockchain and Cryptocurrency'], ['Speech Separation', 'Audio-Visual Speech Recognition', 'Audio Classification'], ['Wireless Sensing', 'Wireless Communications and Networking', 'Wireless Communication and Networking', 'Wireless Security', 'Physical Layer Authentication'], ['Time Series Forecasting', 'Operations Research'], ['Interpretable Federated Learning', 'Privacy and Security in Federated Learning', 'Blockchain-Based Federated Learning'], ['Database Management System', 'Community Search'], ['Healthcare Technology', 'Activity Recognition', 'Human Activity Recognition'], ['Data Augmentation', 'Explainable Artificial Intelligence (XAI)', 'Program Representation Learning'], ['Virtual Reality', 'Autonomous Vehic', 'Autonomous System'], ['Semantic Computing', 'Cyber-Physical System', 'Quantum Networking'], ['Intrusion Detection System', 'Reputation Managemen', 'Power System'], ['Blockchain-based Semantic Exchange Framework', 'Parallel Computing', 'Multi-view Clustering'], ['Computational Fluid Dynamics (CFD)', 'Automated Program Repai']]<result>",
 "<result>[['Erasure Coding', 'Natural Language Video Localization', 'Unsupervised Domain Adaptation', 'Graph Generation', 'Trajectory Modeling', 'Representation Learning', 'Social Network Analysis', 'Affective Computing', 'Automatic Speech Recognition (ASR)', 'Brain-Computer Interface', 'Self-supervised Learning', 'Graph-based Recommendation System', 'Semi-Supervised Learning (SSL)', 'Neuroscience and Deep Learning', 'Explainable AI', 'Computational Intelligence', 'Intelligent Voice Assistant', 'Graph Representation Learning', 'Short Text Topic Modeling', 'Emotion Recognition in Conversation', 'Neural Topic Mod', 'Unsupervised Learning', 'Entity Resolution', 'Geospatial Entity Resolution', 'Data Structures and Indexing', 'Multi-task Learning'], ['Quantitative Finance'], ['Psychology and Healthcare', 'Medical Image Report Generation'], ['Blockchain-Based Personal Health Record System', 'Blockchain-based Trust Management'], ['Ubiquitous Acoustic Sensing', 'RF-sensing and Vital-signs Monitoring', 'Acoustic Sensing'], ['Autonomous Driving'], ['Privacy-Preserving Wi-Fi Sensing', 'Security and Authentication', 'Authentication'], ['Antenna Design and Optimization', 'Electrical Engineering', 'Wireless Communication and Positioning', 'FPGA Design'], ['Geospatial Artificial Intelligence', 'Geospatial Information Retrieval', 'Geospatial Data Analysis', 'Spatial Database'], ['Constraint Satisfaction Problems (CSP)', 'Combinatorial Optimization', 'Integer Programming']]<result>",
 "<result>[['Social E-commerce', 'Advertising Optimization'], ['Deep Learning for 3D Point Cloud', '3D Representation Learning', 'Generative Modeling', 'Domain Adaptive Object Detection', 'Generative Adversarial Network', 'Graph Convolutional Network', 'Edge-Cloud Cooperation for DNN Inference via Reinforcement Learning and Supervised Learning', 'Adversarial Patch Detection', 'Time-Series Representation Learning', 'Federated Graph Neural Network', 'Machine Learning Method Recommendation', 'Knowledge Graph-aware Recommendation', 'Graph Network', 'Many-objective Optimization', 'Optimization and Game Theory'], ['Privacy and Robustness in Federated Learning', 'Mobile Augmented Reality with Federated Learning', 'Adversarial Robustness in Continual Learning', 'Privacy-Preserving Fine-Tuning of Artificial Intelligence (AI) Foundation Models with Federated Learning, Differential Privacy, Offsite Tuning, and Parameter-Efficient Fine\\xa0…', 'Privacy and robustness in federated learning', 'Privacy-Preserving Vertical Federated Learning', 'Privacy-Preserving Knowledge Transf'], ['Wireless Communication and Network Optimization', 'Wireless Communication and Network', 'Dynamic Spectrum Acc', 'Quantum Network'], ['Neuroimaging Analysis', 'Multi-Omics Data Analysis', 'Medical Imaging'], ['Adversarial Defense and Out-of-Distribution Detection', 'Code Obfuscation', 'Malware Detection', 'Database Security', 'Secure Multi-Party Delegated Authorisation', 'Privacy-preserving protocol', 'Cryptocurrency and Blockchain Technology', 'Cryptocurrencies and Blockchain Technology', 'Cryptocurrencies'], ['Real-time System', 'Acoustic Signal Processing', 'Remote Sensing', 'Physical Layer Security', 'Multi-criteria journey planning over multimodal public transport network', 'Traffic Prediction'], ['Virtual Reality (VR)', 'Information Visualization', 'Data Governance and Privacy Protection', 'Explainable AI Design']]<result>",
 "<result>[['Visual Reasoning', 'Natural Language Reasoning', 'Machine Reading Comprehension', 'Dialogue State Tracking', 'Natural Language Generation'], ['Continual Few-shot Relation Learning', 'Meta Learning'], ['Code Understanding, Generation, Translation, and Retrieval', 'Automatic Program Repair'], ['Knowledge Graph Embedding', 'Knowledge Science, Engineering and Management'], ['Artificial Intelligence and Ethic'], ['Data Quality Improvement', 'Data Visualization'], ['Privacy-Preserving Data Masking', 'Privacy-preserving Data Sharing', 'Privacy-Preserving Computation', 'Privacy-preserving decentralized deep learning', 'Secure Data Sharing', 'Privacy-Preserving Recommender System'], ['Wireless Communication and Sensing', 'Wireless Networks and Security'], ['Secure Multiparty Computation', 'Secure Machine Learning', 'Secure Inference Framework'], ['Intellectual Property Protection', 'Digital Watermarking'], ['Adversarial Training', 'Adversarial Attacks on Neural Network', 'Adversarial Attacks on Deep Learning'], ['Cryptographic Acceleration', 'Cryptographic Framework', 'Quantum Cryptography'], ['Image Steganography', 'Information Hiding'], ['Energy-efficient Computing'], ['Web Services Security'], ['Physics-Informed Neural Network', 'Graph Classification', 'Evolutionary Optimization', 'Time Series Representation Learning', 'Unsupervised Representation Learning for Time Series'], ['Health Informatics', 'Herbal Informatics', 'Virtual Reality and Augmented Reality (VR/AR) in Healthcare'], ['Blockchain-based Federated Learning', 'Cryptocurrency and Blockchain'], ['Resource Allocation and QoE Analysis', 'Network Resource Allocation'], ['Internet of Things (IoT) and Virtual Reality (VR)']]<result>",
 "<result>[\n    ['Blockchain-Based IoT', 'Blockchain and Collaborative Learning', 'Blockchain Security', 'Privacy-Preserving Blockchain-Based Federated Learning', 'Blockchain and RF-powered backscatter cognitive radio network', 'Blockchain Technology', 'Blockchain and AI-Generated Content Servic', 'Blockchain and Decentralized System', 'Blockchain and Edge Intelligenc'],\n    ['Privacy-preserving intelligent resource allocation for federated edge learning in quantum intern', 'Privacy-preserving anomaly detection', 'Secure Information Transmission', 'Secure Storage and Sharing of Personal Health Records in Internet of Medical Things (IoMT)', 'Machine Learning Security'],\n    ['Wireless Network Managemen', 'Wireless Communication and Radar Sensing', 'Wireless Sensing Data Collection and Processing', 'Wireless Communication and Network Security', 'Semantic Communication in Vehicular Network', 'Communications and Signal Processing', 'Wireless Communication and Signal Processing', 'Wireless Communication and Security', 'Wireless Communication and Radar System', 'Wireless Communications System'],\n    ['Network Optimization', 'Network Management and Configuration', 'Network Architec', 'Network Traffic Contro'],\n    ['Energy Managemen', 'Green Computing', 'Energy Efficiency in Data Cen'],\n    ['Edge AI', 'Edge and Fog Computing', 'Edge Intelligenc', 'Edge Computing and AI for 6G Network'],\n    ['Intelligent Surveillance in Maritime Transportation System', 'Multi-Agent Deep Reinforcement Learning', 'Machine Learning for Graph Problem'],\n    ['Data Compression', 'Approximate Data Struc'],\n    ['Resource Managemen', 'Algorithmic Mechanism Design'],\n    ['Video Transmission Over a Semantic Communication Approach', 'Route Planning', 'Spatial Search', 'Spatial Query System', 'Stock Market Prediction'],\n    ['Graph Processing', 'Knowledge Graph Reasoning']\n]<result>",
 "<result>[['Energy Efficiency', 'Data Center Cooling Control', 'Data Center Sustainability'], ['Financial Technology', 'Blockchain and Virtual Reality', 'Blockchain and Smart Contract'], ['Computational Fluid Dynamics/Heat Transfer (CFD/HT)', 'Simulation-based Optimization', 'Simulation Study', 'Simulation-Based Analysis', 'Intelligent Modeling and Simulation Lifecycle'], ['Symbolic Execution', 'Program Semantics Learning', 'Program Repair', 'Program Analysis', 'Program Understanding'], ['Artificial Intelligence Security', 'Adversarial Attacks in Computer Vision', 'Autonomous Systems Testing', 'Autonomous Driving System', 'Adversarial Attacks on Code Mod', 'Fairness in Deep Neural Network', 'Fairness of Neural Network'], ['Speaker Recognition System'], ['Mobile App Development', 'Mobile App Security'], ['Software Licensing', 'Software Composition Analysis (SCA)', 'Software Composition Analysis', 'Software Performance'], ['Robot Operating System'], ['Data-flow Testing', 'Bug Detection', 'Reverse Engineering and Binary Analysis', 'Memory Safety Analysis', 'Code Clone Detection', 'Algorithmic Complexity Analysis', 'Firmware Security', 'Formal Verification'], ['Biomedical Engineering'], ['Web Security'], ['Workflow Scheduling and Deployment', 'Smart Manufacturing', 'Data-driven Reliability Assessment of Complex Manufacturing System'], ['Social Network', 'Internet of Things'], ['Multi-Agent Path Finding'], ['Privacy Preserving Federated Learning'], ['Parallel and Distributed Simulation'], ['Map Matching Algorithm']]<result>",
 "<result>[['Evacuation Modeling', 'Geometric Modeling and Processing', 'Geometric Modeling'], ['Video Compression', 'Video Coding', 'Weakly Supervised Video Anomaly Detection (WSVAD)'], ['Adversarial Image Generation', 'Point Cloud Registration', '3D Reconstruction'], ['Visual Question Answering (VQA)', 'Natural Language Processing(NLP)', 'Conversational Recommendation System', 'Complex Question Answering over Knowledge Ba', 'Artificial Intelligence and Natural Language Processing'], ['Explainable AI (XAI)', 'Interpretability and Explainable AI'], ['Meta-learning', 'Few-Shot Learning', 'Novel Class Discovery'], ['Chemoinformatic', 'Artificial Intelligence in Chemistry'], ['Graphics and Visualization', 'Mixed and Augmented Reality'], ['Blockchain Interoperability', 'Data Governanc'], ['Clustering', 'Knowledge Graph Completion (KGC)', 'Dynamic network learning', 'Graph Analytic'], ['Scientometric', 'Computational Epidemiology'], ['Program Analysis and Verification', 'Software Testing and Performance Analysi', 'Performance Analysi'], ['Network Algorithm', 'Fault-tolerant computation'], ['Hardware-Aware Model Scaling', 'Neural Network Compression', 'Hardware Design and Optimization', 'Hardware Accelerato', 'Model Compression', 'Network-on-Chip (NoC)', 'Network-on-chip (NoC)'], ['File System', 'Real-Time System'], ['Medical Image Segmentation', 'Protein-Ligand Binding Prediction', 'Multimodal Sequence Learning'], ['Stochastic Optimization']]<result>",
 "<result>[['Attribution Protection', 'Cryptographic Security', 'Cryptography and Security', 'Cryptographic Primitiv', 'Side Channel Attack', 'Cryptographic Hardware Acceleration', 'Post-Quantum Cryptography'], ['Multimodal Fusion', 'Multi-modal Fusion', 'Multimodal Emotion Recognition'], ['Power Managemen', 'Performance Modeling', 'Hardware Design', 'Hardware Design and Security', 'System-on-Chip (SoC) Design', 'Processor Design and Architecture Description Languag', 'Electronic Design Automation (EDA)', 'Logic Synthesi'], ['LiDAR Semantic Segmentation', 'Video Segmentation', 'Video Restoration', 'Video Inpainting'], ['Physics Simulation', 'Emerging Computing', 'Neuromorphic Computing', 'Emerging Memory Technologi', 'Emerging Technologi', 'Nanoelectronic'], ['Privacy-preserving Federated Learning', 'Robust Machine Learning', 'Multiagent Reinforcement Learning', 'Reinforcement Learning in Financial Mark', 'Adversarial Attacks in Reinforcement Learning', 'Zero-shot learning'], ['Opponent Modeling', 'Algorithmic Trading', 'Algorithm Design and Analysi', 'Multiagent Planning', 'Temporal Reasoning'], ['Constraint Optimization Problems (COPs)', 'Causal Inferenc'], ['Healthca', 'Knowledge Base Completion', 'Statistical Analysi'], ['Neural Speech Processing', 'Automatic Speech Recognition', 'Audio-visual Speech Recognition', 'Speaker Verification', 'Spoken Keyword Spotting', 'Automatic Speaker Verification']]<result>",
 "<result>[\n    ['Constraint Optimization Problems (COPs)', 'Algorithm Design and Analysi', 'Multiagent Planning', 'Temporal Reasoning'],\n    ['Causal Inferenc', 'Statistical Analysi'],\n    ['Reinforcement Learning in Financial Mark', 'Adversarial Attacks in Reinforcement Learning'],\n    ['Multimodal Emotion Recognition', 'Zero-shot learning', 'Knowledge Base Completion', 'Neural Speech Processing', 'Xplainable artificial intelligence (XAI)', 'Neurocomputing'],\n    ['Automatic Speech Recognition', 'Audio-visual Speech Recognition', 'Speaker Verification', 'Spoken Keyword Spotting', 'Automatic Speaker Verification', 'Speaker Recognition and Clustering', 'Speech and Audio Processing', 'Audio Signal Processing'],\n    ['Healthca'],\n    ['Brain-Computer Interfaces (BCIs)', 'Brain-Computer Interfaces (BCI)'],\n    ['Mobile Application', 'Online Learning', 'Interactive Graphics and Sonification', 'Procedural Modeling and Visualization'],\n    ['Desalination Proc'],\n    ['Geographic Information Systems (GIS)', 'Geographical Information System', 'Spatial Crowdsourcing', 'Approximate Nearest Neighbor Search'],\n    ['Data Imputation', 'Physiological Signal Classification', 'Privacy-preserving knowledge transf', 'Resource Allocation in Multi-access Edge Computing (MEC)', 'Time Series Prediction', 'Data Recovery', 'Data Center Managemen', 'Privacy-Preserving Mobile Cloud Inferenc', 'Indoor Localization', 'Power Grid Cybersecurity', 'Indoor Smartphone SLAM', 'Thermal Management in Data Cen'],\n    ['Wireless Charging', 'Quantum Key Distribution (QKD)', 'Wireless Communication and Sensor Network', 'Localization and Mapping'],\n    ['Cryptographic Protocols and Algorithm']\n]<result>"]

In [163]:
all_outputs = [x.strip('<result>') for x in all_outputs]
all_outputs = [x.strip('[').strip(']') for x in all_outputs]
all_outputs = [x.split('],')for x in all_outputs]

groupings = []
for x in all_outputs:
    for y in x:
        z = y.strip()
        z = z.strip('[').strip(']').split(',')
        group = []
        for a in z:
            b = a.strip().strip("'")
            group.append(b)
        groupings.append(group)
            

In [174]:
import openai

def get_1_topic(keywords):
    system_msg = "I am an AI assistant designed to help you identify keyword for a list of words. \
    I understand that I will receive a list of computer science related keywords and I am able to understand each keyword. \
    My response be a must be wrapped in <result>, eg: <result>topic<result> replace topic with the topic that sums up the list of keywords. "

    prompt = f"""
    Based on the list of keywords provided below, help me to extract the main keyword. 
    Your response be a must be wrapped in <result>, eg: <result>topic<result> replace topic with the topic that sums up the list of keywords.

    Keywords:

    {','.join(keywords)}
    """
    while True:
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4",
                messages=[{"role": "system", "content": system_msg },
                        {"role": "user", "content": prompt}
                ],
                # request_timeout=10,
                temperature=1
            ) 
            output = response['choices'][0]['message']['content']
            if output.startswith('<result>') and output.endswith('<result>'):
                output = output.strip('<result>')
                return output
            else:
                print(output)
        except Exception as e:
            print(e)
            print('Trying Again!')

In [175]:
keyword = []

for group in groupings:
    if len(group)==1:
        output = group[0]
    else:
        output = get_1_topic(group)
    keyword.append(tuple([output, group]))


<result>Advanced Computer Sciences</result>
<result>Geospatial Technology</result>
<result>Federated Learning</result>
<result>Spatial Data Processing</result>
<result>Natural Language Processing</result>
<result>Multimodal Fusion</result>


In [187]:
dir = './research_interest'
files = [os.path.join(dir, f) for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))]

interests = {}
for file in files:
    with open(file, 'r') as f:
        pubs = json.load(f)
    
    for pub in pubs:
        topic = pub['topic']

        for key in keyword:
            for interest in key[1]:
                if topic is None:
                    pub['agg_topic'] = None
                elif topic in interest:
                    pub['agg_topic'] = key[0]
                
        if 'agg_topic' not in pub:
            pub['agg_topic'] = topic
    
    with open(file,'w') as f:
        json.dump(pubs, f)


In [188]:
dir = './research_interest'
files = [os.path.join(dir, f) for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))]

interests = {}
for file in files:
    with open(file, 'r') as f:
        pubs = json.load(f)
    
    for pub in pubs:
        interests[pub['agg_topic']] = interests.get(pub['agg_topic'], 0) + 1

len(interests)

118

In [192]:
agg_topic = group_topics(list(interests.keys()))

In [231]:
output = agg_topic
output = output.strip('<result>')
output = output.strip('[').strip(']')
output = output.split('],')
groupings = []
for x in output:
    z = x.strip()
    z = z.strip('[').strip(']').split(',')
    group = []
    for a in z:
        b = a.strip().strip("'")
        group.append(b)
    groupings.append(group)

In [232]:
keyword = []

for group in groupings:
    if len(group)==1:
        output = group[0]
    else:
        output = get_1_topic(group)
    keyword.append(tuple([output, group]))

<result>Computing</result>
<result>Federated Learning</result>


In [233]:
dir = './research_interest'
files = [os.path.join(dir, f) for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))]

for file in files:
    with open(file, 'r') as f:
        pubs = json.load(f)
    
    for pub in pubs:
        topic = pub['agg_topic']
        if topic is None:
            pub['agg_topic'] = None
        else:
            for key in keyword:
                for interest in key[1]:
                    if topic in interest:
                        pub['final_topic'] = key[0]
                
        if 'final_topic' not in pub:
            pub['final_topic'] = topic
    
    with open(file,'w') as f:
        json.dump(pubs, f)


In [236]:
dir = './research_interest'
files = [os.path.join(dir, f) for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))]

interests = {}
for file in files:
    with open(file, 'r') as f:
        pubs = json.load(f)
    
    for pub in pubs:
        interests[pub['final_topic']] = interests.get(pub['final_topic'], 0) + 1

interests

{None: 13434,
 'Computing': 251,
 'Artificial Intelligenc': 721,
 'Wireless Communication and Networking': 189,
 'Federated Learning': 399,
 'Data Processing': 66,
 'Biomedical Informatic': 42,
 'Graph Theory': 47,
 'Database Managemen': 63,
 'Software Engineering': 63,
 'Healthcare Technology': 12,
 'Blockchain Technology': 41,
 'Geospatial Technology': 38,
 'Computer Scienc': 78,
 'Audio Processing': 38,
 'Computer Interaction': 13,
 'Security': 57,
 'Computer Vision': 16,
 'Optimization Techniq': 7,
 'Data Structures and Algorithm': 9,
 'Knowledge Managemen': 3,
 'Robotic': 13,
 'Privacy-Preserving Fine-Tuning of Artificial Intelligence (AI) Foundation Models with Federated Learning, Differential Privacy, Offsite Tuning, and Parameter-Efficient Fine\xa0…': 1,
 'Autonomous Technology': 2,
 'Signal Processing': 14,
 'Hardware Design and Neural Network Optimization': 25,
 'Cryptography': 27,
 'Code Understanding, Generation, Translation, and Retrieva': 1,
 'Knowledge Science, Engineeri