In [1]:
import asyncio
import re
from datetime import date

from server import add_new_project, assign_paper_to_specific_project

def extract_arxiv_title_mapping(bibliography: str) -> dict:
    """
    Extracts a dictionary mapping arXiv IDs to paper titles from a bibliography string.

    Each line of the input is treated as one reference. A line contributes an
    entry only when it contains both a title wrapped in curly double quotes
    (“...”) and a new-style arXiv identifier, written either as
    ``arXiv:YYMM.NNNNN`` or as an ``arxiv.org/abs/`` URL. Lines missing either
    part are skipped. If the same ID appears on several lines, the last line wins.

    Parameters:
        bibliography (str): The bibliography text to process.

    Returns:
        dict: A dictionary with arXiv ID (str) as key and paper title (str) as value.
            Titles have the citation's trailing comma removed. IDs taken from
            the ``arXiv:`` form keep a version suffix (e.g. ``v2``) when one is
            written; IDs taken from a URL never include it.
    """
    arxiv_dict = {}

    # Regular expression patterns, compiled once outside the per-line loop.
    # Title: the shortest run of text between curly double quotes.
    title_pattern = re.compile(r'“(.+?)”')
    # ID: "arXiv:<id>[vN]" (group 1, version included) or an abs URL
    # (group 4, version excluded).
    arxiv_pattern = re.compile(
        r'arXiv[:\s]*((\d{4}\.\d{4,5})(v\d+)?)|https?://arxiv\.org/abs/(\d{4}\.\d{4,5})(v\d+)?'
    )

    # Process the bibliography line by line.
    for line in bibliography.split('\n'):
        title_match = title_pattern.search(line)
        if not title_match:
            continue

        arxiv_match = arxiv_pattern.search(line)
        if not arxiv_match:
            continue

        # The references put the separating comma inside the closing quote
        # (“Title,”); drop it so the stored value is the bare title.
        title = title_match.group(1).rstrip(', ')

        # Group 1 is set for the "arXiv:" form, group 4 for the URL form.
        arxiv_id = arxiv_match.group(1) or arxiv_match.group(4)
        arxiv_dict[arxiv_id] = title

    return arxiv_dict

Database connection established successfully.
Projects Table Schema:
id: integer
start_date: date
end_date: date
name: character varying
description: text

Papers Table Schema:
published_date: date
embedding: USER-DEFINED
user_added_date: date
authors: ARRAY
arxiv_id: character varying
categories: ARRAY
arxiv_url: text
pdf_file_path: text
primary_category: character varying
title: text
abstract: text

ProjectPapers Table Schema:
project_id: integer
relation_type: USER-DEFINED
paper_id: character varying


## Add project and papers

In [None]:
"""
Project: Domain-shift continual learning
Description: This project addresses the challenge of adapting large language models (LLMs) to evolving domains over time by developing continual learning techniques that prevent catastrophic forgetting while enabling fast and efficient adaptation to new domain distributions.
start: 2023-12-01
end: 2024-08-01
Abstract draft: We propose ShiftAdapt, a continual learning framework designed for LLMs facing non-stationary domain shifts. By combining domain-aware replay, dynamic adapter routing, and gradient isolation, our method enables efficient adaptation while preserving prior task knowledge. Experiments on sequential domain adaptation benchmarks demonstrate superior retention and transfer performance compared to existing baselines.
Query: Are there any recent papers on continual learning techniques for large language models that address non-stationary domain shifts, specifically using methods like domain-aware replay, dynamic adapter routing, or gradient isolation to prevent catastrophic forgetting while enabling fast adaptation to evolving domain distributions?
"""
# await add_new_project(project_name="Domain-shift continual learning",
#                 project_description="This project addresses the challenge of adapting large language models (LLMs) to evolving domains over time by developing continual learning techniques that prevent catastrophic forgetting while enabling fast and efficient adaptation to new domain distributions.",
#                 project_start=date(2023, 12, 1),
#                 project_end=date(2024, 8, 1))




# Reference list of related papers for the "Domain-shift continual learning"
# project, one reference per line.
# Entries [5] and [10] carry only a DOI and no arXiv identifier.
bibliography = """
[1] T. Qin, S. Wang, and H. Li, “Generalizing to Evolving Domains with Latent Structure-Aware Sequential Autoencoder,” Jun. 16, 2022, arXiv: arXiv:2205.07649. doi: 10.48550/arXiv.2205.07649.
[2] H. Shi et al., “Continual Learning of Large Language Models: A Comprehensive Survey,” Nov. 25, 2024, arXiv: arXiv:2404.16789. doi: 10.48550/arXiv.2404.16789.
[3] Ç. Yıldız, N. K. Ravichandran, N. Sharma, M. Bethge, and B. Ermis, “Investigating Continual Pretraining in Large Language Models: Insights and Implications,” Feb. 12, 2025, arXiv: arXiv:2402.17400. doi: 10.48550/arXiv.2402.17400.
[4] S. Zhang et al., “Enhanced Fine-Tuning of Lightweight Domain-Specific Q&A Model Based on Large Language Models,” Aug. 23, 2024, arXiv: arXiv:2408.12247. doi: 10.48550/arXiv.2408.12247.
[5] C. Jeong, “Fine-tuning and Utilization Methods of Domain-specific LLMs,” jiis, vol. 30, no. 1, pp. 93–120, Mar. 2024, doi: 10.13088/jiis.2024.30.1.093.
[6] P. Khayatan, M. Shukor, J. Parekh, and M. Cord, “Analyzing Fine-tuning Representation Shift for Multimodal LLMs Steering alignment,” Jan. 06, 2025, arXiv: arXiv:2501.03012. doi: 10.48550/arXiv.2501.03012.
[7] K. Thandiackal, L. Piccinelli, P. Pati, and O. Goksel, “Multi-scale Feature Alignment for Continual Learning of Unlabeled Domains,” Feb. 02, 2023, arXiv: arXiv:2302.01287. doi: 10.48550/arXiv.2302.01287.
[8] M. Rostami, “Continuous Unsupervised Domain Adaptation Using Stabilized Representations and Experience Replay,” Jan. 31, 2024, arXiv: arXiv:2402.00580. doi: 10.48550/arXiv.2402.00580.
[9] M. Toldo, U. Michieli, and P. Zanuttigh, “Learning with Style: Continual Semantic Segmentation Across Tasks and Domains,” Oct. 13, 2022, arXiv: arXiv:2210.07016. doi: 10.48550/arXiv.2210.07016.
[10] J. Houyon et al., “Online Distillation with Continual Learning for Cyclic Domain Shifts,” in 2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), Jun. 2023, pp. 2437–2446. doi: 10.1109/cvprw59228.2023.00242.
"""

arxiv_dict = extract_arxiv_title_mapping(bibliography)
print(len(arxiv_dict))
for key in arxiv_dict.keys():
    print(key)
    try : 
        await assign_paper_to_specific_project(project_id=4,
                                           arxiv_url_or_id=key)
    except Exception as e:
        print(f"Error assigning paper {key}: {e}")

8
2205.07649
Paper 2205.07649 assigned to project 4 successfully.
2404.16789
Paper 2404.16789 assigned to project 4 successfully.
2402.17400
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2408.12247
Rollback!! Error assigning paper to project: HTTPConnectionPool(host='export.arxiv.org', port=80): Read timed out. (read timeout=10)
2501.03012
Paper 2501.03012 assigned to project 4 successfully.
2302.01287
Paper 2302.01287 assigned to project 4 successfully.
2402.00580
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2210.07016
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


In [None]:
"""
Project: PEFT for LLM personalization
Description: This project aims to personalize large language models (LLMs) for individual users by developing parameter-efficient continual fine-tuning methods that adapt to evolving user interactions while preserving prior knowledge.
start: 2024-03-15
end: 2025-10-15
Abstract draft: We introduce PersonaTune, a continual fine-tuning approach for personalizing LLMs using low-rank adaptation (LoRA) and task-conditioned replay. Our method enables efficient adaptation to user-specific data while mitigating catastrophic forgetting. Experiments on user dialogue benchmarks show improved long-term personalization with minimal memory overhead.
Query: I’m looking for papers on parameter-efficient continual fine-tuning techniques for personalizing large language models. Are there recent methods using LoRA, adapters, or memory-augmented approaches to handle evolving user-specific tasks?
"""
## add project
# Registers the project through add_new_project (imported from server),
# passing its name, description and start/end dates as keyword arguments.
# NOTE(review): the description passed here is worded differently from the
# "Description:" line in the note string at the top of this cell — confirm
# which wording is intended.
await add_new_project(project_name="PEFT for LLM personalization",
                project_description="This project explores efficient techniques to personalize large language models (LLMs) using lightweight continual fine-tuning based on user-specific interaction data. It aims to enable models to adapt incrementally to individual users without full retraining or compromising generalization.",
                project_start=date(2024, 3, 15),
                project_end=date(2025, 10, 15))



## add papers to project
# Reference list of related papers for the "PEFT for LLM personalization"
# project, one reference per line. All eleven entries carry an arXiv identifier.
bibliography = """
[1] Y. Cao et al., “Personalized Steering of Large Language Models: Versatile Steering Vectors Through Bi-directional Preference Optimization,” Jul. 29, 2024, arXiv: arXiv:2406.00045. doi: 10.48550/arXiv.2406.00045.
[2] Z. Wang et al., “Learning to Prompt for Continual Learning,” Mar. 21, 2022, arXiv: arXiv:2112.08654. doi: 10.48550/arXiv.2112.08654.
[3] J. Liu et al., “Parameter-Efficient Fine-Tuning for Continual Learning: A Neural Tangent Kernel Perspective,” May 17, 2025, arXiv: arXiv:2407.17120. doi: 10.48550/arXiv.2407.17120.
[4] Z. Tan, Z. Liu, and M. Jiang, “Personalized Pieces: Efficient Personalized Large Language Models through Collaborative Efforts,” Oct. 28, 2024, arXiv: arXiv:2406.10471. doi: 10.48550/arXiv.2406.10471.
[5] R. M. S. Khan et al., “PortLLM: Personalizing Evolving Large Language Models with Training-Free and Portable Model Patches,” Mar. 29, 2025, arXiv: arXiv:2410.10870. doi: 10.48550/arXiv.2410.10870.
[6] A. Agarwal, S. K. Ramesh, A. Sengupta, and T. Chakraborty, “Step-by-Step Unmasking for Parameter-Efficient Fine-tuning of Large Language Models,” Aug. 27, 2024, arXiv: arXiv:2408.14470. doi: 10.48550/arXiv.2408.14470.
[7] C. Song et al., “ConPET: Continual Parameter-Efficient Tuning for Large Language Models,” Sep. 26, 2023, arXiv: arXiv:2309.14763. doi: 10.48550/arXiv.2309.14763.
[8] R. Lee, M. Kim, F. Rezk, R. Li, S. I. Venieris, and T. Hospedales, “FedP$^2$EFT: Federated Learning to Personalize Parameter Efficient Fine-Tuning for Multilingual LLMs,” Feb. 05, 2025, arXiv: arXiv:2502.04387. doi: 10.48550/arXiv.2502.04387.
[9] D. Peng, Z. Fu, and J. Wang, “PocketLLM: Enabling On-Device Fine-Tuning for Personalized LLMs,” Jul. 01, 2024, arXiv: arXiv:2407.01031. doi: 10.48550/arXiv.2407.01031.
[10] H. Chen and P. N. Garner, “Bayesian Parameter-Efficient Fine-Tuning for Overcoming Catastrophic Forgetting,” Dec. 06, 2024, arXiv: arXiv:2402.12220. doi: 10.48550/arXiv.2402.12220.
[11] Z. Tan, Q. Zeng, Y. Tian, Z. Liu, B. Yin, and M. Jiang, “Democratizing Large Language Models via Personalized Parameter-Efficient Fine-tuning,” Feb. 08, 2025, arXiv: arXiv:2402.04401. doi: 10.48550/arXiv.2402.04401.
"""

arxiv_dict = extract_arxiv_title_mapping(bibliography)
print(len(arxiv_dict))
for key in arxiv_dict.keys():
    print(key)
    try : 
        await assign_paper_to_specific_project(project_id=5,
                                           arxiv_url_or_id=key)
    except Exception as e:
        print(f"Error assigning paper {key}: {e}")

11
2406.00045
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2112.08654
Paper 2112.08654 assigned to project 5 successfully.
2407.17120
Rollback!! Error assigning paper to project: HTTPConnectionPool(host='export.arxiv.org', port=80): Read timed out. (read timeout=10)
2406.10471
Paper 2406.10471 assigned to project 5 successfully.
2410.10870
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2408.14470
Paper 2408.14470 assigned to project 5 successfully.
2309.14763
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2502.04387
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2407.01031
Paper 2407.01031 assigned to project 5 successfully.
2

In [None]:
"""
Project: VLM for compositonal reasoning
Description: This project focuses on enhancing the compositional reasoning capabilities of vision-language models (VLMs), enabling them to understand complex visual scenes by combining structured natural language prompts and modality-specific attention mechanisms.
start: 2024-10-07
end: 2026-02-03
Abstract draft: We propose CompVLM, a vision-language framework that enhances compositional reasoning by introducing structured prompt engineering and disentangled attention for vision and language inputs. Our method achieves state-of-the-art performance on VQA and NLVR2, demonstrating improved generalization to unseen compositional queries.
Query: Can you recommend recent papers on improving compositional reasoning in vision-language models? I’m especially interested in methods involving structured prompts or modality-specific attention, evaluated on tasks like VQA or NLVR2.
"""
## add project
# Registers the project through add_new_project (imported from server),
# passing its name, description and start/end dates as keyword arguments.
# NOTE(review): "compositonal" in the project name looks like a typo for
# "compositional". It is left untouched because the name may already be
# stored — confirm before renaming.
await add_new_project(project_name="VLM for compositonal reasoning",
                project_description="This project focuses on enhancing the compositional reasoning capabilities of vision-language models (VLMs), enabling them to understand complex visual scenes by combining structured natural language prompts and modality-specific attention mechanisms.",
                project_start=date(2024, 10, 7),
                project_end=date(2026, 2, 3))



## add papers to project
# Reference list of related papers for the VLM compositional-reasoning
# project, one reference per line.
# Entry [2] carries only a DOI and no arXiv identifier.
bibliography = """
[1] J. Li et al., “CoVLM: Composing Visual Entities and Relationships in Large Language Models Via Communicative Decoding,” Nov. 06, 2023, arXiv: arXiv:2311.03354. doi: 10.48550/arXiv.2311.03354.
[2] K. Zhou, J. Yang, C. C. Loy, and Z. Liu, “Learning to Prompt for Vision-Language Models,” Int J Comput Vis, vol. 130, no. 9, pp. 2337–2348, Sep. 2022, doi: 10.1007/s11263-022-01653-1.
[3] A. Pal, M. van Spengler, G. M. D. di Melendugno, A. Flaborea, F. Galasso, and P. Mettes, “Compositional Entailment Learning for Hyperbolic Vision-Language Models,” Mar. 01, 2025, arXiv: arXiv:2410.06912. doi: 10.48550/arXiv.2410.06912.
[4] F. Parascandolo, N. Moratelli, E. Sangineto, L. Baraldi, and R. Cucchiara, “Causal Graphical Models for Vision-Language Compositional Understanding,” Apr. 15, 2025, arXiv: arXiv:2412.09353. doi: 10.48550/arXiv.2412.09353.
[5] P. Cascante-Bonilla, Y. Hou, Y. T. Cao, H. D. III, and R. Rudinger, “Natural Language Inference Improves Compositionality in Vision-Language Models,” Oct. 29, 2024, arXiv: arXiv:2410.22315. doi: 10.48550/arXiv.2410.22315.
[6] T. Thrush et al., “Winoground: Probing Vision and Language Models for Visio-Linguistic Compositionality,” Apr. 22, 2022, arXiv: arXiv:2204.03162. doi: 10.48550/arXiv.2204.03162.
[7] Z. Ma, J. Hong, M. O. Gul, M. Gandhi, I. Gao, and R. Krishna, “CREPE: Can Vision-Language Foundation Models Reason Compositionally?,” May 16, 2023, arXiv: arXiv:2212.07796. doi: 10.48550/arXiv.2212.07796.
[8] M. Yuksekgonul, F. Bianchi, P. Kalluri, D. Jurafsky, and J. Zou, “When and why vision-language models behave like bags-of-words, and what to do about it?,” Mar. 23, 2023, arXiv: arXiv:2210.01936. doi: 10.48550/arXiv.2210.01936.
[9] N. Yellinek, L. Karlinsky, and R. Giryes, “3VL: Using Trees to Improve Vision-Language Models’ Interpretability,” Jan. 15, 2025, arXiv: arXiv:2312.17345. doi: 10.48550/arXiv.2312.17345.
[10] T. Ossowski, M. Jiang, and J. Hu, “Prompting Large Vision-Language Models for Compositional Reasoning,” Jan. 20, 2024, arXiv: arXiv:2401.11337. doi: 10.48550/arXiv.2401.11337.
"""


arxiv_dict = extract_arxiv_title_mapping(bibliography)
print(len(arxiv_dict))
for key in arxiv_dict.keys():
    print(key)
    try : 
        await assign_paper_to_specific_project(project_id=6,
                                           arxiv_url_or_id=key)
    except Exception as e:
        print(f"Error assigning paper {key}: {e}")

9
2311.03354
Rollback!! Error assigning paper to project: HTTPConnectionPool(host='export.arxiv.org', port=80): Read timed out. (read timeout=10)
2410.06912
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2412.09353
Paper 2412.09353 assigned to project 6 successfully.
2410.22315
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2204.03162
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2212.07796
Paper 2212.07796 assigned to project 6 successfully.
2210.01936
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2312.17345
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection w

In [None]:
"""
Project: Deep Morphometric Profiling of Single Cells for Phenotype Discovery
Description: This project focuses on using deep learning to analyze single-cell morphologies from microscopy images, aiming to uncover hidden phenotypic patterns and cellular states across diverse biological conditions.
start: 2020-07-01
end: 2021-09-10
Abstract draft: We present CellMorph, a deep morphometric profiling framework for analyzing single-cell images. By integrating convolutional embeddings with unsupervised clustering, our method identifies rare and transitional cell states under drug perturbations. Experiments on public microscopy datasets reveal CellMorph’s ability to capture phenotypic heterogeneity and suggest novel cell-type signatures.
Query: I’m looking for recent research on deep learning methods for analyzing single-cell morphology from microscopy images. Particularly, I’m interested in phenotype discovery, morphometric embedding, or clustering of rare cell states under biological perturbations.
"""

# Registers the project through add_new_project (imported from server),
# passing its name, description and start/end dates as keyword arguments.
# The name and description match the note string at the top of this cell.
await add_new_project(project_name="Deep Morphometric Profiling of Single Cells for Phenotype Discovery",
                project_description="This project focuses on using deep learning to analyze single-cell morphologies from microscopy images, aiming to uncover hidden phenotypic patterns and cellular states across diverse biological conditions.",
                project_start=date(2020, 7, 1),
                project_end=date(2021, 9, 10))



## add papers to project
# Reference list of related papers for the single-cell morphometric profiling
# project, one reference per line.
# Entries [3] and [10] carry only a DOI and no arXiv identifier.
bibliography = """
[1] G. Raghavendran et al., “Deep learning assisted high resolution microscopy image processing for phase segmentation in functional composite materials,” Mar. 17, 2025, arXiv: arXiv:2410.01928. doi: 10.48550/arXiv.2410.01928.
[2] J. Wenckstern et al., “AI-powered virtual tissues from spatial proteomics for clinical diagnostics and biomedical discovery,” Jan. 10, 2025, arXiv: arXiv:2501.06039. doi: 10.48550/arXiv.2501.06039.
[3] E. Weisbart, A. Kumar, J. Arevalo, A. E. Carpenter, B. A. Cimini, and S. Singh, “Cell Painting Gallery: an open resource for image-based profiling,” Nat Methods, vol. 21, no. 10, pp. 1775–1777, Oct. 2024, doi: 10.1038/s41592-024-02399-z.
[4] S. Ge, S. Sun, H. Xu, Q. Cheng, and Z. Ren, “Deep Learning in Single-Cell and Spatial Transcriptomics Data Analysis: Advances and Challenges from a Data Science Perspective,” Dec. 06, 2024, arXiv: arXiv:2412.03614. doi: 10.48550/arXiv.2412.03614.
[5] Q. Tang et al., “Morphological Profiling for Drug Discovery in the Era of Deep Learning,” Jan. 15, 2024, arXiv: arXiv:2312.07899. doi: 10.48550/arXiv.2312.07899.
[6] P. Zhou, B. Du, and Y. Xu, “CellSeg1: Robust Cell Segmentation with One Training Image,” Dec. 02, 2024, arXiv: arXiv:2412.01410. doi: 10.48550/arXiv.2412.01410.
[7] D. Molho et al., “Deep Learning in Single-Cell Analysis,” Nov. 05, 2022, arXiv: arXiv:2210.12385. doi: 10.48550/arXiv.2210.12385.
[8] T. Zhang, H. J. McCourty, B. M. Sanchez-Tafolla, A. Nikolaev, and L. S. Mihaylova, “MorphoSeg: An Uncertainty-Aware Deep Learning Method for Biomedical Segmentation of Complex Cellular Morphologies,” May 30, 2025, arXiv: arXiv:2409.17110. doi: 10.48550/arXiv.2409.17110.
[9] X. Ma, Y. Tao, Y. Zhang, Z. Ji, Y. Zhang, and Q. Chen, “Test-Time Generative Augmentation for Medical Image Segmentation,” Jun. 25, 2024, arXiv: arXiv:2406.17608. doi: 10.48550/arXiv.2406.17608.
[10] S. Masubuchi et al., “Deep-Learning-Based Image Segmentation Integrated with Optical Microscopy for Automatically Searching for Two-Dimensional Materials,” npj 2D Mater Appl, vol. 4, no. 1, p. 3, Mar. 2020, doi: 10.1038/s41699-020-0137-z.
"""


arxiv_dict = extract_arxiv_title_mapping(bibliography)
print(len(arxiv_dict))
for key in arxiv_dict.keys():
    print(key)
    try : 
        await assign_paper_to_specific_project(project_id=7,
                                           arxiv_url_or_id=key)
    except Exception as e:
        print(f"Error assigning paper {key}: {e}")

8
2410.01928
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2501.06039
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2412.03614
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2312.07899
Paper 2312.07899 assigned to project 7 successfully.
2412.01410
Paper 2412.01410 assigned to project 7 successfully.
2210.12385
Paper 2210.12385 assigned to project 7 successfully.
2409.17110
Paper 2409.17110 assigned to project 7 successfully.
2406.17608
Paper 2406.17608 assigned to project 7 successfully.


In [None]:
"""
Project: Causal Inference of Economic Policies using Panel Data
Description: This project aims to identify the causal effects of government policies—such as minimum wage increases or tax credits—on labor market outcomes using panel data econometric techniques, including fixed effects models and instrumental variables.
start: 2025-07-02
end: 2025-12-30
Abstract draft: This paper analyzes the impact of state-level minimum wage increases on employment and working hours using U.S. panel data from 2000 to 2022. We employ a two-way fixed effects model to control for time-invariant heterogeneity and national trends. To address potential endogeneity in policy adoption, we use regional political shifts as an instrument for minimum wage changes. Our estimates suggest a modest negative effect on employment among low-wage workers, with stronger effects in states with higher baseline unemployment. Robustness checks using alternative specifications and placebo tests support the validity of our identification strategy.
Query: Can you recommend recent econometrics papers that estimate the causal effects of public policies, such as minimum wage or tax changes, using panel data or instrumental variables? I’m particularly interested in identification strategies and heterogeneous treatment effects.
"""

# Registers the project through add_new_project (imported from server),
# passing its name, description and start/end dates as keyword arguments.
# The name and description match the note string at the top of this cell.
await add_new_project(project_name="Causal Inference of Economic Policies using Panel Data",
                project_description="This project aims to identify the causal effects of government policies—such as minimum wage increases or tax credits—on labor market outcomes using panel data econometric techniques, including fixed effects models and instrumental variables.",
                project_start=date(2025, 7, 2),
                project_end=date(2025, 12, 30))



## add papers to project
# Reference list of related papers for the causal-inference / panel-data
# project, one reference per line. All ten entries carry an arXiv identifier.
bibliography = """
[1] K. Vafa, S. Athey, and D. M. Blei, “Estimating Wage Disparities Using Foundation Models,” Apr. 29, 2025, arXiv: arXiv:2409.09894. doi: 10.48550/arXiv.2409.09894.
[2] H. Kanayama, S. Miyaji, and S. Otani, “Who Bears the Cost? High-Frequency Evidence on Minimum Wage Effects and Amenity Pass-Through in Spot Labor Markets,” May 27, 2025, arXiv: arXiv:2505.04555. doi: 10.48550/arXiv.2505.04555.
[3] M. Bossler, Y. Liang, and T. Schank, “The Devil is in the Details: Heterogeneous Effects of the German Minimum Wage on Working Hours and Minijobs,” May 21, 2024, arXiv: arXiv:2403.17206. doi: 10.48550/arXiv.2403.17206.
[4] A. Sato, “The effect of minimum wages on employment in the presence of productivity fluctuations,” Feb. 26, 2025, arXiv: arXiv:2502.18261. doi: 10.48550/arXiv.2502.18261.
[5] G. Zhou, Y. Han, and X. Yu, “Covariate-Adjusted Deep Causal Learning for Heterogeneous Panel Data Models,” May 26, 2025, arXiv: arXiv:2505.20536. doi: 10.48550/arXiv.2505.20536.
[6] J. Zhou and B. Liang, “Regression Analysis of Ordinal Panel Count Data in Recurrent Medication Non-adherence,” May 28, 2025, arXiv: arXiv:2505.21858. doi: 10.48550/arXiv.2505.21858.
[7] A. Chudik, M. H. Pesaran, and R. P. Smith, “Analysis of Multiple Long Run Relations in Panel Data Models with Applications to Financial Ratios,” Jun. 02, 2025, arXiv: arXiv:2506.02135. doi: 10.48550/arXiv.2506.02135.
[8] A. Wu, K. Kuang, R. Xiong, and F. Wu, “Instrumental Variables in Causal Inference and Machine Learning: A Survey,” Dec. 12, 2022, arXiv: arXiv:2212.05778. doi: 10.48550/arXiv.2212.05778.
[9] Y. Liu, “Policy Learning under Endogeneity Using Instrumental Variables,” Mar. 01, 2024, arXiv: arXiv:2206.09883. doi: 10.48550/arXiv.2206.09883.
[10] T. Ura and L. Zhang, “Policy Relevant Treatment Effects with Multidimensional Unobserved Heterogeneity,” May 08, 2025, arXiv: arXiv:2403.13738. doi: 10.48550/arXiv.2403.13738.
"""



arxiv_dict = extract_arxiv_title_mapping(bibliography)
print(len(arxiv_dict))
for key in arxiv_dict.keys():
    print(key)
    try : 
        await assign_paper_to_specific_project(project_id=8,
                                           arxiv_url_or_id=key)
    except Exception as e:
        print(f"Error assigning paper {key}: {e}")

10
2409.09894
Rollback!! Error assigning paper to project: HTTPConnectionPool(host='export.arxiv.org', port=80): Read timed out. (read timeout=10)
2505.04555
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2403.17206
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2502.18261
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2505.20536
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2505.21858
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2506.02135
Rollback!! Error assigning paper to project: ('Connection aborted.', RemoteDisconnected('Remote e

## construct test dataset

In [11]:
# Reference lists for the test dataset: one multi-line string per project
# topic, four references each, one reference per line. Entries identify the
# paper either as "arXiv:<id>" or as an https://arxiv.org/abs/<id>vN URL.

# Economics / causal-inference topic.
# NOTE(review): "Casual_inference" is probably a typo for "Causal_inference";
# the name is left as is because code outside this view may reference it.
Casual_inference = """
[1] B. Schölkopf et al., “Towards Causal Representation Learning,” Feb. 22, 2021, arXiv: arXiv:2102.11107. doi: 10.48550/arXiv.2102.11107.
[2] H. Kanayama and S. Otani, “Nonparametric Estimation of Matching Efficiency and Elasticity in a Spot Gig Work Platform: 2019-2023,” Mar. 07, 2025, arXiv: arXiv:2412.19024. doi: 10.48550/arXiv.2412.19024.
[3] R. Bommasani et al., “On the Opportunities and Risks of Foundation Models,” Jul. 12, 2022, arXiv: arXiv:2108.07258. doi: 10.48550/arXiv.2108.07258.
[4] L. Battaglia, T. Christensen, S. Hansen, and S. Sacher, “Inference for Regression with Variables Generated by AI or Machine Learning,” Apr. 30, 2025, arXiv: arXiv:2402.15585. doi: 10.48550/arXiv.2402.15585.
"""

# Continual / parameter-efficient fine-tuning and personalization topic.
continual_PEFT = """
[1] Y. Yang, G. Long, T. Shen, J. Jiang, and M. Blumenstein, “Dual-Personalizing Adapter for Federated Foundation Models,” arXiv.org. Accessed: Jun. 07, 2025. [Online]. Available: https://arxiv.org/abs/2403.19211v2
[2] Y. Zhang, Z. Qin, Z. Wu, J. Hou, and S. Deng, “Personalized Federated Fine-Tuning for LLMs via Data-Driven Heterogeneous Model Architectures,” arXiv.org. Accessed: Jun. 07, 2025. [Online]. Available: https://arxiv.org/abs/2411.19128v3
[3] N. Loo, S. Swaroop, and R. E. Turner, “Generalized Variational Continual Learning,” Nov. 24, 2020, arXiv: arXiv:2011.12328. doi: 10.48550/arXiv.2011.12328.
[4] N. Subramani, N. Suresh, and M. E. Peters, “Extracting Latent Steering Vectors from Pretrained Language Models,” arXiv.org. Accessed: Jun. 07, 2025. [Online]. Available: https://arxiv.org/abs/2205.05124v1
"""

# Single-cell morphometric profiling topic.
deep_morphometric = """
[1] J. O. Cross-Zamirski, G. Williams, E. Mouchet, C.-B. Schönlieb, R. Turkki, and Y. Wang, “Self-Supervised Learning of Phenotypic Representations from Cell Images with Weak Labels,” arXiv.org. Accessed: Jun. 07, 2025. [Online]. Available: https://arxiv.org/abs/2209.07819v2
[2] Y. Li, M. A. Rezaei, C. Li, X. Li, and D. Wu, “DeepAtom: A Framework for Protein-Ligand Binding Affinity Prediction,” Dec. 01, 2019, arXiv: arXiv:1912.00318. doi: 10.48550/arXiv.1912.00318.
[3] C. Bunne et al., “How to Build the Virtual Cell with Artificial Intelligence: Priorities and Opportunities,” Oct. 14, 2024, arXiv: arXiv:2409.11654. doi: 10.48550/arXiv.2409.11654.
[4] K. Kenyon-Dean et al., “ViTally Consistent: Scaling Biological Representation Learning for Cell Microscopy,” arXiv.org. Accessed: Jun. 07, 2025. [Online]. Available: https://arxiv.org/abs/2411.02572v1
"""

# Domain-shift continual learning topic.
domain_shift = """
[1] K. Gupta et al., “Continual Pre-Training of Large Language Models: How to (re)warm your model?,” Sep. 06, 2023, arXiv: arXiv:2308.04014. doi: 10.48550/arXiv.2308.04014.
[2] S. Duwal, S. Prasai, and S. Manandhar, “Domain-adaptative Continual Learning for Low-resource Tasks: Evaluation on Nepali,” arXiv.org. Accessed: Jun. 07, 2025. [Online]. Available: https://arxiv.org/abs/2412.13860v1
[3] Y. Shi et al., “Gradient Matching for Domain Generalization,” Jul. 14, 2021, arXiv: arXiv:2104.09937. doi: 10.48550/arXiv.2104.09937.
[4] A. T. Nguyen, T. Tran, Y. Gal, and A. G. Baydin, “Domain Invariant Representation Learning with Domain Density Transformations,” Feb. 15, 2022, arXiv: arXiv:2102.05082. doi: 10.48550/arXiv.2102.05082.
"""

# Vision-language model topic.
VLM = """
[1] C. Conwell and T. Ullman, “Testing Relational Understanding in Text-Guided Image Generation,” Jul. 29, 2022, arXiv: arXiv:2208.00005. doi: 10.48550/arXiv.2208.00005.
[2] H. R. Kirk, B. Vidgen, P. Röttger, T. Thrush, and S. A. Hale, “Hatemoji: A Test Suite and Adversarially-Generated Dataset for Benchmarking and Detecting Emoji-based Hate,” May 06, 2022, arXiv: arXiv:2108.05921. doi: 10.48550/arXiv.2108.05921.
[3] L. Yuan et al., “Florence: A New Foundation Model for Computer Vision,” Nov. 22, 2021, arXiv: arXiv:2111.11432. doi: 10.48550/arXiv.2111.11432.
[4] W. Jin et al., “GRILL: Grounded Vision-language Pre-training via Aligning Text and Image Regions,” May 24, 2023, arXiv: arXiv:2305.14676. doi: 10.48550/arXiv.2305.14676.
"""