# Generate Web Pages for the Survey Website
This is a utility notebook to generate posts for each paper we studied. The posts are in markdown format and will be used to generate the website for the survey.


In [6]:
import pandas as pd
import sqlite3
import os

def get_all_relevant_papers() -> pd.DataFrame:
    # Connect to the SQLite database
    conn = sqlite3.connect('../../data/db_llm_education_survey.sqlite3')

    # Create a cursor object
    cursor = conn.cursor()

    # Execute a SQL query
    cursor.execute('SELECT paper_id, title, abstract, source, bibtex, type, doi FROM llm_education_survey_paper JOIN '
                   'llm_education_survey_analysis ON llm_education_survey_paper.id = llm_education_survey_analysis.paper_id '
                   'WHERE is_relevant = 1 AND user_id = 15')

    # fetch all the results in a data frame
    rows = cursor.fetchall()
    return pd.DataFrame(rows, columns=['paper_id', 'title', 'abstract', 'source', 'bibtex', 'type', 'doi'])

papers = get_all_relevant_papers()
papers

Unnamed: 0,paper_id,title,abstract,source,bibtex,type,doi
0,42,Training Language Models for Programming Feedb...,,"springer, scopus","@inbook{koutcheme2023training, title={Training...",Representation,10.1007/978-3-031-36272-9_79
1,54,Comparative Quality Analysis of GPT-Based Mult...,,springer,"@InProceedings{grevisse2024comparative,\r\naut...",Application,10.1007/978-3-031-46813-1_29
2,59,Assessing ChatGPT’s Proficiency in CS1-Level P...,,springer,"@inbook{sanchez2023assessing, title={Assessing...",Application,10.1007/978-3-031-47372-2_7
3,71,Performance of Large Language Models in a Comp...,Large language models such as ChatGPT-3.5 and ...,"springer, scopus, arxiv","@InProceedings{kruger2024performance,\r\nautho...",Application,10.1007/978-3-031-50485-3_40
4,72,Bridging the Programming Skill Gap with ChatGP...,,springer,"@inbook{reiche2024bridging, title={Bridging th...",Application,10.1007/978-3-031-50485-3_42
...,...,...,...,...,...,...,...
120,1094,CS1-LLM: Integrating LLMs into CS1 Instruction,"The recent, widespread availability of Large L...",arxiv,"@misc{vadaparty2024cs1llm,\n doi = {10.48550/...",Application,10.48550/arXiv.2406.15379
121,1098,The Impact of Structured Prompt-Driven Generat...,AbstractView references\n\nThis paper investig...,scopus,"@conference{garg2024impact,\r\nauthor={Ashish ...",Application,10.5220/0012693000003693
122,1494,Examining Student Use of AI in CS1 and CS2,The launch of ChatGPT in November 2022 marked ...,acm,"@article{manley2024examining,\r\nauthor = {Man...",Application,
123,1497,Can ChatGPT Pass a CS1 Python Course?,In this paper we determine whether an LLM-Chat...,acm,"@article{sharpe2024can,\nauthor = {Sharpe, Jam...",Application,


In [18]:

output_folder = '../../website/_posts/'
os.makedirs(output_folder, exist_ok=True)

def get_pub_date(doi:str) -> str:
    import requests
    import json
    url = f'https://api.crossref.org/works/{doi}'
    response = requests.get(url)
    if response.status_code == 200:
        data = json.loads(response.text)
        #return data['message']['published']#['date-parts'][0]
        return data['message']['created']['date-parts'][0]
    return None

for index, row in papers.iterrows():
    pub_date = get_pub_date(row['doi'])
    print(row['doi'], pub_date)
    if pub_date is None:
        pub_date =  f'{row["year"]}-01-01'
    elif len(pub_date) == 1:
        pub_date =  f'{pub_date[0]}-01-01'
    elif len(pub_date) == 2:
        pub_date = f'{pub_date[0]}-{pub_date[1]}-01'
    elif len(pub_date) == 3:
        pub_date = "-".join([str(x) for x in pub_date])
    output_file = os.path.join(output_folder, f'{pub_date}-{row["paper_id"]}.md')
    with open(output_file, 'w') as f:
        title = row['title'].replace('"', '\\"')
        f.write('---\n')
        f.write(f'title: "{title}"\n')
        f.write(f'layout: single\n')
        f.write('---\n')
        f.write('\n')
        f.write('## Abstract\n')
        f.write(f'{row["abstract"]}\n')

10.1007/978-3-031-36272-9_79 [2023, 6, 25]
10.1007/978-3-031-46813-1_29 [2023, 10, 23]
10.1007/978-3-031-47372-2_7 [2023, 11, 13]
10.1007/978-3-031-50485-3_40 [2024, 1, 24]
10.1007/978-3-031-50485-3_42 [2024, 1, 24]
10.1007/978-3-031-53022-7_5 [2024, 2, 6]
10.1007/978-3-031-53656-4_8 [2024, 2, 14]
10.1007/978-3-031-56478-9_21 [2024, 3, 29]
10.1007/978-3-031-60609-0_20 [2024, 6, 1]
10.1007/978-3-031-61691-4_14 [2024, 6, 1]
10.1007/978-3-031-63028-6_20 [2024, 5, 31]
10.1007/978-3-031-64302-6_19 [2024, 7, 1]
10.1007/978-981-97-0730-0_27 [2024, 2, 25]
10.1007/978-981-97-0730-0_28 [2024, 2, 25]
10.1007/978-981-99-8385-8_14 [2023, 11, 23]
10.1007/978-981-99-8429-9_39 [2023, 12, 23]
10.1007/s10639-024-12520-6 [2024, 2, 16]
10.1007/s10639-024-12765-1 [2024, 5, 18]
10.1007/s40593-024-00406-0 [2024, 5, 15]
10.1007/s40593-024-00414-0 [2024, 6, 25]
10.1007/s42979-024-02714-7 [2024, 3, 29]
10.1007/s42979-024-02963-6 [2024, 6, 10]
10.1109/ACCESS.2024.3380909 [2024, 3, 22]
10.1109/CEI60616.2023.10527

KeyError: 'year'