## The Whole DB

In [12]:
import sqlite3
import pandas as pd

import os

# Path to the SQLite database file (relative to the notebook's working directory)
db_path = './db_llm_education_survey.sqlite3'

# sqlite3.connect() silently creates a new, empty database when the file does
# not exist, which would make the report below come back empty instead of
# failing. Check for the file first so a wrong path is reported immediately.
if not os.path.isfile(db_path):
    raise FileNotFoundError(f"SQLite database not found: {db_path}")

# Connect to the SQLite database
conn = sqlite3.connect(db_path)

# Function to get basic information about the database
def get_db_info(connection):
    """Summarise every table of a SQLite database.

    Args:
        connection: An open ``sqlite3`` connection.

    Returns:
        dict: Maps each table name listed in ``sqlite_master`` to a dict with
        two keys: ``'schema'`` (the rows returned by ``PRAGMA table_info`` for
        that table) and ``'row_count'`` (the result of ``SELECT COUNT(*)``).
    """
    def quote_identifier(name):
        # Table names cannot be bound as SQL parameters, so they are quoted as
        # identifiers (embedded double quotes doubled). Unquoted interpolation
        # breaks on names containing spaces or matching reserved words.
        return '"' + name.replace('"', '""') + '"'

    cursor = connection.cursor()
    try:
        # Get the list of tables
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        table_names = [row[0] for row in cursor.fetchall()]

        db_info = {}
        for table_name in table_names:
            quoted_name = quote_identifier(table_name)

            # Get the table schema
            cursor.execute(f"PRAGMA table_info({quoted_name});")
            schema = cursor.fetchall()

            # Get the number of rows
            cursor.execute(f"SELECT COUNT(*) FROM {quoted_name};")
            row_count = cursor.fetchone()[0]

            db_info[table_name] = {
                'schema': schema,
                'row_count': row_count
            }

        return db_info
    finally:
        # Release the cursor whether or not a query failed; the connection
        # itself stays open because the caller owns it.
        cursor.close()

# Get the database information. The connection is closed in `finally` so the
# database file is released even when the inspection raises.
try:
    database_info = get_db_info(conn)
finally:
    conn.close()

# Prepare the data for display: one record per (table, column) pair.
# Each schema row comes from PRAGMA table_info; index 1 is used as the column
# name and index 2 as the column type.
tables_info = [
    {
        'Table Name': table,
        'Column Name': column[1],
        'Column Type': column[2],
        'Row Count': details['row_count']
    }
    for table, details in database_info.items()
    for column in details['schema']
]

# Convert to DataFrame for display
tables_info_df = pd.DataFrame(tables_info)

# Display the DataFrame
print(tables_info_df)


                                           Table Name             Column Name  \
0                                   django_migrations                      id   
1                                   django_migrations                     app   
2                                   django_migrations                    name   
3                                   django_migrations                 applied   
4                                     sqlite_sequence                    name   
5                                     sqlite_sequence                     seq   
6                     llm_education_survey_discipline                      id   
7                     llm_education_survey_discipline                    name   
8                     llm_education_survey_discipline             description   
9               llm_education_survey_educationallevel                      id   
10              llm_education_survey_educationallevel                    name   
11              llm_educatio

## Discipline

In [23]:
import sqlite3
import pandas as pd
from IPython.display import display

import os

# Path to the SQLite database file (relative to the notebook's working directory)
db_path = './db_llm_education_survey.sqlite3'

# sqlite3.connect() silently creates a new, empty database when the file does
# not exist, so a wrong path would yield an empty result instead of an error.
if not os.path.isfile(db_path):
    raise FileNotFoundError(f"SQLite database not found: {db_path}")


def quote_identifier(name):
    """Return `name` quoted as a SQLite identifier (embedded quotes doubled)."""
    return '"' + name.replace('"', '""') + '"'


# Connect to the SQLite database; `finally` below guarantees it is closed even
# when one of the queries raises.
conn = sqlite3.connect(db_path)
try:
    cursor = conn.cursor()

    # Get the list of all tables
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cursor.fetchall()

    # Collect every foreign key, in any table, that points at the discipline
    # table. In a PRAGMA foreign_key_list row, index 2 is the referenced table,
    # index 3 the referencing column and index 4 the referenced column.
    foreign_keys = []
    for table in tables:
        table_name = table[0]
        cursor.execute(f"PRAGMA foreign_key_list({quote_identifier(table_name)});")
        for fk in cursor.fetchall():
            if fk[2] == 'llm_education_survey_discipline':
                foreign_keys.append({
                    'table_name': table_name,
                    'column_name': fk[3],
                    'references_table': fk[2],
                    'references_column': fk[4]
                })

    # Get the discipline data
    discipline_df = pd.read_sql_query("SELECT * FROM llm_education_survey_discipline", conn)

    # Count the references for each discipline name, summed over all
    # referencing tables
    discipline_references = {}

    for fk in foreign_keys:
        table_name = fk['table_name']
        column_name = fk['column_name']

        # Identifiers cannot be bound as parameters, so they are quoted instead
        query = f"""
        SELECT d.name, COUNT(*) as reference_count
        FROM {quote_identifier(table_name)} t
        JOIN llm_education_survey_discipline d
        ON t.{quote_identifier(column_name)} = d.id
        GROUP BY d.name
        """
        try:
            ref_df = pd.read_sql_query(query, conn)

            for name, count in zip(ref_df['name'], ref_df['reference_count']):
                discipline_references[name] = discipline_references.get(name, 0) + count
        except Exception as e:
            # Best effort: report the failing table and carry on with the rest
            print(f"Error querying table {table_name}: {e}")
finally:
    # Close the connection
    conn.close()

# Convert the discipline references to a DataFrame
discipline_references_df = pd.DataFrame(list(discipline_references.items()), columns=['Discipline Name', 'Reference Count'])

# Sort the DataFrame by Reference Count in descending order
discipline_references_df = discipline_references_df.sort_values(by='Reference Count', ascending=False)

# Display the DataFrame using IPython display for better visualization
display(discipline_references_df)


Unnamed: 0,Discipline Name,Reference Count
24,Introduction to Programming,69
23,Introduction to CS,21
14,Data Science,10
19,Education in General,10
31,Software Engineering,6
0,Algorithms,5
34,Software security,4
33,Software Testing,4
32,Software Engineering (requirements engineering),4
28,OBJect Oriented Programming,4


## Education Levels

In [22]:
import sqlite3
import pandas as pd
from IPython.display import display

import os

# Path to the SQLite database file (relative to the notebook's working directory)
db_path = './db_llm_education_survey.sqlite3'

# sqlite3.connect() silently creates a new, empty database when the file does
# not exist, so a wrong path would yield an empty result instead of an error.
if not os.path.isfile(db_path):
    raise FileNotFoundError(f"SQLite database not found: {db_path}")

# Query to count references in the llm_education_survey_analysis_educational_levels table
query = """
SELECT d.name, COUNT(*) as reference_count
FROM llm_education_survey_analysis_educational_levels t
JOIN llm_education_survey_educationallevel d
ON t.educationallevel_id = d.id
GROUP BY d.name
"""

# Connect, run the query, and close the connection even if the query raises
conn = sqlite3.connect(db_path)
try:
    ref_df = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Dictionary of reference counts keyed by educational level name
educationallevel_references = dict(zip(ref_df['name'], ref_df['reference_count']))

# Convert the educational level references to a DataFrame
educationallevel_references_df = pd.DataFrame(list(educationallevel_references.items()), columns=['Educational Level Name', 'Reference Count'])

# Sort the DataFrame by Reference Count in descending order
educationallevel_references_df = educationallevel_references_df.sort_values(by='Reference Count', ascending=False)

# Display the DataFrame using IPython display for better visualization
display(educationallevel_references_df)


Unnamed: 0,Educational Level Name,Reference Count
5,Undergrad,117
0,Grad,17
2,PhD,4
1,K-12,2
3,Professional,1
4,Professor/Teacher,1


## Language

In [25]:
import sqlite3
import pandas as pd
from IPython.display import display

import os

# Path to the SQLite database file (relative to the notebook's working directory)
db_path = './db_llm_education_survey.sqlite3'

# sqlite3.connect() silently creates a new, empty database when the file does
# not exist, so a wrong path would yield an empty result instead of an error.
if not os.path.isfile(db_path):
    raise FileNotFoundError(f"SQLite database not found: {db_path}")


def quote_identifier(name):
    """Return `name` quoted as a SQLite identifier (embedded quotes doubled)."""
    return '"' + name.replace('"', '""') + '"'


# Connect to the SQLite database; `finally` below guarantees it is closed even
# when one of the queries raises.
conn = sqlite3.connect(db_path)
try:
    cursor = conn.cursor()

    # Get the list of all tables
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cursor.fetchall()

    # Collect every foreign key, in any table, that points at the language
    # table. In a PRAGMA foreign_key_list row, index 2 is the referenced table,
    # index 3 the referencing column and index 4 the referenced column.
    foreign_keys = []
    for table in tables:
        table_name = table[0]
        cursor.execute(f"PRAGMA foreign_key_list({quote_identifier(table_name)});")
        for fk in cursor.fetchall():
            if fk[2] == 'llm_education_survey_language':
                foreign_keys.append({
                    'table_name': table_name,
                    'column_name': fk[3],
                    'references_table': fk[2],
                    'references_column': fk[4]
                })

    # Count the references for each language name, summed over all
    # referencing tables
    language_references = {}

    for fk in foreign_keys:
        table_name = fk['table_name']
        column_name = fk['column_name']

        # Identifiers cannot be bound as parameters, so they are quoted instead
        query = f"""
        SELECT d.name, COUNT(*) as reference_count
        FROM {quote_identifier(table_name)} t
        JOIN llm_education_survey_language d
        ON t.{quote_identifier(column_name)} = d.id
        GROUP BY d.name
        """
        try:
            ref_df = pd.read_sql_query(query, conn)

            for name, count in zip(ref_df['name'], ref_df['reference_count']):
                language_references[name] = language_references.get(name, 0) + count
        except Exception as e:
            # Best effort: report the failing table and carry on with the rest
            print(f"Error querying table {table_name}: {e}")
finally:
    # Close the connection
    conn.close()

# Convert the language references to a DataFrame
language_references_df = pd.DataFrame(list(language_references.items()), columns=['Language Name', 'Reference Count'])

# Sort the DataFrame by Reference Count in descending order
language_references_df = language_references_df.sort_values(by='Reference Count', ascending=False)

# Display the DataFrame using IPython display for better visualization
display(language_references_df)



Unnamed: 0,Language Name,Reference Count
33,Python,70
16,Java,22
25,Not Mentioned,19
34,Q&A,14
24,MCQs,10
3,C,10
5,C++,10
18,JavaScript,7
7,CSS,6
13,HTML,6


## LLMs

In [26]:
import sqlite3
import pandas as pd
from IPython.display import display

import os

# Path to the SQLite database file (relative to the notebook's working directory)
db_path = './db_llm_education_survey.sqlite3'

# sqlite3.connect() silently creates a new, empty database when the file does
# not exist, so a wrong path would yield an empty result instead of an error.
if not os.path.isfile(db_path):
    raise FileNotFoundError(f"SQLite database not found: {db_path}")


def quote_identifier(name):
    """Return `name` quoted as a SQLite identifier (embedded quotes doubled)."""
    return '"' + name.replace('"', '""') + '"'


# Connect to the SQLite database; `finally` below guarantees it is closed even
# when one of the queries raises.
conn = sqlite3.connect(db_path)
try:
    cursor = conn.cursor()

    # Get the list of all tables
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cursor.fetchall()

    # Collect every foreign key, in any table, that points at the LLM table.
    # In a PRAGMA foreign_key_list row, index 2 is the referenced table,
    # index 3 the referencing column and index 4 the referenced column.
    foreign_keys = []
    for table in tables:
        table_name = table[0]
        cursor.execute(f"PRAGMA foreign_key_list({quote_identifier(table_name)});")
        for fk in cursor.fetchall():
            if fk[2] == 'llm_education_survey_llm':
                foreign_keys.append({
                    'table_name': table_name,
                    'column_name': fk[3],
                    'references_table': fk[2],
                    'references_column': fk[4]
                })

    # Count the references for each LLM name, summed over all referencing tables
    llm_references = {}

    for fk in foreign_keys:
        table_name = fk['table_name']
        column_name = fk['column_name']

        # Identifiers cannot be bound as parameters, so they are quoted instead
        query = f"""
        SELECT d.name, COUNT(*) as reference_count
        FROM {quote_identifier(table_name)} t
        JOIN llm_education_survey_llm d
        ON t.{quote_identifier(column_name)} = d.id
        GROUP BY d.name
        """
        try:
            ref_df = pd.read_sql_query(query, conn)

            for name, count in zip(ref_df['name'], ref_df['reference_count']):
                llm_references[name] = llm_references.get(name, 0) + count
        except Exception as e:
            # Best effort: report the failing table and carry on with the rest
            print(f"Error querying table {table_name}: {e}")
finally:
    # Close the connection
    conn.close()

# Convert the LLM references to a DataFrame
llm_references_df = pd.DataFrame(list(llm_references.items()), columns=['LLM Name', 'Reference Count'])

# Sort the DataFrame by Reference Count in descending order
llm_references_df = llm_references_df.sort_values(by='Reference Count', ascending=False)

# Display the DataFrame using IPython display for better visualization
display(llm_references_df)



Unnamed: 0,LLM Name,Reference Count
6,ChatGPT,67
18,GPT-3.5-Turbo,34
19,GPT-4,27
12,CoPilot,10
21,GPT3,8
16,Codex,7
24,LLaMA-2,5
1,BARD,5
22,Gemini,3
8,ChatGPT Plus,3


## Educational Outcome

In [29]:
import sqlite3
import pandas as pd
from IPython.display import display

import os

# Path to the SQLite database file (relative to the notebook's working directory)
db_path = './db_llm_education_survey.sqlite3'

# sqlite3.connect() silently creates a new, empty database when the file does
# not exist, so a wrong path would yield an empty result instead of an error.
if not os.path.isfile(db_path):
    raise FileNotFoundError(f"SQLite database not found: {db_path}")


def quote_identifier(name):
    """Return `name` quoted as a SQLite identifier (embedded quotes doubled)."""
    return '"' + name.replace('"', '""') + '"'


# Connect to the SQLite database; `finally` below guarantees it is closed even
# when one of the queries raises.
conn = sqlite3.connect(db_path)
try:
    cursor = conn.cursor()

    # Get the list of all tables
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cursor.fetchall()

    # Collect every foreign key, in any table, that points at the educational
    # outcome table. In a PRAGMA foreign_key_list row, index 2 is the
    # referenced table, index 3 the referencing column and index 4 the
    # referenced column.
    foreign_keys = []
    for table in tables:
        table_name = table[0]
        cursor.execute(f"PRAGMA foreign_key_list({quote_identifier(table_name)});")
        for fk in cursor.fetchall():
            if fk[2] == 'llm_education_survey_educationaloutcome':
                foreign_keys.append({
                    'table_name': table_name,
                    'column_name': fk[3],
                    'references_table': fk[2],
                    'references_column': fk[4]
                })

    # Count the references for each educational outcome name, summed over all
    # referencing tables
    educational_outcome_references = {}

    for fk in foreign_keys:
        table_name = fk['table_name']
        column_name = fk['column_name']

        # Identifiers cannot be bound as parameters, so they are quoted instead
        query = f"""
        SELECT d.name, COUNT(*) as reference_count
        FROM {quote_identifier(table_name)} t
        JOIN llm_education_survey_educationaloutcome d
        ON t.{quote_identifier(column_name)} = d.id
        GROUP BY d.name
        """
        try:
            ref_df = pd.read_sql_query(query, conn)

            for name, count in zip(ref_df['name'], ref_df['reference_count']):
                educational_outcome_references[name] = educational_outcome_references.get(name, 0) + count
        except Exception as e:
            # Best effort: report the failing table and carry on with the rest
            print(f"Error querying table {table_name}: {e}")
finally:
    # Close the connection
    conn.close()

# Convert the educational outcome references to a DataFrame
educational_outcome_references_df = pd.DataFrame(list(educational_outcome_references.items()), columns=['Educational Outcome Name', 'Reference Count'])

# Sort the DataFrame by Reference Count in descending order
educational_outcome_references_df = educational_outcome_references_df.sort_values(by='Reference Count', ascending=False)

# Show every row for this one display only. option_context restores whatever
# display.max_rows was set to before, whereas reset_option would fall back to
# the pandas default and discard a value configured earlier in the notebook.
with pd.option_context('display.max_rows', None):
    # Display the entire DataFrame using IPython display for better visualization
    display(educational_outcome_references_df)


Unnamed: 0,Educational Outcome Name,Reference Count
100,Student has positive experience towards LLms.,5
102,Students are postive towards using LLM in lear...,4
64,LLMs can help as an educational chatbot.,3
71,LLMs can help repairing buggy student codes.,3
70,LLMs can help novice programmers to write bett...,3
53,LLMs are still not well accepted in academia.,3
50,LLMs are heavily used by SE students.,2
29,LLM can generate helpful personalized learning...,2
42,LLMs (ChatGPT) can partially help data science...,2
48,LLMs are good at CS MCQs.,2


## Research Methodologies

In [28]:
import sqlite3
import pandas as pd
from IPython.display import display

import os

# Path to the SQLite database file (relative to the notebook's working directory)
db_path = './db_llm_education_survey.sqlite3'

# sqlite3.connect() silently creates a new, empty database when the file does
# not exist, so a wrong path would yield an empty result instead of an error.
if not os.path.isfile(db_path):
    raise FileNotFoundError(f"SQLite database not found: {db_path}")


def quote_identifier(name):
    """Return `name` quoted as a SQLite identifier (embedded quotes doubled)."""
    return '"' + name.replace('"', '""') + '"'


# Connect to the SQLite database; `finally` below guarantees it is closed even
# when one of the queries raises.
conn = sqlite3.connect(db_path)
try:
    cursor = conn.cursor()

    # Get the list of all tables
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cursor.fetchall()

    # Collect every foreign key, in any table, that points at the research
    # methodology table. In a PRAGMA foreign_key_list row, index 2 is the
    # referenced table, index 3 the referencing column and index 4 the
    # referenced column.
    foreign_keys = []
    for table in tables:
        table_name = table[0]
        cursor.execute(f"PRAGMA foreign_key_list({quote_identifier(table_name)});")
        for fk in cursor.fetchall():
            if fk[2] == 'llm_education_survey_researchmethodology':
                foreign_keys.append({
                    'table_name': table_name,
                    'column_name': fk[3],
                    'references_table': fk[2],
                    'references_column': fk[4]
                })

    # Count the references for each research methodology name, summed over
    # all referencing tables
    research_methodology_references = {}

    for fk in foreign_keys:
        table_name = fk['table_name']
        column_name = fk['column_name']

        # Identifiers cannot be bound as parameters, so they are quoted instead
        query = f"""
        SELECT d.name, COUNT(*) as reference_count
        FROM {quote_identifier(table_name)} t
        JOIN llm_education_survey_researchmethodology d
        ON t.{quote_identifier(column_name)} = d.id
        GROUP BY d.name
        """
        try:
            ref_df = pd.read_sql_query(query, conn)

            for name, count in zip(ref_df['name'], ref_df['reference_count']):
                research_methodology_references[name] = research_methodology_references.get(name, 0) + count
        except Exception as e:
            # Best effort: report the failing table and carry on with the rest
            print(f"Error querying table {table_name}: {e}")
finally:
    # Close the connection
    conn.close()

# Convert the research methodology references to a DataFrame
research_methodology_references_df = pd.DataFrame(list(research_methodology_references.items()), columns=['Research Methodology Name', 'Reference Count'])

# Sort the DataFrame by Reference Count in descending order
research_methodology_references_df = research_methodology_references_df.sort_values(by='Reference Count', ascending=False)

# Display the DataFrame using IPython display for better visualization
display(research_methodology_references_df)



Unnamed: 0,Research Methodology Name,Reference Count
1,Case Study,52
10,User Study,41
9,Use Case,25
6,New Benchmark Dataset,11
7,New Framework,9
5,LLM-based Agent,6
0,A new Prompting Method,5
2,Evaluating multiple models,4
3,External Tool on LLM,3
8,New Tool,3


## Getting BibTeX entries

In [37]:
import sqlite3
import pandas as pd

import os

# Path to the SQLite database file (relative to the notebook's working directory)
db_path = './db_llm_education_survey.sqlite3'

# sqlite3.connect() silently creates a new, empty database when the file does
# not exist, so a wrong path would yield an error only later (or an empty CSV).
if not os.path.isfile(db_path):
    raise FileNotFoundError(f"SQLite database not found: {db_path}")

# Link each paper to its disciplines: paper -> analysis -> analysis_disciplines
# -> discipline. The result has one row per matching combination, carrying the
# discipline name together with the paper's title and BibTeX entry.
query = """
SELECT d.name as discipline, p.title, p.bibtex
FROM llm_education_survey_paper p
JOIN llm_education_survey_analysis a
ON p.id = a.paper_id
JOIN llm_education_survey_analysis_disciplines ad
ON a.id = ad.analysis_id
JOIN llm_education_survey_discipline d
ON ad.discipline_id = d.id
"""

# Execute the query and load the result into a DataFrame; the connection is
# closed even if the query raises.
conn = sqlite3.connect(db_path)
try:
    result_df = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Save the result to a CSV file
result_df.to_csv('discipline_papers.csv', index=False)

# Display the first few rows as a confirmation
print(result_df.head())


                    discipline  \
0  Introduction to Programming   
1  Introduction to Programming   
2           Introduction to CS   
3  Introduction to Programming   
4            Computer Networks   

                                               title  \
0  Evaluating the Quality of LLM-Generated Explan...   
1  Training Language Models for Programming Feedb...   
2  Comparative Quality Analysis of GPT-Based Mult...   
3  Assessing ChatGPT’s Proficiency in CS1-Level P...   
4  Performance of Large Language Models in a Comp...   

                                              bibtex  
0  @inproceedings{10.1145/3627217.3627233,\nautho...  
1  @inbook{Koutcheme_2023, title={Training Langua...  
2  @inbook{Gr_visse_2023, title={Comparative Qual...  
3  @inbook{S_nchez_2023, title={Assessing ChatGPT...  
4  @inbook{Kr_ger_2024, title={Performance of Lar...  
