## User Stats Tables for LaTeX

In [3]:
import pandas as pd
import sqlite3

from scipy.stats import chi2_contingency

# Load the `users` rows with lastpage >= 3. The connection is closed in a
# `finally` so it is released even when the query raises.
conn = sqlite3.connect("../giicg.db")
try:
    users = pd.read_sql("Select * from users WHERE lastpage >= 3", conn)
finally:
    conn.close()

# Keep only the two gender groups that the tables below compare. The explicit
# .copy() makes `users` an independent frame, so the column assignments that
# follow do not raise SettingWithCopyWarning.
users = users[users['gender'].isin(['Woman (cisgender)', 'Man (cisgender)'])].copy()

# Values that cannot be parsed as numbers become NaN (errors='coerce').
users['work_exp_years'] = pd.to_numeric(users['work_exp_years'], errors='coerce')
users['study_year'] = pd.to_numeric(users['study_year'], errors='coerce')

## Helper functions

In [19]:
import pandas as pd
from scipy.stats import chi2_contingency, fisher_exact


def make_grouped_latex_table(user_df, question_labels):
    """Summarise Likert-style answers per gender as agree/neutral/disagree shares.

    Each answer is collapsed into one of three buckets: "Agree" and
    "Strongly Agree" become "Agree", "Disagree" and "Strongly disagree"
    become "Disagree", and "Neutral" stays as is. Missing or unrecognised
    answers are dropped before any share is computed.

    Args:
        user_df: DataFrame with a 'gender' column and one column per key of
            ``question_labels``.
        question_labels: mapping of column name -> label used in the
            "question" column of the result.

    Returns:
        DataFrame with one row per question and the columns "question",
        "agree female", "agree male", "neutral female", "neutral male",
        "disagree female", "disagree male" and "total replies". Shares are
        fractions of the valid replies within each gender (0 if that gender
        has no valid reply). Despite the function name, no LaTeX is produced
        here.
    """
    genders = ('Woman (cisgender)', 'Man (cisgender)')
    buckets = ("Agree", "Neutral", "Disagree")
    collapse = {
        "Agree": "Agree",
        "Strongly Agree": "Agree",
        "Neutral": "Neutral",
        "Disagree": "Disagree",
        "Strongly disagree": "Disagree",
    }

    def to_bucket(answer):
        # Missing and unknown answers both end up as None and are filtered out.
        return None if pd.isna(answer) else collapse.get(answer)

    rows = []
    for column, label in question_labels.items():
        answers = user_df[['gender', column]].copy()
        answers['group'] = answers[column].map(to_bucket)
        answers = answers[answers['group'].notnull()]

        # gender -> (agree share, neutral share, disagree share)
        shares = {}
        for gender in genders:
            bucketed = answers.loc[answers['gender'] == gender, 'group']
            n_valid = len(bucketed)
            shares[gender] = tuple(
                (bucketed == bucket).sum() / n_valid if n_valid > 0 else 0
                for bucket in buckets
            )

        # Row layout: label, then female/male pairs per bucket, then the count
        # of valid replies across all rows of user_df.
        row = [label]
        for position in range(len(buckets)):
            row.extend(shares[gender][position] for gender in genders)
        row.append(len(answers))
        rows.append(row)

    return pd.DataFrame(rows, columns=[
        "question",
        "agree female", "agree male",
        "neutral female", "neutral male",
        "disagree female", "disagree male",
        "total replies",
    ])


def make_yesno_latex_table(user_df, question_labels):
    """Summarise Yes/No answers per gender as shares of valid replies.

    Only rows whose answer is exactly 'Yes' or 'No' count as valid replies;
    everything else (including missing values) is ignored.

    Args:
        user_df: DataFrame with a 'gender' column and one column per key of
            ``question_labels``.
        question_labels: mapping of column name -> label used in the
            "question" column of the result.

    Returns:
        DataFrame with one row per question and the columns "question",
        "yes female", "yes male", "no female", "no male" and
        "total replies". Shares are fractions of the valid replies within
        each gender (0 if that gender has no valid reply). Despite the
        function name, no LaTeX is produced here.
    """
    genders = ('Woman (cisgender)', 'Man (cisgender)')
    options = ['Yes', 'No']

    rows = []
    for column, label in question_labels.items():
        subset = user_df[['gender', column]].copy()
        valid = subset[subset[column].isin(options)]

        # gender -> (yes share, no share)
        shares = {}
        for gender in genders:
            replies = valid.loc[valid['gender'] == gender, column]
            n_valid = len(replies)
            shares[gender] = tuple(
                (replies == option).sum() / n_valid if n_valid > 0 else 0
                for option in options
            )

        # Row layout: label, yes female/male, no female/male, valid-reply count.
        row = [label]
        for position in range(len(options)):
            row.extend(shares[gender][position] for gender in genders)
        row.append(len(valid))
        rows.append(row)

    return pd.DataFrame(rows, columns=[
        "question",
        "yes female", "yes male",
        "no female", "no male",
        "total replies",
    ])

def chi_squared(df, col_name):
    """Print the gender x answer contingency table and its chi-squared test.

    Args:
        df: DataFrame with a "gender" column and the column ``col_name``.
        col_name: name of the answer column to cross-tabulate against gender.

    Prints the contingency table followed by one line with the test
    statistic, p-value and degrees of freedom. Returns None.
    """
    observed = pd.crosstab(df["gender"], df[col_name])

    # Show the raw counts so the test result can be checked against them.
    print(observed)

    statistic, p_value, degrees, _expected = chi2_contingency(observed)

    print(f"Chi2={statistic:.2f}, p={p_value:.4f}, dof={degrees}")

def fishers_exact(df, col_name):
    """Print the two-sided Fisher's exact test for gender vs. one answer column.

    Args:
        df: DataFrame with a "gender" column and the column ``col_name``.
        col_name: name of the answer column to cross-tabulate against gender.

    Prints one line with the odds ratio and the p-value. Returns None.
    """
    # NOTE(review): presumably the crosstab must be 2x2 for fisher_exact
    # (two genders, two answer values) - confirm for the installed SciPy.
    counts = pd.crosstab(df["gender"], df[col_name]).values
    odds_ratio, p = fisher_exact(counts, alternative="two-sided")
    print(f"Odds Ratio: {odds_ratio:.2f}, p-value: {p:.4f}")



## LLM Questions

In [7]:
# Column name in `users` -> statement text used as the row label in the table.
llm_question_labels = {
    "llms_helpful": "LLMs are helpful for writing code.",
    "llms_enjoy": "I enjoy using LLMs in my work or for studying.",
    "llms_go_back": "I would like to go back to the time before they existed.",
    "llms_faster": (
        "I have become faster at learning new programming skills "
        "through using LLMs."
    ),
    "llms_slower": (
        "I have become slower at learning new programming skills "
        "through using LLMs."
    ),
    "llms_prompt_engineering": (
        "I am employing specific prompting techniques "
        "when prompting LLMs."
    ),
    "llms_approach": (
        "My way to approach new programming challenges has changed "
        "through using LLMs."
    ),
    "llms_ethical_concerns": "I have ethical concerns about the technology.",
}

# Agree / neutral / disagree shares per gender for every statement above.
table = make_grouped_latex_table(users, llm_question_labels)
table

Unnamed: 0,question,agree female,agree male,neutral female,neutral male,disagree female,disagree male,total replies
0,LLMs are helpful for writing code.,1.0,0.866667,0.0,0.066667,0.0,0.066667,27
1,I enjoy using LLMs in my work or for studying.,0.75,0.666667,0.166667,0.266667,0.083333,0.066667,27
2,I would like to go back to the time before the...,0.166667,0.142857,0.083333,0.285714,0.75,0.571429,26
3,I have become faster at learning new programmi...,0.416667,0.733333,0.166667,0.2,0.416667,0.066667,27
4,I have become slower at learning new programmi...,0.416667,0.133333,0.333333,0.133333,0.25,0.733333,27
5,I am employing specific prompting techniques w...,0.583333,0.428571,0.083333,0.142857,0.333333,0.428571,26
6,My way to approach new programming challenges ...,0.916667,0.571429,0.083333,0.214286,0.0,0.214286,26
7,I have ethical concerns about the technology.,0.833333,0.928571,0.166667,0.071429,0.0,0.0,26


In [17]:
# Chi-squared test of gender vs. the raw (ungrouped) answers, one per question.
# Iterating the dict directly yields its keys, i.e. the column names.
for question in llm_question_labels:
    chi_squared(users, question)


llms_helpful       Agree  Disagree  Neutral  Strongly Agree
gender                                                     
Man (cisgender)        7         1        1               6
Woman (cisgender)      5         0        0               7
Chi2=2.10, p=0.5513, dof=3
llms_enjoy         Agree  Disagree  Neutral  Strongly Agree
gender                                                     
Man (cisgender)        7         1        4               3
Woman (cisgender)      4         1        2               5
Chi2=1.67, p=0.6431, dof=3
llms_go_back       Agree  Disagree  Neutral  Strongly Agree  Strongly disagree
gender                                                                        
Man (cisgender)        1         4        4               1                  4
Woman (cisgender)      2         6        1               0                  3
Chi2=3.54, p=0.4713, dof=4
llms_faster        Agree  Disagree  Neutral  Strongly Agree  Strongly disagree
gender                                      

## Bias Questions

In [10]:
# Column name in `users` -> statement text used as the row label in the table.
bias_question_labels = {
    "llms_ec_bias": "LLMs can reproduce bias",
    "llms_ec_discrimination": "LLMs can reproduce discrimination.",
    "llms_ec_skill_loss": "LLMs can lead to a loss of skill.",
    "llms_ec_learn_less": "LLM users can learn less.",
    "llms_ec_diversity_loss": (
        "LLMs can lead to a loss of diversity "
        "in coding and writing styles"
    ),
}

# Yes / no shares per gender for every statement above.
table = make_yesno_latex_table(users, bias_question_labels)
table

Unnamed: 0,question,yes female,yes male,no female,no male,total replies
0,LLMs can reproduce bias,0.8,0.769231,0.2,0.230769,23
1,LLMs can reproduce discrimination.,0.6,0.461538,0.4,0.538462,23
2,LLMs can lead to a loss of skill.,0.8,0.692308,0.2,0.307692,23
3,LLM users can learn less.,0.3,0.538462,0.7,0.461538,23
4,LLMs can lead to a loss of diversity in coding...,0.8,0.615385,0.2,0.384615,23


In [18]:
# Chi-squared test of gender vs. the answers, one per question.
# Iterating the dict directly yields its keys, i.e. the column names.
for question in bias_question_labels:
    chi_squared(users, question)

llms_ec_bias       No  Yes
gender                    
Man (cisgender)     3   10
Woman (cisgender)   2    8
Chi2=0.00, p=1.0000, dof=1
llms_ec_discrimination  No  Yes
gender                         
Man (cisgender)          7    6
Woman (cisgender)        4    6
Chi2=0.06, p=0.8119, dof=1
llms_ec_skill_loss  No  Yes
gender                     
Man (cisgender)      4    9
Woman (cisgender)    2    8
Chi2=0.01, p=0.9171, dof=1
llms_ec_learn_less  No  Yes
gender                     
Man (cisgender)      6    7
Woman (cisgender)    7    3
Chi2=0.52, p=0.4719, dof=1
llms_ec_diversity_loss  No  Yes
gender                         
Man (cisgender)          5    8
Woman (cisgender)        2    8
Chi2=0.25, p=0.6193, dof=1


In [20]:
# Two-sided Fisher's exact test of gender vs. the answers, one per question.
# Iterating the dict directly yields its keys, i.e. the column names.
for question in bias_question_labels:
    fishers_exact(users, question)

Odds Ratio: 1.20, p-value: 1.0000
Odds Ratio: 1.75, p-value: 0.6802
Odds Ratio: 1.78, p-value: 0.6600
Odds Ratio: 0.37, p-value: 0.4015
Odds Ratio: 2.50, p-value: 0.4050


## LLM Approach open replies

Female:

"Before I started using LLMs, i often got stuck dealing with small bugs and syntax issues. Now, i’m able to focus on understanding the logic etc."
"For simple programming, it is faster for me to let the LLMs write the basic code and it is usually quicker to clarify simple questions than to look them up on internet forums. For more complex problems, however, the AI makes mistakes and is not as reliable. When it comes to fixing errors, I often find that I ask the LLMs directly instead of thinking about the problem myself, even though this would often be the quicker way."
"I am brainstorming ""together"" with the LLM before I start programming. I ask for a first draft of the code I am planning to develop and than adapt that to my specific needs.  "
"I am now mostly describing my problems in natural language to LLMs, I used to search on Google and try different stratigies."
"I use LLMs to consult me regarding the setup of the coding project, which frameworks & languages to use."
"I use my brain less and I have become lazier, which makes debugging or doing anything complex quite exhausting as I'm not always familiar with the code that the LLM has generated"
I've forgotten how to program on my own. And I've lost the ability to think about a problem independently.
It’s quicker to get into the topics.
Very often now it's a lot of vibe coding for new challenges instead of reading documentation properly.

Male:

"- bin sehr faul geworden was lösen von Coding-Problemen angeht, weil man sich auf Stack Overflow durch alles mögliche wühlen musste und am Ende noch angeschnauzt wird
- Ich gebe mich oft mit Lösungen zufrieden ohne nachzuschauen ob das wirklich die beste oder effizienteste Lösung ist
- Ich bin eher dazu geneigt schneller Chatti zu fragen anstatt erstmal selbst zu überlegen was das Problem sein könnte"
Faster development
I can now write production code in languages that I am not proficient in and using frameworks that are new to me.
"I do not start from scratch anymore. I let ChatGPT start with an initial attempt to solve my ""problem"" and then I modify it's ideas."
"If the challenges don't look interesting to me, I often try to let an LLM find a solution before I do it. This mostly replaces my StackOverflow searches."
LLMs enable me to get specific information related to me problems. It may not provide to correct answer every time. But the general direction of the solutions are often helpful.
The first step is usually trying to see how far I can get with an LLM before I code things by hand. If I encounter an error that I cannot fix immediately I tend to try to make the LLM fix it before I debug myself.
for tasks that are not interesting to me i let LLMs make a draft first
