In [None]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# The same three *_TEXT columns are removed from both survey exports.
TEXT_COLUMNS = ["QID11_8_TEXT", "QID19_5_TEXT", "QID45_12_TEXT"]

# Load each cleaned export and strip the *_TEXT columns in one step.
os_df = pd.read_csv("./Datasets/os-very-clean.csv").drop(columns=TEXT_COLUMNS)
cs2_df = pd.read_csv("./Datasets/cs2-very-clean.csv").drop(columns=TEXT_COLUMNS)

# Stack the two cohorts row-wise under a fresh 0..n-1 index.
allDF = pd.concat([os_df, cs2_df], ignore_index=True)

allDF.shape

Index.txt has more information regarding the questions.

In [None]:
# Question-ID column groups used to slice allDF in the cells below.

# Presumably the demographic questions (name-based; confirm against Index.txt).
# Combined with other groups when counting complete responses.
DEMOGRAPHICS = ["QID4", "QID66", "QID9", "QID11", "QID43", "QID10_TEXT"]
# QID27_* items analysed in the Q4 section; QID27_21 takes part in the NaN
# filtering there but is dropped before plotting.
CONFIDENCE = ["QID27_4", "QID27_16","QID27_5","QID27_18","QID27_20", "QID27_21"]

# "USE" groups: QID49_* are relabelled Trust1..Trust6 (Q1), QID46_* are the
# "Helps ..." statements (Q2), QID64_* the Encouraged/Professional/Others Use/
# Worry/Lesser statements (Q3).
AI_USE_TRUST = ["QID49_1", "QID49_2", "QID49_4", "QID49_5", "QID49_6", "QID49_7"]
AI_USE_CASES = ["QID46_1", "QID46_12", "QID46_3", "QID46_4", "QID46_10"]
AI_USE_EXPOSURE = ["QID64_1", "QID64_3", "QID64_6", "QID64_7", "QID64_8"]
# Single item that is also a member of AI_USE_EXPOSURE.
WORRY_USE = ["QID64_7"]

# "HEARD" counterparts: QID61_* map onto the same Trust1..Trust6 labels and
# QID60_* onto Encouraged/Professional/Others Use/Worry (there is no "Lesser"
# item in this group).
AI_HEARD_TRUST = ["QID61_1", "QID61_2", "QID61_10", "QID61_5", "QID61_6", "QID61_7"]
AI_HEARD_EXPOSURE = [ "QID60_1", "QID60_3", "QID60_6", "QID60_7"]
# Single item that is also a member of AI_HEARD_EXPOSURE.
WORRY_HEARD = ["QID60_7"]

# QID39_* items; presumably the hackathon ranking questions (name-based,
# confirm against Index.txt).
HACKATHON_RANKINGS = ["QID39_1", "QID39_2", "QID39_3", "QID39_4", "QID39_5", "QID39_6"]

In [None]:
# Rows with no missing value across DEMOGRAPHICS + AI_USE_CASES.
tempList = DEMOGRAPHICS + AI_USE_CASES
tempDF = allDF[tempList].dropna()
useCount = tempDF.shape[0]

# Rows with no missing value across DEMOGRAPHICS + AI_HEARD_TRUST.
tempList = DEMOGRAPHICS + AI_HEARD_TRUST
tempDF = allDF[tempList].dropna()
heardCount = tempDF.shape[0]

# Denominator: every row of the merged frame.
totalCount = allDF.shape[0]

print(f"Proportions heard vs used: {heardCount/totalCount} vs {useCount/totalCount}")

In [None]:
# Correlation matrix over every column of allDF except QID19, QID45 and QID62.
# drop() returns a new frame, so allDF itself is left untouched.
corrDF = allDF.drop(columns=["QID19", "QID45", "QID62"])
corr = corrDF.corr()
# Heatmap rendering is currently disabled:
#plt.figure(figsize=(20,20))
#sns.heatmap(corr, annot=True, cmap=plt.cm.Reds)
#plt.show()

In [None]:
# Tally how many rows carry each distinct QID11 answer
# (value_counts leaves missing values out by default).
allDF["QID11"].value_counts()

### Q1

In [None]:
# Keep only the rows that answered every AI_USE_TRUST item.
# dropna() is chained onto the selection so a brand-new frame is produced;
# the previous `dropna(inplace=True)` on a column selection of allDF is the
# chained-assignment pattern pandas flags with SettingWithCopyWarning.
useTrustDF = allDF[AI_USE_TRUST].dropna()
useTrustDF.shape

In [None]:
# Keep only the rows that answered every AI_HEARD_TRUST item.
# dropna() is chained onto the selection so a brand-new frame is produced;
# the previous `dropna(inplace=True)` on a column selection of allDF is the
# chained-assignment pattern pandas flags with SettingWithCopyWarning.
heardTrustDF = allDF[AI_HEARD_TRUST].dropna()
heardTrustDF.shape

In [None]:
# Both question families are mapped onto the same Trust1..Trust6 labels so the
# two frames end up with identical column names.
new_columns = {
    # QID49_* items
    'QID49_1': 'Trust1', 'QID49_2': 'Trust2', 'QID49_4': 'Trust3',
    'QID49_5': 'Trust4', 'QID49_6': 'Trust5', 'QID49_7': 'Trust6',
    # QID61_* items
    'QID61_1': 'Trust1', 'QID61_2': 'Trust2', 'QID61_10': 'Trust3',
    'QID61_5': 'Trust4', 'QID61_6': 'Trust5', 'QID61_7': 'Trust6',
}

# Keys that are absent from a frame are simply ignored by rename().
useTrustDF = useTrustDF.rename(columns=new_columns)
heardTrustDF = heardTrustDF.rename(columns=new_columns)

In [None]:
# Stack the two trust frames row-wise; ignore_index renumbers the rows from 0.
trustDF = pd.concat([useTrustDF, heardTrustDF], ignore_index = True)

In [None]:
# Items averaged into the composite score. Trust1 is excluded because it is
# similar to another question.
TRUST_SCORE_ITEMS = ["Trust2", "Trust3", "Trust4", "Trust5", "Trust6"]

# Columns are picked by name rather than by position. The earlier version
# dropped Trust1 and then sliced each row with `[1:]`, which also discarded
# Trust2, and on a second run of the cell it folded the existing TrustScore
# column into the mean. Selecting by name fixes both and makes the cell
# idempotent.
newTrustDF = trustDF[TRUST_SCORE_ITEMS].copy()

# Row-wise mean (missing values are skipped), aligned on trustDF's index.
trustDF["TrustScore"] = newTrustDF.mean(axis=1)
trustDF

In [None]:
# Bucket rows by TrustScore relative to 3: below, exactly equal, above.
scores = trustDF["TrustScore"]
distrust = int((scores < 3).sum())
neutral = int((scores == 3).sum())
trust = int((scores > 3).sum())
total = trustDF.shape[0]

# Absolute counts first ...
print("Distrust: ", distrust)
print("Neutral: ", neutral)
print("Trust: ", trust)
print("Total: ", total)
# ... then each bucket as a fraction of all rows.
print("Distrust %: ", distrust/total)
print("Neutral %: ", neutral/total)
print("Trust %: ", trust/total)

In [None]:
# Single 5x5-inch panel showing the TrustScore distribution; cut=0 keeps the
# violin within the observed data range.
fig, axes = plt.subplots(figsize=(5, 5))
sns.violinplot(data=trustDF, y="TrustScore", cut=0, ax=axes)
fig.savefig("./Figures/TrustScore.png")

### Q2

In [None]:
# QID13 plus the AI_USE_CASES items, restricted to rows with no missing value.
# dropna() is chained onto the selection so a brand-new frame is produced;
# the previous `dropna(inplace=True)` on a column selection of allDF is the
# chained-assignment pattern pandas flags with SettingWithCopyWarning.
perceptionOfHelpful = allDF[["QID13"] + AI_USE_CASES].dropna()

In [None]:
# Readable labels for QID13 and the QID46_* items; they double as plot labels
# in the cells that follow.
new_columns = {
    "QID13": "User",
    "QID46_1": "Helps Complete Tasks",
    "QID46_12": "Helps When Stuck",
    "QID46_3": "Helps Learn Concepts",
    "QID46_4": "Makes Me More Confident",
    "QID46_10": "Overall improved motivation and engagement",
}

# Rebind to the renamed frame instead of renaming in place.
perceptionOfHelpful = perceptionOfHelpful.rename(columns=new_columns)

In [None]:
# Summary statistics for the columns of perceptionOfHelpful.
perceptionOfHelpful.describe()

In [None]:
# Bar chart of how often each answer to "Helps Complete Tasks" occurs.
ax = sns.countplot(data=perceptionOfHelpful, x="Helps Complete Tasks")
ax.set_title("Helps Complete Tasks")
ax.set_xlabel("Helps Complete Tasks - Score Higher is Better")
ax.set_ylabel("Count")
ax.figure.savefig("./Figures/HelpsCompleteTasks.png")

In [None]:
# Bar chart of how often each answer to "Helps When Stuck" occurs.
ax = sns.countplot(data=perceptionOfHelpful, x="Helps When Stuck")
ax.set_title("Helps When Stuck")
ax.set_xlabel("Helps When Stuck - Score Higher is Better")
ax.set_ylabel("Count")
ax.figure.savefig("./Figures/HelpsWhenStuck.png")

In [None]:
# Bar chart of how often each answer to "Helps Learn Concepts" occurs.
ax = sns.countplot(data=perceptionOfHelpful, x="Helps Learn Concepts")
ax.set_title("Helps Learn Concepts")
ax.set_xlabel("Helps Learn Concepts - Score Higher is Better")
ax.set_ylabel("Count")
ax.figure.savefig("./Figures/HelpsLearnConcepts.png")

In [None]:
# Bar chart of how often each answer to "Makes Me More Confident" occurs.
ax = sns.countplot(data=perceptionOfHelpful, x="Makes Me More Confident")
ax.set_title("Makes Me More Confident")
ax.set_xlabel("Makes Me More Confident - Score Higher is Better")
ax.set_ylabel("Count")
ax.figure.savefig("./Figures/MakesMeMoreConfident.png")

In [None]:
# Bar chart of how often each answer to the motivation/engagement item occurs.
ax = sns.countplot(data=perceptionOfHelpful, x="Overall improved motivation and engagement")
ax.set_title("Overall improved motivation and engagement")
ax.set_xlabel("Overall improved motivation and engagement - Score Higher is Better")
ax.set_ylabel("Count")
ax.figure.savefig("./Figures/OverallImprovedMotivationAndEngagement.png")

### Q3

In [None]:
# QID13 plus the AI_USE_EXPOSURE items, restricted to rows with no missing value.
# dropna() is chained onto the selection so a brand-new frame is produced;
# the previous `dropna(inplace=True)` on a column selection of allDF is the
# chained-assignment pattern pandas flags with SettingWithCopyWarning.
perceptionOfTools = allDF[["QID13"] + AI_USE_EXPOSURE].dropna()

In [None]:
# QID13 plus the AI_HEARD_EXPOSURE items, restricted to rows with no missing value.
# dropna() is chained onto the selection so a brand-new frame is produced;
# the previous `dropna(inplace=True)` on a column selection of allDF is the
# chained-assignment pattern pandas flags with SettingWithCopyWarning.
perceptionOfTools2 = allDF[["QID13"] + AI_HEARD_EXPOSURE].dropna()

In [None]:
# Shared labels for the QID64_* and QID60_* items so both frames line up
# column-for-column. 'Lesser' (QID64_8) has no QID60_* counterpart.
new_columns = {
    'QID13': 'User',
    # QID64_* items
    'QID64_1': 'Encouraged',
    'QID64_3': 'Professional',
    'QID64_6': 'Others Use',
    'QID64_7': 'Worry',
    'QID64_8': 'Lesser',
    # QID60_* items
    'QID60_1': 'Encouraged',
    'QID60_3': 'Professional',
    'QID60_6': 'Others Use',
    'QID60_7': 'Worry',
}

# Rebind to the renamed frames instead of renaming in place.
perceptionOfTools = perceptionOfTools.rename(columns=new_columns)
perceptionOfTools2 = perceptionOfTools2.rename(columns=new_columns)

In [None]:
# Summary statistics for the frame built from the AI_USE_EXPOSURE items.
perceptionOfTools.describe()

In [None]:
# Summary statistics for the frame built from the AI_HEARD_EXPOSURE items.
perceptionOfTools2.describe()

In [None]:
# Stack both exposure frames under a fresh index, then turn the numeric User
# codes (1 / 2) into the labels used on the plot axes.
# NOTE: this cell rebinds perceptionOfTools to the stacked result, so running
# it a second time appends the rows of perceptionOfTools2 again.
perceptionOfTools = pd.concat(
    [perceptionOfTools, perceptionOfTools2],
    ignore_index=True,
).assign(User=lambda frame: frame["User"].replace({1: "User", 2: "Non-User"}))
perceptionOfTools

In [None]:
# "Encouraged" answers split by the User column; cut=0 keeps each violin
# within the observed data range.
ax = sns.violinplot(data=perceptionOfTools, x="User", y="Encouraged", cut=0)
ax.set_title("Users Encouraged?")
ax.figure.savefig("./Figures/Encouraged.png")

In [None]:
# "Professional" answers split by the User column; cut=0 keeps each violin
# within the observed data range.
ax = sns.violinplot(data=perceptionOfTools, x="User", y="Professional", cut=0)
ax.set_title("Users Think AI is Professional?")
ax.figure.savefig("./Figures/Professional.png")

In [None]:
# "Others Use" answers split by the User column; cut=0 keeps each violin
# within the observed data range.
ax = sns.violinplot(data=perceptionOfTools, x="User", y="Others Use", cut=0)
ax.set_title("Users Know Others Use AI?")
ax.figure.savefig("./Figures/OthersUse.png")

In [None]:
# "Worry" answers split by the User column; cut=0 keeps each violin within
# the observed data range.
ax = sns.violinplot(data=perceptionOfTools, x="User", y="Worry", cut=0)
ax.set_title("Users Worry AI will replace them")
ax.figure.savefig("./Figures/Worry.png")

### Q4

In [None]:
# QID13 plus the CONFIDENCE items, restricted to rows with no missing value.
# dropna() is chained onto the selection so a brand-new frame is produced;
# the previous `dropna(inplace=True)` on a column selection of allDF is the
# chained-assignment pattern pandas flags with SettingWithCopyWarning.
perceptionConfidence = allDF[["QID13"] + CONFIDENCE].dropna()

In [None]:
# Show the current column labels of perceptionConfidence.
perceptionConfidence.columns

In [None]:
# Readable labels for QID13 and the QID27_* items that are kept.
new_columns = {
    'QID13': 'User',
    'QID27_4': 'Complete tasks alone',
    'QID27_16': 'Complete tasks when working with someone',
    'QID27_5': 'Complete tasks when they have someone to help',
    'QID27_18': 'Finding steps to solve a problem',
    'QID27_20': 'Finding ways out when they are stuck on a problem',
}

# Rename and drop QID27_21 in one non-mutating chain. errors="ignore" lets the
# cell be re-executed: once QID27_21 is gone, the former in-place drop raised
# KeyError on a second run.
perceptionConfidence = (
    perceptionConfidence
    .rename(columns=new_columns)
    .drop(columns=["QID27_21"], errors="ignore")
)

In [None]:
# Turn the numeric User codes (1 / 2) into the labels shown on the plot axes.
perceptionConfidence = perceptionConfidence.assign(
    User=perceptionConfidence["User"].replace({1: "User", 2: "Non-User"})
)

In [None]:
# Summary statistics for the columns of perceptionConfidence.
perceptionConfidence.describe()

In [None]:
# "Complete tasks alone" answers split by the User column; cut=0 keeps each
# violin within the observed data range.
ax = sns.violinplot(data=perceptionConfidence, x="User", y="Complete tasks alone", cut=0)
ax.set_title("Users Complete Tasks Alone?")
ax.figure.savefig("./Figures/CompleteTasksAlone.png")

In [None]:
# "Complete tasks when working with someone" answers split by the User column;
# cut=0 keeps each violin within the observed data range.
ax = sns.violinplot(
    data=perceptionConfidence,
    x="User",
    y="Complete tasks when working with someone",
    cut=0,
)
ax.set_title("Users Complete Tasks When Working With Someone?")
ax.figure.savefig("./Figures/CompleteTasksWhenWorkingWithSomeone.png")

In [None]:
# "Complete tasks when they have someone to help" answers split by the User
# column; cut=0 keeps each violin within the observed data range.
ax = sns.violinplot(
    data=perceptionConfidence,
    x="User",
    y="Complete tasks when they have someone to help",
    cut=0,
)
ax.set_title("Users Complete Tasks When they Have Someone To Help?")
ax.figure.savefig("./Figures/CompleteTasksWhenIHaveSomeoneToHelp.png")

In [None]:
# "Finding steps to solve a problem" answers split by the User column; cut=0
# keeps each violin within the observed data range.
ax = sns.violinplot(
    data=perceptionConfidence,
    x="User",
    y="Finding steps to solve a problem",
    cut=0,
)
ax.set_title("Users Finding Steps to Solve a Problem?")
ax.figure.savefig("./Figures/FindingStepsToSolveAProblem.png")

In [None]:
# "Finding ways out when they are stuck on a problem" answers split by the
# User column; cut=0 keeps each violin within the observed data range.
ax = sns.violinplot(
    data=perceptionConfidence,
    x="User",
    y="Finding ways out when they are stuck on a problem",
    cut=0,
)
ax.set_title("Users Finding Ways Out When They Are Stuck On A Problem?")
ax.figure.savefig("./Figures/FindingWaysOutWhenTheyAreStuckOnAProblem.png")

In [None]:
# Average of every remaining column, computed separately for each value of
# the User column.
perceptionConfidenceMean = perceptionConfidence.groupby(by="User").agg("mean")
perceptionConfidenceMean