# Table of Contents

The following is an analysis of the "responses.csv" file located in the same folder. Run the Setup and Data Cleaning sections before running any analysis, as the rest of the notebook depends on them. Here is an overview of the contents of this document:

1. Setup
2. Data Cleaning
3. Descriptive Statistics
4. Analysis
    - Art Familiarity vs. Number Correct
    - AI Familiarity vs. Number Correct
    - Generate Correlation Table
    - Normality Verification - Anderson-Darling Test
    - ANOVA, One-Way w/ AI Familiarity
    - ANOVA, One-Way w/ Art Familiarity
    - AI Familiarity Pairwise T-Test


# Setup

In [None]:
import pandas as pd
import scipy as spy
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.stats.multicomp as mc

from sklearn.linear_model import LinearRegression as lr

# Label of the AI-generated image for each of the ten questions, in question
# order; each response column is compared against the entry at the same index.
ANSWER_KEY = [f"Image {number}" for number in (1, 1, 2, 1, 2, 1, 2, 2, 1, 1)]

# Load the raw survey export.
df = pd.read_csv("responses.csv")

# Data Cleaning

In [None]:
# Map the verbose survey question headers onto short feature names.
column_names = {
    "Timestamp": "time",
    "What is your age?": "age",
    "What is your highest level of education?": "education",
    "What is your job? (Say \"N/A\" if not applicable, and \"Student\" if you're a student)": "job",
    "How familiar are you with visual art? (Paintings, digital, sketches, etc.)": "art_familiarity",
    "Please describe your experience with art in a sentence, including mediums if you are an artist. If you have no familiarity with art, say \"None.\"": "art_desc",
    "How familiar are you with artificial intelligence (AI)?": "ai_familiarity",
    "What's your opinion on AI-generated art?": "ai_opinion",
}

# The two per-image questions repeat ten times. The first occurrence keeps the
# plain header; later occurrences carry a ".1" ... ".9" suffix.
for question in range(10):
    suffix = f".{question}" if question else ""
    column_names["Which image did you like more?" + suffix] = f"pref_{question}"
    column_names["Which image do you think is AI generated?" + suffix] = f"ai_{question}"

df = df.rename(columns=column_names)

# Drop respondents who did not give a numeric age: anything that cannot be
# parsed as a number is coerced to NaN and the row is removed. The explicit
# copy gives df its own data, so the column assignments below never write
# into a slice of the pre-filter frame.
df["age"] = pd.to_numeric(df["age"], errors="coerce")
df = df.dropna(subset=["age"]).copy()

# add a feature for whether or not they got each question right
answer_columns = []
for index, correct_image in enumerate(ANSWER_KEY):
    answer_column = f"ans_{index}"
    df[answer_column] = df[f"ai_{index}"] == correct_image
    answer_columns.append(answer_column)

# add a feature for total number of correct answers.
# Summing the boolean ans_* columns row-wise counts the True values. This
# replaces the per-row chained assignment df["num_correct"][i] = ..., which
# raises SettingWithCopy/FutureWarning and does not update df at all when
# pandas Copy-on-Write is enabled.
df["num_correct"] = df[answer_columns].sum(axis=1)

# Descriptive Statistics
Current df structure:
|time          |age           |education     |job           |art_familiarity|art_desc     |ai_familiarity |ai_opinion   |pref_n       |ai_n         |ans_n        |num_correct  |
|:------------:|:------------:|:------------:|:------------:|:------------:|:-----------:|:-----------:|:-----------:|:-----------:|:-----------:|:-----------:|:-----------:|
|datetime|int|str|str|int; 1 - 5|str|int; 1 - 5|int; -1, 0, 1|"Image k"|"Image k"|boolean|int; 0 - 10|

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the cleaned
# frame; shown as the cell output.
df.describe()

# Analysis

## Art Familiarity vs. Number Correct

In [None]:
# Scatter plot of art familiarity against score, overlaid with a
# least-squares regression line, plus the fit's mean absolute error.

x = df["art_familiarity"]
y = df["num_correct"]
plt.scatter(x, y)

# scikit-learn expects a 2-D feature matrix, so reshape to a single column
X = x.to_numpy().reshape(-1, 1)
lr1 = lr()
lr1.fit(X, y)
b = lr1.intercept_
m = lr1.coef_[0]  # coef_ has one entry per feature; take the scalar slope
plt.plot(x, m * x + b, "-r")

# mean absolute error of the fitted line (previously printed twice, unlabeled)
y_hat = lr1.predict(X)
mae = np.mean(np.abs(y - y_hat))
print(f"Mean absolute error: {mae}")

plt.title("Art Familiarity vs. Number Correct")
plt.xlabel("Art Familiarity")
plt.ylabel("Number Correct")

## AI Familiarity vs. Number Correct

In [None]:
# Scatter plot of AI familiarity against score, overlaid with a
# least-squares regression line, plus the fit's mean absolute error.

x1 = df["ai_familiarity"]
y1 = df["num_correct"]
plt.scatter(x1, y1)

# scikit-learn expects a 2-D feature matrix, so reshape to a single column
X1 = x1.to_numpy().reshape(-1, 1)
linreg = lr()
linreg.fit(X1, y1)
b1 = linreg.intercept_
m1 = linreg.coef_[0]  # coef_ has one entry per feature; take the scalar slope
plt.plot(x1, m1 * x1 + b1, "-r")

# mean absolute error of the fitted line (previously printed twice, unlabeled)
y_hat1 = linreg.predict(X1)
mae1 = np.mean(np.abs(y1 - y_hat1))
print(f"Mean absolute error: {mae1}")

plt.title("AI Familiarity vs. Number Correct")
plt.xlabel("AI Familiarity")
plt.ylabel("Number Correct")

## Generate Correlation Table

In [None]:
# Pairwise correlation matrix over the numeric columns only; non-numeric
# columns (free text, "Image k" answers) are excluded by numeric_only=True.
df.corr(numeric_only=True)

## Normality Verification - Anderson-Darling Test

In [None]:
# Anderson-Darling normality test plus a histogram for the score and for
# both familiarity ratings.

normality_checks = [
    ("num_correct", "Number Correct", {}),
    ("ai_familiarity", "AI Familiarity", {"bins": 5}),
    ("art_familiarity", "Art Familiarity", {"bins": 5}),
]

for position, (column, title, hist_options) in enumerate(normality_checks):
    if position:
        # flush the previous figure before drawing the next one; the final
        # figure is left for the notebook to render at the end of the cell
        plt.show()
    print(spy.stats.anderson(df[column], dist="norm"))
    plt.hist(df[column], **hist_options)
    plt.title(title)




## ANOVA, One-Way w/ AI Familiarity

In [None]:
# One-way ANOVA of num_correct across AI-familiarity levels 1 through 5.
scores_by_ai_level = [
    df.loc[df["ai_familiarity"] == level, "num_correct"]
    for level in range(1, 6)
]
spy.stats.f_oneway(*scores_by_ai_level)



## ANOVA, One-Way w/ Art Familiarity

In [None]:
# One-way ANOVA of num_correct across art-familiarity levels 1 through 5.
scores_by_art_level = [
    df.loc[df["art_familiarity"] == level, "num_correct"]
    for level in range(1, 6)
]
spy.stats.f_oneway(*scores_by_art_level)



## AI Familiarity Pairwise T-Test

In [None]:
# Independent-samples t-test of num_correct for every pair of AI-familiarity
# levels, with the "bonf" multiple-comparison correction.
pairwise = mc.MultiComparison(df["num_correct"], df["ai_familiarity"])
pairwise_results = pairwise.allpairtest(spy.stats.ttest_ind, method="bonf")
# the first element of the returned triple is the printable summary table
print(pairwise_results[0])