In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpy.polynomial.polynomial import polyfit
warnings.filterwarnings("ignore")

In [None]:
# Survey answer labels grouped by the numeric score they translate to.
_labels_by_score = {
    5: ('strongly appealing', 'very important'),
    4: ('appealing', 'important'),
    3: ('neutral',),
    2: ('unappealing', 'not important'),
    1: ('strongly unappealing', 'not at all important'),
}
# Flat lookup table: answer label -> integer score.
score_mapping = {
    label: score
    for score, labels in _labels_by_score.items()
    for label in labels
}

# Sub-criteria listed under each top-level category. The key order
# (LAYOUT, TEXT, IMAGE) is significant: get_weightings relabels columns with it.
categories = dict(
    LAYOUT=['Density', 'Colour Scheme', 'Order', 'Balance', 'Other'],
    TEXT=['Readability', 'Information Content', 'Other'],
    IMAGE=['Colour Scheme', 'Information Content', 'Other'],
)

In [None]:
def get_weightings(df):
    """Return the weighting columns of ``df`` relabelled by category.

    Every column whose name contains ``'Weighting'`` is selected (in its
    existing order) and the selection is renamed, positionally, to
    ``LAYOUT``, ``TEXT`` and ``IMAGE``.

    Parameters
    ----------
    df : pandas.DataFrame
        Survey responses; must contain exactly three weighting columns.

    Returns
    -------
    pandas.DataFrame
        A new three-column frame; ``df`` itself is left untouched.

    Raises
    ------
    ValueError
        Raised by pandas when the number of selected columns is not three.
    """
    weighting_columns = [c for c in df.columns if 'Weighting' in c]

    # Explicit copy so relabelling can never write through to the caller's frame.
    weightings = df[weighting_columns].copy()
    weightings.columns = ['LAYOUT', 'TEXT', 'IMAGE']

    return weightings

def get_answers(df, n):
    """Look up the numeric scores given to item ``n`` in one row of ``df``.

    The item's columns are those whose name begins with ``'<n>.'``. Their
    answers are read from the row at position 1 (the second row) and each
    is translated through ``score_mapping``; an answer that is not a key
    of ``score_mapping`` raises ``KeyError``.

    Returns a list of scores in column order.
    """
    prefix = str(n) + '.'
    item_columns = [name for name in df.columns if name.startswith(prefix)]
    # NOTE(review): only positional row 1 is read - confirm this is the intended row.
    row = df[item_columns].iloc[1]

    scores = []
    for answer in row.values:
        scores.append(score_mapping[answer])
    return scores

def get_answer_matrix(df, n_images=100):
    """Build a table of answer scores with one row per image.

    For each image number ``1..n_images`` the scores returned by
    ``get_answers(df, n)`` become one row, labelled with that number.

    Parameters
    ----------
    df : pandas.DataFrame
        Survey responses, in the form ``get_answers`` expects.
    n_images : int, default 100
        Number of images to collect; images are numbered from 1.

    Returns
    -------
    pandas.DataFrame
        Shape ``(n_images, 11)``, indexed by image number, with columns
        ``'1_1' .. '1_5'``, ``'2_1' .. '2_3'`` and ``'3_1' .. '3_3'``.
    """
    answer_columns = ['1_1', '1_2', '1_3', '1_4', '1_5', '2_1', '2_2', '2_3', '3_1', '3_2', '3_3']
    image_numbers = range(1, n_images + 1)

    # Collect all rows first and build the frame once; concatenating inside
    # the loop re-copies the accumulated frame on every iteration.
    rows = [get_answers(df, n) for n in image_numbers]

    return pd.DataFrame(rows, columns=answer_columns, index=image_numbers)

In [None]:
def load_survey(n):
    """Load one survey export and convert its answers to numeric scores.

    Steps, in order:

    1. Read the Excel file ``n`` and discard rows whose
       ``DistributionChannel`` is ``'preview'``.
    2. Drop the row labelled ``0``.
    3. Add ``Background`` and ``SelfDesc`` columns copied from
       ``0.Background`` and ``0.SelfDesc``, with every non-string entry
       replaced by ``'Other'``.
    4. Keep, by position, the column at index 7, the two new columns and
       the columns from index 16 up to (not including) the last three.
    5. Lower-case every column after the first three and translate it
       through ``score_mapping``; values that are not keys of the mapping
       become NaN.

    Parameters
    ----------
    n : str or path-like
        Location of the Excel file, passed straight to ``pandas.read_excel``.

    Returns
    -------
    pandas.DataFrame
        The reduced frame described above.

    Raises
    ------
    KeyError
        If no row labelled ``0`` remains after the preview filter.
    """
    df = pd.read_excel(n)
    # NOTE(review): presumably row 0 holds the export's question text rather
    # than a response - confirm against the files.
    df = df.loc[df.DistributionChannel != 'preview'].drop(index=0)

    # Normalise every remaining row, including the first one, which the
    # earlier index loop (starting at 1) skipped. The free-text companion
    # columns ('*_3_TEXT') are not merged into these labels.
    for source, target in (('0.Background', 'Background'), ('0.SelfDesc', 'SelfDesc')):
        df[target] = [value if isinstance(value, str) else 'Other' for value in df[source]]

    columns = df.columns
    selected = [columns[7]] + list(columns[-2:]) + list(columns[16:-3])
    # Copy so the column assignments below act on an independent frame
    # rather than on a slice of the full export.
    df = df[selected].copy()

    for c in df.columns[3:]:
        df[c] = df[c].str.lower().map(score_mapping)

    return df

In [None]:
# Load both survey exports and stack them into one frame with a fresh 0..N-1 index.
df_self = load_survey('UI Eval Self_May 6, 2023_07.39.xlsx')
df_survey = load_survey('Evaluation of User Interface Layout and Components_May 5, 2023_12.02.xlsx')
df = pd.concat([df_self, df_survey], ignore_index=True)

In [None]:
def get_weightings(df):
    """Return the weighting columns of ``df`` relabelled by category.

    Every column whose name contains ``'Weighting'`` is selected (in its
    existing order) and the selection is renamed, positionally, to the
    keys of ``categories``. This definition replaces the same-named
    function defined earlier in the notebook.

    Parameters
    ----------
    df : pandas.DataFrame
        Survey responses; must contain as many weighting columns as
        ``categories`` has keys.

    Returns
    -------
    pandas.DataFrame
        A new frame with one column per category; ``df`` is left untouched.

    Raises
    ------
    ValueError
        Raised by pandas when the number of selected columns does not
        match the number of categories.
    """
    weighting_columns = [c for c in df.columns if 'Weighting' in c]

    # Explicit copy so relabelling can never write through to the caller's frame.
    weightings = df[weighting_columns].copy()
    weightings.columns = list(categories.keys())

    return weightings

In [None]:
# Fraction of rows falling in each 'Background' group, drawn as a bar chart.
b = df.groupby('Background')['Background'].count() / len(df)
fig, ax = plt.subplots()
ax.bar(b.index, b.values, color=['r', 'b', 'g'])
ax.set_title('Expert Background')
ax.set_ylabel('Weight')
plt.show()

In [None]:
# Fraction of rows falling in each 'SelfDesc' group, drawn as a bar chart.
# Count the grouping column itself: counting 'Background' inside these
# groups would silently leave out any row whose Background is missing.
b = df.groupby('SelfDesc').SelfDesc.count()/len(df)
plt.bar(b.index, b.values, color=['r', 'g', 'b'])
plt.title('Expert Self Description')
plt.ylabel('Weight')
plt.show()

In [None]:
# Nested ring chart: the outer ring shows the fraction of rows per
# 'Background', the inner ring the fraction per 'SelfDesc'.
fig, ax = plt.subplots()
ax.axis('equal')
width = 0.3  # radial thickness applied to the wedges of both rings

cm = plt.get_cmap("tab10")
cout = cm(np.array([1, 2, 3]))

b = df.groupby('Background').Background.count()/len(df)
pie, _ = ax.pie(b.values, radius=1, labels=b.index, colors=cout)
plt.setp(pie, width=width, edgecolor='white')

cm = plt.get_cmap("Pastel1")
cin = cm(np.array([1, 2, 3]))
# Count the grouping column itself: counting 'Background' inside the
# 'SelfDesc' groups would leave out any row whose Background is missing.
b = df.groupby('SelfDesc').SelfDesc.count()/len(df)
pie2, _ = ax.pie(b.values, radius=0.5, labels=b.index,
                 labeldistance=0.65, colors=cin)
plt.setp(pie2, width=width, edgecolor='white')
plt.show()

In [None]:
# Histogram of the three category weightings; each legend entry carries the column mean.
weightings = get_weightings(df)
m = weightings.mean()
lab = [f'{name}: {m[name]:.2f}' for name in weightings.columns]

fig, ax = plt.subplots()
ax.hist(weightings, label=lab)
ax.legend()
ax.set_xlabel('Score')
ax.set_ylabel('Count')
plt.show()

In [None]:
def get_answers(df, n):
    """Look up the numeric scores given to item ``n`` in one row of ``df``.

    The item's columns are those whose name begins with ``'<n>.'``. Their
    answers are read from the row at position 1 (the second row) and each
    is translated through ``score_mapping``; an answer that is not a key
    of ``score_mapping`` raises ``KeyError``.

    Returns a list of scores in column order.
    """
    prefix = str(n) + '.'
    item_columns = [name for name in df.columns if name.startswith(prefix)]
    # NOTE(review): only positional row 1 is read - confirm this is the intended row.
    row = df[item_columns].iloc[1]

    scores = []
    for answer in row.values:
        scores.append(score_mapping[answer])
    return scores

In [None]:
# Pool every individual answer over images 1..100 into three buckets
# (first five columns of each image, next three, remainder) and print
# the mean and standard deviation of each bucket.
columns = df.columns
a, b, c = [], [], []
for n in range(1, 101):
    prefix = str(n) + '.'
    image_columns = [col for col in columns if col.startswith(prefix)]
    # dropna() discards a respondent's row when any column of the bucket is missing.
    a.extend(df[image_columns[:5]].dropna().values.flatten())
    b.extend(df[image_columns[5:8]].dropna().values.flatten())
    c.extend(df[image_columns[8:]].dropna().values.flatten())

for label, pooled in (('layout', a), ('text', b), ('image', c)):
    scores = np.array(pooled)
    print(f'{label} mean = {scores.mean():.2f}, std = {scores.std():.2f}')

In [None]:
# One mean per image and bucket: a = first five columns, b = next three, c = the rest.
a, b, c = [], [], []
buckets = ((a, slice(None, 5)), (b, slice(5, 8)), (c, slice(8, None)))
for n in range(1, 101):
    image_columns = [col for col in columns if col.startswith(f'{n}.')]
    for bucket, part in buckets:
        answers = df[image_columns[part]].dropna().values.flatten()
        bucket.append(answers.mean())

In [None]:
# Distribution of the per-image score, i.e. the plain average of the three bucket means.
ui_score = (np.array(a) + np.array(b) + np.array(c)) / 3

fig, ax = plt.subplots()
ax.hist(ui_score, bins=20, density=True)
ax.set_ylabel('Density')
ax.set_xlabel('UI Score')
ax.set_title('Expert Scoring Distribution')
plt.show()

In [None]:
# Straight-line fit of the per-image image score (c) against the layout score (a).
# numpy.polynomial's polyfit returns coefficients lowest degree first:
# bb is the intercept, m the slope.
bb, m = polyfit(np.array(a), np.array(c), 1)

# Fixed x-range over which the fitted line is drawn, built once.
xs = np.arange(1.5, 4.2, 0.1)

plt.scatter(np.array(a), np.array(c), c='b')
plt.plot(xs, bb + m * xs, '-', c='r')
plt.grid()
plt.xlabel('Layout Score')
plt.ylabel('Image Score')
plt.show()

In [None]:
# Read the per-image table; later cells use its `fn` and `likes` columns.
social = pd.read_csv('100/100_with_social.csv')

In [None]:
# Numeric image id = the part of each file name before its first '.'.
image_id = [int(name.partition('.')[0]) for name in social.fn]

In [None]:
# Expert score for each listed image: the average of its three bucket means.
# Image ids are used 1-based here, while a, b and c are 0-based lists.
ex_score = []
for i in image_id:
    k = i - 1
    ex_score.append((a[k] + b[k] + c[k]) / 3)

In [None]:
# Attach the scores as a new column; ex_score was built by iterating social.fn, so row order matches.
social['expert'] = ex_score

In [None]:
# Scatter of likes against the expert score; the title shows the
# off-diagonal entry of the correlation-coefficient matrix.
cc = np.corrcoef(social.likes, social.expert)
plt.scatter(social.likes, social.expert, c='b')
plt.xlabel('Number of Likes')
plt.ylabel('Expert Aesthetic Score')
plt.title(f'Correlation: {cc[0,1]:.3f}')
plt.show()

In [None]:
# Mean score of every answer column for images 1..100, written to CSV
# with one row per image.
out = []
for n in range(1, 101):
    image_columns = [c for c in columns if c.startswith(str(n)+'.')]
    # dropna() discards a respondent's row when any of the image's answers is missing.
    out.append(df[image_columns].dropna().mean().to_list())

# Header labels = the part of each column name after the '<n>.' prefix.
# Derived after the loop so this cell no longer depends on an
# `image_columns` value left behind by an earlier cell.
cols = [name.split('.')[1] for name in image_columns]

pd.DataFrame(columns=cols, data=out, index=np.arange(1, 101)).to_csv('100/100_avg_scores.csv')