# Importing libraries

In [None]:
import pandas as pd
import numpy as np
from scipy import stats as stats

# Loading data

In [None]:
# Raw user-tracking exports: button clicks and time spent, one file per site version
table_button_a = pd.read_csv("table_button_a.csv")
table_time_a = pd.read_csv("table_time_a.csv")
table_button_b = pd.read_csv("table_button_b.csv")
table_time_b = pd.read_csv("table_time_b.csv")

qualtrics_data = pd.read_csv("OpenGáta_Website_testing_Students_6_June_2023_final.csv")
# Dropping the first 2 rows from the dataset that are not responses
qualtrics_data = qualtrics_data.drop([0, 1])
# Only use completed, "real" (non-spam) responses. The explicit copy makes the
# filtered frame independent of the raw one, so the column assignment below
# does not trigger pandas' SettingWithCopyWarning.
is_finished = qualtrics_data["Finished"] == "True"
is_real = qualtrics_data["Status"] != "Spam"
qualtrics_data = qualtrics_data[is_finished & is_real].copy()
# Converts the Random ID variable to a float so it can be matched against visitor_id
qualtrics_data["Random ID"] = qualtrics_data["Random ID"].astype(float)

# Tracking rows without a visitor ID cannot be linked to a survey response
df_button_a = table_button_a.dropna(subset=["visitor_id"])
df_time_a = table_time_a.dropna(subset=["visitor_id"])
df_button_b = table_button_b.dropna(subset=["visitor_id"])
df_time_b = table_time_b.dropna(subset=["visitor_id"])

# Uses only the user-tracking data if the visitor ID is also in the list of Qualtrics random IDs
survey_ids = qualtrics_data["Random ID"]
df_button_a = df_button_a.loc[df_button_a["visitor_id"].isin(survey_ids)]
df_time_a = df_time_a.loc[df_time_a["visitor_id"].isin(survey_ids)]
df_button_b = df_button_b.loc[df_button_b["visitor_id"].isin(survey_ids)]
df_time_b = df_time_b.loc[df_time_b["visitor_id"].isin(survey_ids)]

display(df_button_a)
display(df_time_a)
display(df_button_b)
display(df_time_b)
display(qualtrics_data)

# Is time data normal?

In [None]:
def is_data_normal(dataframe, significance_level=0.05):
    """Check a sample for normality with the Shapiro-Wilk test.

    Args:
        dataframe: The sample to test; it is passed straight to
            ``scipy.stats.shapiro`` (e.g. a single dataframe column).
        significance_level: Threshold the p-value is compared against.
            Defaults to 0.05.

    Returns:
        bool: True when the p-value is greater than ``significance_level``
        (normality is not rejected), otherwise False.
    """
    _, p_value = stats.shapiro(dataframe)

    # bool() keeps the return type a plain Python bool rather than numpy.bool_
    return bool(p_value > significance_level)

def normal_values(dataframe, column, data_type, version):
    """Print whether one column of a dataframe passes the normality check.

    Args:
        dataframe: Frame holding the data.
        column: Label of the column to test.
        data_type: Human-readable name of the measure, used in the message.
        version: Site version label, used in the message.
    """
    # Run the normality check directly on the selected column
    verdict = is_data_normal(dataframe[column])
    print(
        f"Is the {data_type} data for version {version} normally distributed?",
        verdict,
    )




In [None]:
# Print the normality verdict for the time_spent column of each site version
normal_values(df_time_a, "time_spent", "time", "A")
normal_values(df_time_b, "time_spent", "time", "B")

# Excluding outliers

In [None]:
def outliers(df, column, normal):
    """Return the rows of ``df`` whose ``column`` value is not an outlier.

    Args:
        df: Dataframe to filter.
        column: Label of the numeric column to inspect (converted with ``str``).
        normal: Truthy to use the three-standard-deviation rule (population
            standard deviation, ddof=0); falsy to use the 1.5 * IQR rule.

    Returns:
        A dataframe with only the rows strictly inside the lower/upper limits.
        With the IQR rule the rows are ordered by the "id" column when the
        frame has one; otherwise the original row order is kept.
    """
    column = str(column)
    values = df[column]

    # Truthiness instead of `== True` / `== False`, so any flag value selects
    # a branch rather than silently falling through and returning None.
    if normal:
        mean = values.mean()
        std = values.std(ddof=0)

        upper_lim = mean + 3 * std
        lower_lim = mean - 3 * std

        candidates = df
    else:
        # quantile() does not need sorted input
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        iqr = q3 - q1

        upper_lim = q3 + 1.5 * iqr
        lower_lim = q1 - 1.5 * iqr

        print(f"q1 = {q1}, q3 = {q3}, iqr = {iqr}, upper bound = {upper_lim}, lower bound = {lower_lim}")

        # Keep the id ordering when available; frames without an "id" column
        # no longer raise a KeyError.
        if "id" in df.columns:
            candidates = df.sort_values(by=["id"], ascending=True)
        else:
            candidates = df

    inside_limits = (candidates[column] < upper_lim) & (candidates[column] > lower_lim)
    return candidates[inside_limits]

### Time spent

In [None]:
# normal=False selects the 1.5 * IQR rule inside outliers().
# NOTE: df_time_a is overwritten, so re-running this cell filters the
# already-filtered data again.
df_time_a = outliers(df=df_time_a, column="time_spent", normal=False)
df_time_a.head()

In [None]:
# normal=False selects the 1.5 * IQR rule inside outliers().
# NOTE: df_time_b is overwritten, so re-running this cell filters the
# already-filtered data again.
df_time_b = outliers(df=df_time_b, column="time_spent", normal=False)
df_time_b.head()

# Reshaping data

### User Tracking

In [None]:
#transforming from long to wide, adds "Total" row and column
df_button_wide_a = pd.pivot_table(data=df_button_a, index = "visitor_id", columns = "name", values = "button", aggfunc="sum", \
                                  margins=True, margins_name="Total")
df_time_wide_a = pd.pivot_table(df_time_a, index = "visitor_id", columns = "page", values="time_spent", aggfunc="sum", \
                                margins=True, margins_name="Total")
df_button_wide_b = pd.pivot_table(df_button_b, index = "visitor_id", columns = "name", values = "button", aggfunc="sum", \
                                  margins=True, margins_name="Total")
df_time_wide_b = pd.pivot_table(df_time_b, index = "visitor_id", columns = "page", values="time_spent", aggfunc="sum", \
                                margins=True, margins_name="Total")

# Removing the "Total" row
df_button_wide_a = df_button_wide_a.drop(df_button_wide_a.index[-1])
df_time_wide_a = df_time_wide_a.drop(df_time_wide_a.index[-1])
df_button_wide_b = df_button_wide_b.drop(df_button_wide_b.index[-1])
df_time_wide_b = df_time_wide_b.drop(df_time_wide_b.index[-1])

df_button_wide_a = df_button_wide_a.reset_index()
df_time_wide_a = df_time_wide_a.reset_index()
df_button_wide_b = df_button_wide_b.reset_index()
df_time_wide_b = df_time_wide_b.reset_index()

display(df_button_wide_a)
display(df_time_wide_a)
display(df_button_wide_b)
display(df_time_wide_b)

### Qualtrics data

In [None]:
# Cleaning qualtrics data
qualtrics_data = qualtrics_data.drop(qualtrics_data.columns[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]], axis=1)

# Replacing Likert-scale answers with numeric values
qualtrics_data = qualtrics_data.replace("7 - Strongly agree", 7)
qualtrics_data = qualtrics_data.replace("6 - Agree", 6)
qualtrics_data = qualtrics_data.replace("5 - Somewhat agree", 5)
qualtrics_data = qualtrics_data.replace("4 - Neither agree nor disagree", 4)
qualtrics_data = qualtrics_data.replace("3 - Somewhat disagree", 3)
qualtrics_data = qualtrics_data.replace("2 - Disagree", 2)
qualtrics_data = qualtrics_data.replace("1 - Strongly disagree", 1)

# Seperating the data of A/B versions & inverting questions worded differently on the Likert Scale
df_qualtrics_a = qualtrics_data[qualtrics_data['version'] == 'A']
df_qualtrics_b = qualtrics_data[qualtrics_data['version'] == 'B']

df_qualtrics_a.dropna()
df_qualtrics_b.dropna()

df_qualtrics_a.reset_index()
df_qualtrics_b.reset_index()

# Easy to use questions
df_a_easy = df_qualtrics_a.iloc[:,[1, 6, 7, 10]]
df_b_easy = df_qualtrics_b.iloc[:,[1, 6, 7, 10,]]

df_a_easy.iloc[:, 2] = 8 - df_a_easy.iloc[:, 2]
df_b_easy.iloc[:, 2] = 8 - df_b_easy.iloc[:, 2]

# Trust questions
df_a_trust = df_qualtrics_a.iloc[:,[3, 4, 5, 10]]
df_b_trust = df_qualtrics_b.iloc[:,[3, 4, 5, 10]]

df_a_trust.iloc[:, 1:3] = 8 - df_a_trust.iloc[:, 1:3]
df_b_trust.iloc[:, 1:3] = 8 - df_b_trust.iloc[:, 1:3]

# Informative website questions
df_a_info = df_qualtrics_a.iloc[:,[2, 8, 9, 10]]
df_b_info = df_qualtrics_b.iloc[:,[2, 8, 9, 10]]

df_a_info.iloc[:, [0, 2]] = 8 - df_a_info.iloc[:, [0, 2]]
df_b_info.iloc[:, [0, 2]] = 8 - df_b_info.iloc[:, [0, 2]]

In [None]:
# Ease-of-use question subsets for versions A and B
display(df_a_easy)
display(df_b_easy)

# Trust question subsets for versions A and B
display(df_a_trust)
display(df_b_trust)

# Informativeness question subsets for versions A and B
display(df_a_info)
display(df_b_info)

# Adding total scores

### Qualtrics

In [None]:
def addtotal(dataframe, exclude="Q3.1", total_column="Total"):
    """Add a per-row total column to ``dataframe`` in place and return it.

    Args:
        dataframe: Frame whose columns are summed row-wise. It is modified
            in place.
        exclude: Column label (or list of labels) left out of the sum.
            Defaults to "Q3.1". A missing label raises ``KeyError``.
        total_column: Name of the column that receives the row sums.
            Defaults to "Total".

    Returns:
        The same ``dataframe`` object, now containing ``total_column``.
    """
    summed = dataframe.drop(exclude, axis=1)
    # Leave out a total from an earlier call so repeated calls do not add the
    # previous total into the new one.
    if total_column in summed.columns:
        summed = summed.drop(total_column, axis=1)
    dataframe[total_column] = summed.sum(axis="columns")
    return dataframe

# Add the per-respondent "Total" score to every question group, then move the
# existing index into a regular "index" column (reset_index without drop).
df_a_easy = addtotal(df_a_easy).reset_index()
df_b_easy = addtotal(df_b_easy).reset_index()

df_a_trust = addtotal(df_a_trust).reset_index()
df_b_trust = addtotal(df_b_trust).reset_index()

df_a_info = addtotal(df_a_info).reset_index()
df_b_info = addtotal(df_b_info).reset_index()

# Show the results: ease of use, trust, informativeness (A then B each)
display(df_a_easy)
display(df_b_easy)
display(df_a_trust)
display(df_b_trust)
display(df_a_info)
display(df_b_info)

# Are the button and trust data normal?

In [None]:
# Normality verdict for the per-visitor "Total" of button clicks, per version
normal_values(df_button_wide_a, "Total", "button click", "A")
normal_values(df_button_wide_b, "Total", "button click", "B")

# Normality verdict for the "Total" trust score, per version
normal_values(df_a_trust, "Total", "trust", "A")
normal_values(df_b_trust, "Total", "trust", "B")

# Mann-Whitney U test / Wilcoxon Rank-sum test

In [None]:
def conduct_test(group1, group2, alternative, significance_level=0.05):
    """Run a Wilcoxon rank-sum test on two samples and print the verdict.

    Args:
        group1: First sample.
        group2: Second sample.
        alternative: Alternative hypothesis forwarded to
            ``scipy.stats.ranksums`` ("two-sided", "less" or "greater").
        significance_level: Threshold the p-value is compared against.
            Defaults to 0.05.

    Prints the p-value together with the significance level, followed by
    whether the p-value is below that level. Returns nothing.
    """
    _, p_value = stats.ranksums(group1, group2, alternative=alternative)

    # The first line is the same for both outcomes, so print it once
    print(f"The p-value is {p_value}, and the significance level is {significance_level}")
    if p_value < significance_level:
        print("There is a significant difference between the groups.")
    else:
        print("There is no significant difference between the groups.")

def wrs_test(df_a, df_b, column, alternative='greater'):
    """Compare one column of two dataframes with the Wilcoxon rank-sum test.

    Args:
        df_a: Dataframe for the first group.
        df_b: Dataframe for the second group.
        column: Label of the column to compare; it must exist in both frames.
        alternative: Alternative hypothesis handed to ``conduct_test``.
            Defaults to 'greater', i.e. a one-sided test of whether the
            ``df_a`` values tend to be larger than the ``df_b`` values.

    The verdict is printed by ``conduct_test``; nothing is returned.
    """
    # Retrieve the column data from each dataframe
    a_data = df_a[column]
    b_data = df_b[column]

    # Perform the Wilcoxon rank-sum test
    conduct_test(a_data, b_data, alternative=alternative)

### Time spent - not normally distributed

In [None]:
# Rank-sum test on total time per visitor; wrs_test uses alternative='greater' (A vs. B)
wrs_test(df_time_wide_a, df_time_wide_b, "Total")

# T-test

In [None]:
def welch_t_test(trust_a, trust_b, significance_level=0.05):
    """Run Welch's two-sample t-test and print the verdict.

    Args:
        trust_a: Sample for the first group.
        trust_b: Sample for the second group.
        significance_level: Threshold the p-value is compared against.
            Defaults to 0.05.

    Prints the p-value together with the significance level, followed by
    whether the p-value is below that level. Returns nothing.
    """
    # equal_var=False selects Welch's test (no equal-variance assumption)
    _, p_value = stats.ttest_ind(trust_a, trust_b, equal_var=False)

    # The first line is the same for both outcomes, so print it once
    print(f"The p-value is {p_value}, and the significance level is {significance_level}")
    if p_value < significance_level:
        print("There is a significant difference between the groups.")
    else:
        print("There is no significant difference between the groups.")

### Trust - normally distributed

In [None]:
# Welch's t-test (equal_var=False) on the total trust score, version A vs. B
welch_t_test(df_a_trust["Total"], df_b_trust["Total"])

### For buttons clicked - normally distributed

In [None]:
# Welch's t-test (equal_var=False) on total button clicks per visitor, version A vs. B
welch_t_test(df_button_wide_a["Total"], df_button_wide_b["Total"])