Install Required Libraries

`pip install -r requirements.txt`

In [1]:
import numpy as np
from scipy.stats import  chi2_contingency, shapiro, mannwhitneyu
import pandas as pd

In [2]:
# Load data
file_path_week_1 = './data/The impact of the Microsoft Edge News Page on your work productivity.(1-98).xlsx'
file_path_week_2 = './data/News and work productivity.(1-81).xlsx'
file_path_week_3 = './data/The impact of the Microsoft Edge News Page on your work productivity.(1-63).xlsx'
file_path_post_study = './data/MS Edge Study - Follow-Up Survey(1-13).xlsx'

In [3]:
# convert data to pandas dataframe
week_1_data = pd.read_excel(file_path_week_1)
week_2_data = pd.read_excel(file_path_week_2)
week_3_data = pd.read_excel(file_path_week_3)
post_study_data = pd.read_excel(file_path_post_study)

In [4]:
translation_dict = {
    "Strongly Disagree": -3,
    "Disagree": -2,
    "Somewhat Disagree": -1,
    "Neither Agree nor Disagree": 0,
    "Somewhat Agree": 1,
    "Agree": 2,
    "Strongly Agree": 3,
}

In [5]:
def test_normal_distribution(data: list) -> bool:
    stat, p = shapiro(data)
    print(f"Shapiro-Wilk Test: W = {stat:.3f}, p = {p:.3f}")
    if p > 0.05:
        print("Data is normally distributed")
        return True
    else:
        print("Data is not normally distributed")
        return False

In [6]:
def mann_whitney_test_distractions():
    print("DISTRACTIONS - comparison between weeks")
    week_1_distractions = np.array([translation_dict[x] for x in week_1_data["Opinion"]])
    week_2_distractions = np.array([translation_dict[x] for x in week_2_data["Opinion"]])
    week_3_distractions = np.array([translation_dict[x] for x in week_3_data["Opinion"]])
    print("average distractions for week 1: ", np.mean(week_1_distractions))
    print("average distractions for week 2: ", np.mean(week_2_distractions))
    print("average distractions for week 3: ", np.mean(week_3_distractions))

    stat, p = mannwhitneyu(week_1_distractions, week_2_distractions)
    print(f"Mann Whitney U Test Week 1 vs Week 2: U = {stat:.3f}, p = {p:.3f}")
    stat, p = mannwhitneyu(week_1_distractions, week_3_distractions)
    print(f"Mann Whitney U Test Week 1 vs Week 3: U = {stat:.3f}, p = {p:.3f}")
    print("----------------------------------------")
    print()

In [7]:
def mann_whitney_test_non_work_related_activities():
    print("Time spent on non-work related activities - comparison between weeks")
    week_1_time = np.array([x for x in week_1_data[
        "How much time did you spend with non-work related activities in your browser? (example: reading news) CLEANED"]])
    week_2_time = np.array([x for x in week_2_data[
        "How much time did you spend with non-work related activities in your browser? (example: reading news) CLEANED"]])
    week_3_time = np.array([x for x in week_3_data[
        "How much time did you spend with non-work related activities in your browser? (example: reading news) CLEANED"]])

    print("average time spent on non-work related activities for week 1: ", np.mean(week_1_time))
    print("average time spent on non-work related activities for week 2: ", np.mean(week_2_time))
    print("average time spent on non-work related activities for week 3: ", np.mean(week_3_time))

    stat, p = mannwhitneyu(week_1_time, week_2_time)
    print(f"Mann Whitney U Test Week 1 vs Week 2: U = {stat:.3f}, p = {p:.3f}")
    stat, p = mannwhitneyu(week_1_time, week_3_time)
    print(f"Mann Whitney U Test Week 1 vs Week 3: U = {stat:.3f}, p = {p:.3f}")
    print("----------------------------------------")
    print()

In [8]:
def chi_test():
    print("Relationship between preferred edge settings, and time spent checking the news")
    preferences = pd.crosstab(post_study_data["Which MS Edge setting do you prefer?"], post_study_data[
        "Do you think that using Edge with the news feed led you to spend more time checking the news?\n"])
    contingency_table = preferences.values
    chi2_stat, p_chi2, _, _ = chi2_contingency(contingency_table)
    print(f"Chi-Square Test: chi2 = {chi2_stat:.3f}, p = {p_chi2:.3f}")
    print("----------------------------------------")
    print()

In [9]:
mann_whitney_test_distractions()
mann_whitney_test_non_work_related_activities()
chi_test()

DISTRACTIONS - comparison between weeks
average distractions for week 1:  -0.6530612244897959
average distractions for week 2:  -1.5925925925925926
average distractions for week 3:  -0.9047619047619048
Mann Whitney U Test Week 1 vs Week 2: U = 5316.000, p = 0.000
Mann Whitney U Test Week 1 vs Week 3: U = 3374.500, p = 0.311
----------------------------------------

Time spent on non-work related activities - comparison between weeks
average time spent on non-work related activities for week 1:  2.9285714285714284
average time spent on non-work related activities for week 2:  2.308641975308642
average time spent on non-work related activities for week 3:  2.507936507936508
Mann Whitney U Test Week 1 vs Week 2: U = 4454.500, p = 0.140
Mann Whitney U Test Week 1 vs Week 3: U = 3042.000, p = 0.874
----------------------------------------

Relationship between preferred edge settings, and time spent checking the news
Chi-Square Test: chi2 = 1.376, p = 0.503
---------------------------------