In [17]:
import seaborn as sns
import pandas as pd
import ipywidgets as widgets
from ipywidgets import interact
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import nltk

# Allow up to 100 columns to be displayed so wide previews are not truncated.
pd.set_option('display.max_columns', 100)

# Load the dataset used by every cell below (path is relative to the notebook).
df = pd.read_csv("../data/multidimensional_with_crt.csv")

# No language is passed to words(), so this is the combined stop-word list of
# every language available in the NLTK corpus. The "stopwords" corpus must be
# installed beforehand (e.g. via nltk.download("stopwords")).
STOP_WORDS = nltk.corpus.stopwords.words()

# Preview a few random rows. This must be the LAST expression of the cell:
# Jupyter only renders the value of the final expression, so a mid-cell
# df.sample(5) is evaluated and silently thrown away.
df.sample(5)

In [2]:
# Statistics for a single column: dropdown that picks which column of df
# is summarised by the histogram below.
w_select_column = widgets.Dropdown(
    description="Column: ",
    value="q_0_age_answer",
    options=df.columns,
)

def draw_histgram(column):
    """Plot the distribution of one column of the module-level ``df``.

    The column is mapped to the y-axis and the bar lengths are normalised to
    percentages (``stat="percent"``) rather than raw counts.

    Parameters
    ----------
    column : str
        Name of the column in ``df`` to summarise.
    """
    histogram_spec = dict(data=df, y=column, stat="percent")
    sns.histplot(**histogram_spec)
    plt.show()

# Re-draw the histogram whenever the dropdown selection changes.
# interact() returns the wrapped function; the trailing semicolon stops Jupyter
# from echoing its repr ("<function __main__.draw_histgram ...>") below the plot.
interact(draw_histgram, column=w_select_column);

interactive(children=(Dropdown(description='Column: ', index=7, options=('unit_id', 'tries_amount', 'questionn…

<function __main__.draw_histgram(column)>

In [7]:
# Example of the relationship between different parts of the data:
# one dropdown picks the column that groups workers, the other picks the
# assessment column it is compared against.
w_select_worker_group = widgets.Dropdown(
    description="Worker: ",
    value="q_0_age_answer",
    options=df.columns,
)
w_select_assessment_dimension = widgets.Dropdown(
    description="Assessment: ",
    value="doc_correctness_value",
    options=df.columns,
)

def draw_violin_plot(worker_group, assessment_dimension):
    """Overlay a 2-D histogram and a violin plot for two columns of ``df``.

    ``assessment_dimension`` is drawn on the x-axis and ``worker_group`` on the
    y-axis. The bivariate histogram (with a colour bar) is computed on the
    values cast to ``category``; the violin plot is drawn on top of it from
    the original, uncast data.

    Parameters
    ----------
    worker_group : str
        Name of the column in the module-level ``df`` used for the y-axis.
    assessment_dimension : str
        Name of the column in ``df`` used for the x-axis.
    """
    # Only the two selected columns need the categorical cast; converting the
    # whole frame on every widget change is wasted work. dict.fromkeys removes
    # the duplicate when the same column is chosen for both axes, which would
    # otherwise yield a frame with two identically named columns.
    selected_columns = list(dict.fromkeys([worker_group, assessment_dimension]))
    categorical = df[selected_columns].astype("category")

    # Draw both layers on one explicit Axes so the overlay does not depend on
    # pyplot's implicit "current axes" state.
    _, ax = plt.subplots()
    sns.histplot(
        data=categorical,
        x=assessment_dimension,
        y=worker_group,
        cbar=True,
        ax=ax,
    )
    sns.violinplot(data=df, x=assessment_dimension, y=worker_group, ax=ax)
    plt.show()

# Re-draw the combined plot whenever either dropdown changes.
# The trailing semicolon suppresses the repr of the function that interact()
# returns, which would otherwise be printed below the widget.
interact(
    draw_violin_plot,
    worker_group=w_select_worker_group,
    assessment_dimension=w_select_assessment_dimension,
);

interactive(children=(Dropdown(description='Worker: ', index=7, options=('unit_id', 'tries_amount', 'questionn…

<function __main__.draw_violin_plot(worker_group, assessment_dimension)>

In [35]:
def draw_wordcloud_grid(worker_group, assessment_dimension):
    """Draw a grid of word clouds, one per (worker group, assessment value) pair.

    Rows correspond to the distinct non-missing values of ``df[worker_group]``
    and columns to the distinct non-missing values of
    ``df[assessment_dimension]``. Each cell shows a word cloud built from the
    ``doc_statement`` text of the rows matching both values, with
    ``STOP_WORDS`` removed. Cells with no usable text show a placeholder
    instead of aborting the whole figure.

    Parameters
    ----------
    worker_group : str
        Name of the column in the module-level ``df`` whose values define the rows.
    assessment_dimension : str
        Name of the column in ``df`` whose values define the columns.
    """
    # NaN never compares equal to anything, so a NaN "group" could never match
    # a row and would only produce an empty cell; drop it up front.
    worker_groups = df[worker_group].dropna().unique()
    assessment_dimensions = df[assessment_dimension].dropna().unique()
    dim_worker = worker_groups.size
    dim_assessment = assessment_dimensions.size

    # plt.subplots rejects a grid with zero rows or columns.
    if dim_worker == 0 or dim_assessment == 0:
        print("No non-missing values to plot for %s / %s" % (worker_group, assessment_dimension))
        return

    # squeeze=False keeps axs two-dimensional even for a single row or column,
    # so axs[r, c] is always valid.
    fig, axs = plt.subplots(dim_worker, dim_assessment, squeeze=False)
    fig.set_size_inches(dim_assessment * 5, dim_worker * 5)

    # The per-column masks do not depend on the row, so build them once.
    assessment_masks = [df[assessment_dimension] == ad for ad in assessment_dimensions]

    for r, wg in enumerate(worker_groups):
        worker_mask = df[worker_group] == wg
        for c, ad in enumerate(assessment_dimensions):
            ax = axs[r, c]
            ax.axis("off")
            ax.set_title("%s: %s \n %s: %s" % (worker_group, wg, assessment_dimension, ad))

            # Missing statements are floats (NaN) and would break str.join.
            statements = (
                df.loc[worker_mask & assessment_masks[c], "doc_statement"]
                .dropna()
                .astype(str)
            )

            # Generate the word cloud for this specific group.
            try:
                wc = WordCloud(
                    collocations=False,
                    background_color="white",
                    width=800,
                    height=800,
                    stopwords=STOP_WORDS,
                ).generate(" ".join(statements))
            except ValueError:
                # WordCloud raises ValueError when no words are left to draw
                # (no matching rows, or only stop words remain).
                ax.text(0.5, 0.5, "no words", ha="center", va="center", transform=ax.transAxes)
                continue

            ax.imshow(wc, interpolation="bilinear")

    # Same as the other plotting helpers in this notebook: render explicitly.
    plt.show()

# Re-draw the word-cloud grid whenever either dropdown changes. The dropdown
# widgets are the same instances used for the violin plot above.
# The trailing semicolon suppresses the repr of the function that interact()
# returns, which would otherwise be printed below the widget.
interact(
    draw_wordcloud_grid,
    worker_group=w_select_worker_group,
    assessment_dimension=w_select_assessment_dimension,
);

interactive(children=(Dropdown(description='Worker: ', index=7, options=('unit_id', 'tries_amount', 'questionn…

<function __main__.draw_wordcloud_grid(worker_group, assessment_dimension)>