In [None]:
# AAFAQ Dataset Figures Generator

This notebook generates all visualizations included in the AAFAQ dataset paper.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from scipy.stats import chi2_contingency
import numpy as np
import itertools

In [None]:
# Read the AAFAQ annotation table into `df`; every cell below plots from it.
# The file name is relative, so it is resolved against the current working directory.
dataset_path = "AAFAQ_Dataset.csv"
df = pd.read_csv(dataset_path)

In [None]:
# Figure 2: bar chart of how many rows carry each QuestionParticle value.
particle_counts = df['QuestionParticle'].value_counts()

fig, ax = plt.subplots(figsize=(10, 6))
particle_counts.plot.bar(ax=ax)
ax.set(
    title="Figure 2: Frequency of Question Particles",
    xlabel="Question Particle",
    ylabel="Frequency",
)
fig.tight_layout()
plt.show()

In [None]:
# Figure 3: bar chart of row counts per QuestionParticleType value.
particle_type_counts = df['QuestionParticleType'].value_counts()

fig, ax = plt.subplots(figsize=(6, 4))
particle_type_counts.plot.bar(ax=ax, color='skyblue')
ax.set(title="Figure 3: Question Particle Type", ylabel="Frequency")
fig.tight_layout()
plt.show()

In [None]:
# Figure 4: bar chart of row counts per QuestionType value.
question_type_counts = df['QuestionType'].value_counts()

fig, ax = plt.subplots(figsize=(6, 4))
question_type_counts.plot.bar(ax=ax, color='salmon')
ax.set(title="Figure 4: Question Type", ylabel="Frequency")
fig.tight_layout()
plt.show()

In [None]:
# Figure 5: bar chart of row counts per value of the List column.
list_counts = df['List'].value_counts()

fig, ax = plt.subplots(figsize=(6, 4))
list_counts.plot.bar(ax=ax, color='orange')
ax.set(title="Figure 5: List Feature", ylabel="Frequency")
fig.tight_layout()
plt.show()

In [None]:
# Figure 6: bar chart of row counts per AnswerType value.
answer_type_counts = df['AnswerType'].value_counts()

fig, ax = plt.subplots(figsize=(10, 6))
answer_type_counts.plot.bar(ax=ax, color='green')
ax.set(
    title="Figure 6: Answer Type",
    xlabel="Answer Type",
    ylabel="Frequency",
)
fig.tight_layout()
plt.show()

In [None]:
# Figure 7: bar chart of row counts per Intent value.
intent_counts = df['Intent'].value_counts()

fig, ax = plt.subplots(figsize=(12, 6))
intent_counts.plot.bar(ax=ax, color='purple')
ax.set(
    title="Figure 7: Intent",
    xlabel="Intent",
    ylabel="Frequency",
)
fig.tight_layout()
plt.show()

In [None]:
# Figure 8: bar chart of row counts per CognitiveLevel value.
cognitive_level_counts = df['CognitiveLevel'].value_counts()

fig, ax = plt.subplots(figsize=(8, 5))
cognitive_level_counts.plot.bar(ax=ax, color='teal')
ax.set(title="Figure 8: Cognitive Level", ylabel="Frequency")
fig.tight_layout()
plt.show()

In [None]:
# Figure 9: bar chart of row counts per Category value.
category_counts = df['Category'].value_counts()

fig, ax = plt.subplots(figsize=(12, 6))
category_counts.plot.bar(ax=ax, color='cyan')
ax.set(
    title="Figure 9: Category Distribution",
    xlabel="Category",
    ylabel="Frequency",
)
fig.tight_layout()
plt.show()

In [None]:
# Figure 10: bar chart of row counts per Subjectivity value.
subjectivity_counts = df['Subjectivity'].value_counts()

fig, ax = plt.subplots(figsize=(6, 4))
subjectivity_counts.plot.bar(ax=ax, color='coral')
ax.set(title="Figure 10: Subjectivity", ylabel="Frequency")
fig.tight_layout()
plt.show()

In [None]:
# Figure 11: bar chart of row counts per TemporalContext value.
temporal_context_counts = df['TemporalContext'].value_counts()

fig, ax = plt.subplots(figsize=(8, 5))
temporal_context_counts.plot.bar(ax=ax, color='khaki')
ax.set(title="Figure 11: Temporal Context", ylabel="Frequency")
fig.tight_layout()
plt.show()

In [None]:
# Figure 12: bar chart of row counts per PurposeContext value.
purpose_context_counts = df['PurposeContext'].value_counts()

fig, ax = plt.subplots(figsize=(8, 5))
purpose_context_counts.plot.bar(ax=ax, color='steelblue')
ax.set(title="Figure 12: Purpose Context", ylabel="Frequency")
fig.tight_layout()
plt.show()

In [None]:
# Figure 13: Correlation Matrix
# Annotation dimensions that are compared pairwise in the heatmap.
cols = [
    'QuestionParticle', 'QuestionParticleType', 'Intent', 'AnswerType', 'List',
    'Subjectivity', 'PurposeContext', 'CognitiveLevel', 'QuestionType',
]

# One encoder per column, kept in `le_map` so the fitted encoders stay available.
le_map = {col: LabelEncoder() for col in cols}

# Values are cast to str before encoding; the integer codes are stored
# alongside the originals in a new "<column>_enc" column of `df`.
for col, le in le_map.items():
    df[f'{col}_enc'] = le.fit_transform(df[col].astype(str))

def cramers_v(x, y, correction=True):
    """Return the bias-corrected Cramér's V association between two categorical variables.

    The statistic is derived from the chi-squared statistic of the
    contingency table of ``x`` against ``y``, with the small-sample bias
    correction applied to both phi-squared and the table dimensions.

    Parameters
    ----------
    x, y : pandas.Series or 1-D array
        Categorical observations of equal length (anything ``pd.crosstab``
        accepts as a single row/column key).
    correction : bool, default True
        Forwarded to ``scipy.stats.chi2_contingency``. With the default,
        Yates' continuity correction is applied to 2x2 tables, which keeps
        two perfectly associated binary variables below 1.0. Pass ``False``
        to obtain the uncorrected statistic.

    Returns
    -------
    float
        A value in [0, 1], or ``nan`` when the statistic is undefined:
        fewer than two observations, a variable with a single category, or
        a table whose bias-corrected dimension collapses to zero (as many
        categories as observations).
    """
    confusion_matrix = pd.crosstab(x, y)
    n = confusion_matrix.to_numpy().sum()
    r, k = confusion_matrix.shape

    # Degenerate tables would otherwise end in 0/0 (or x/0) further down, or
    # make chi2_contingency raise on an empty table.
    if n < 2 or r < 2 or k < 2:
        return float('nan')

    chi2 = chi2_contingency(confusion_matrix, correction=correction)[0]
    phi2 = chi2 / n

    # Bias correction: shrink phi^2 and the effective table dimensions.
    phi2corr = max(0.0, phi2 - ((k - 1) * (r - 1)) / (n - 1))
    rcorr = r - ((r - 1) ** 2) / (n - 1)
    kcorr = k - ((k - 1) ** 2) / (n - 1)

    # (rcorr - 1) equals (r - 1) * (n - r) / (n - 1), so it reaches zero when
    # every observation has its own category; the same holds for kcorr.
    denom = min(kcorr - 1, rcorr - 1)
    if denom <= 0:
        return float('nan')

    return float(np.sqrt(phi2corr / denom))

# Assemble the symmetric matrix of pairwise Cramér's V values.
# The numbers are written into a plain float ndarray and wrapped in a
# DataFrame afterwards. Filling the diagonal through `DataFrame.values` is
# avoided because that array is read-only under pandas Copy-on-Write and is
# not guaranteed to be a view of the frame. `np.eye` supplies the 1.0 diagonal
# (each dimension against itself). `itertools` comes from the import cell.
cramer_values = np.eye(len(cols), dtype=float)
for (i, col1), (j, col2) in itertools.combinations(enumerate(cols), 2):
    val = cramers_v(df[col1 + '_enc'], df[col2 + '_enc'])
    # V is symmetric, so each pair is computed once and mirrored.
    cramer_values[i, j] = val
    cramer_values[j, i] = val
cramer_matrix = pd.DataFrame(cramer_values, index=cols, columns=cols)

# Heatmap on a fixed 0..1 colour scale; NaN entries (undefined V) are left blank.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(cramer_matrix, annot=True, cmap='coolwarm', vmin=0, vmax=1, ax=ax)
ax.set_title("Figure 13: Correlation Matrix between Annotation Dimensions")
fig.tight_layout()
plt.show()