In [1]:
!pip install dash
!pip install dash --upgrade
!pip install umap
!pip install umap-learn

Collecting dash
  Downloading dash-2.18.2-py3-none-any.whl.metadata (10 kB)
Collecting Werkzeug<3.1 (from dash)
  Downloading werkzeug-3.0.6-py3-none-any.whl.metadata (3.7 kB)
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.9 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl.metadata (2.4 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Downloading dash-2.18.2-py3-none-any.whl (7.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m46.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Downloadi

1. This visualization is a SHAP (SHapley Additive exPlanations) Feature Interaction Heatmap, which highlights the interaction strengths between different features within a model. Each cell in the heatmap represents the SHAP interaction value between two features, quantifying how one feature’s contribution to the prediction depends on the value of another feature. The intensity of the color reflects the magnitude of the interaction, with higher values indicating stronger interdependencies between the corresponding features. This tool is particularly valuable in Explainable AI (XAI) as it goes beyond traditional feature importance by providing insights into how features work together to influence predictions, rather than considering them in isolation. For example, features such as “Trust” and “Social Media Engagement” may interact strongly in predicting user behavior, which could be pivotal for refining recommendation systems or understanding misinformation spread.


In [2]:
import pandas as pd
import requests
import json
import plotly.express as px
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import numpy as np

# === Data Loading ===
# NOTE(review): none of the frames loaded here are referenced by the rest of this cell,
# which plots simulated values; they are kept so later cells can still rely on them.

# Load the main dataset containing engagement data by country
main_url = "https://raw.githubusercontent.com/AidaCPL/INFOSCI301_Final_Project/main/Data/Main.csv"
df_main = pd.read_csv(main_url)

# Load the Climate-FEVER dataset with climate-related misinformation claims and evidence
climate_url = "https://raw.githubusercontent.com/AidaCPL/INFOSCI301_Final_Project/main/Data/climate-fever-dataset-r1.jsonl"
# The context manager releases the streamed connection, the timeout prevents an indefinite
# hang, and raise_for_status() reports an HTTP error instead of a confusing JSON parse failure.
with requests.get(climate_url, stream=True, timeout=30) as response:
    response.raise_for_status()
    # One JSON document per non-empty line (JSON Lines)
    climate_data = [json.loads(line) for line in response.iter_lines() if line]
df_climate = pd.DataFrame(climate_data)

# Load Politifact fake and real news datasets
fake_url = "https://raw.githubusercontent.com/AidaCPL/INFOSCI301_Final_Project/main/Data/politifact_fake.csv"
real_url = "https://raw.githubusercontent.com/AidaCPL/INFOSCI301_Final_Project/main/Data/politifact_real.csv"
df_fake = pd.read_csv(fake_url)
df_real = pd.read_csv(real_url)

# === Simulated SHAP Interaction Data (Replace with real SHAP interaction values if available) ===
# Placeholder values only: no model is trained and no SHAP values are computed in this cell.
features = ['Trust', 'Political_Alignment', 'Social_Media_Engagement', 'Risk_Taking']
# A seeded generator keeps the heatmap identical across "Restart & Run All".
interaction_matrix = np.random.default_rng(42).random((len(features), len(features)))
# The interaction of A with B is the same quantity as B with A, so mirror the matrix across
# its diagonal instead of showing two unrelated random numbers for the same pair.
interaction_matrix = (interaction_matrix + interaction_matrix.T) / 2  # Simulated interaction values
interaction_df = pd.DataFrame(interaction_matrix, columns=features, index=features)

# === Dash App Setup ===

app = dash.Dash(__name__)

# Page structure: heading, heatmap graph, one-line caption.
app.layout = html.Div(
    children=[
        html.H1(
            children="SHAP Feature Interaction Heatmap",
            style=dict(textAlign="center"),
        ),
        # Interactive heatmap; the figure itself is filled in by a callback on this id
        dcc.Graph(
            id="interaction-heatmap",
            config=dict(displayModeBar=True),
        ),
        # Caption shown under the plot
        html.Div(
            children="This heatmap visualizes the interaction strength between different features. Higher values indicate stronger interactions.",
            style=dict(textAlign="center", marginTop="10px"),
        ),
    ]
)

@app.callback(
    Output("interaction-heatmap", "figure"),
    Input("interaction-heatmap", "id")
)
def update_heatmap(_):
    """Return the heatmap figure drawn from ``interaction_df``.

    The callback is wired to the graph's own ``id`` property, and the received
    value is ignored; the figure always reflects the module-level ``interaction_df``.
    """
    heatmap = px.imshow(
        interaction_df,
        text_auto=True,
        color_continuous_scale="Viridis",
        labels=dict(color="Interaction Strength"),
    )
    # Titles for the figure, both axes and the colour bar
    layout_options = {
        "title": "Feature Interaction Heatmap",
        "xaxis_title": "Feature A",
        "yaxis_title": "Feature B",
        "coloraxis_colorbar": {"title": "Interaction Value"},
    }
    heatmap.update_layout(**layout_options)
    return heatmap

# Run the app
# `app.run` is the supported entry point; `app.run_server` is deprecated and no longer
# available in Dash 3, which an unpinned `pip install --upgrade dash` can pull in.
if __name__ == "__main__":
    app.run(debug=True)

<IPython.core.display.Javascript object>

2. This visualization is a Time-Series SHAP Explanations Dashboard, designed to track how SHAP values for specific features evolve over time, providing a dynamic representation of feature contributions in a predictive model. Each line in the plot represents the SHAP values of the selected feature across time intervals, such as months, offering insights into temporal trends and variations in feature importance. For instance, a feature like “Trust” may show increasing SHAP values over time, indicating its growing influence on the model’s predictions in a specific period. This time-series perspective is particularly useful in Explainable AI (XAI) to identify patterns, anomalies, or shifts in feature contributions, which can inform model debugging or strategic decision-making. Leveraging advanced visualization tools like Plotly, this dashboard provides interactivity through a dropdown menu that allows users to seamlessly switch between features and explore their respective temporal dynamics. Additional design elements, such as unified hover modes and customized line markers, enhance the usability and clarity of the visualization.


In [3]:
import pandas as pd
import requests
import json
import plotly.express as px
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import numpy as np

# === Data Loading ===
# NOTE(review): none of the frames loaded here are referenced by the rest of this cell,
# which plots simulated values; they are kept so later cells can still rely on them.

# Load Main.csv dataset containing engagement data by country
main_url = "https://raw.githubusercontent.com/AidaCPL/INFOSCI301_Final_Project/main/Data/Main.csv"
df_main = pd.read_csv(main_url)

# Load Climate-FEVER dataset with climate misinformation claims and evidence
climate_url = "https://raw.githubusercontent.com/AidaCPL/INFOSCI301_Final_Project/main/Data/climate-fever-dataset-r1.jsonl"
# The context manager releases the streamed connection, the timeout prevents an indefinite
# hang, and raise_for_status() reports an HTTP error instead of a confusing JSON parse failure.
with requests.get(climate_url, stream=True, timeout=30) as response:
    response.raise_for_status()
    # One JSON document per non-empty line (JSON Lines)
    climate_data = [json.loads(line) for line in response.iter_lines() if line]
df_climate = pd.DataFrame(climate_data)

# Load Politifact fake and real news datasets
fake_url = "https://raw.githubusercontent.com/AidaCPL/INFOSCI301_Final_Project/main/Data/politifact_fake.csv"
real_url = "https://raw.githubusercontent.com/AidaCPL/INFOSCI301_Final_Project/main/Data/politifact_real.csv"
df_fake = pd.read_csv(fake_url)
df_real = pd.read_csv(real_url)

# === Simulated SHAP Time-Series Data ===
# Simulate time-series SHAP values for different features (placeholder values, not model output)
try:
    # "ME" (month-end) is the current alias; the old "M" spelling emits a deprecation warning.
    dates = pd.date_range(start="2023-01-01", periods=12, freq="ME")  # Monthly data
except ValueError:
    # Older pandas releases do not recognise "ME" and only accept "M".
    dates = pd.date_range(start="2023-01-01", periods=12, freq="M")  # Monthly data
features = ['Trust', 'Political_Alignment', 'Social_Media_Engagement', 'Risk_Taking']
data = {
    # Every date is repeated once per feature, and the feature list is tiled once per date,
    # so consecutive rows cover all features for one date before moving to the next date.
    "Date": np.repeat(dates, len(features)),
    "Feature": features * len(dates),
    # Seeded generator so the plotted series is identical on every run
    "SHAP Value": np.random.default_rng(42).random(len(dates) * len(features))  # Random SHAP values
}
df_shap = pd.DataFrame(data)

# === Dash App Setup ===

app = dash.Dash(__name__)
app.title = "Time-Series SHAP Explanations"

# Page structure: heading, feature selector, time-series graph, caption
app.layout = html.Div(
    children=[
        html.H1(
            children="Time-Series SHAP Explanations",
            style=dict(textAlign="center"),
        ),
        # Single-choice feature selector; it starts on the first feature and cannot be cleared
        html.Div(
            children=[
                html.Label(children="Select Feature:", style=dict(marginTop="20px")),
                dcc.Dropdown(
                    id='feature-dropdown',
                    options=[dict(label=name, value=name) for name in features],
                    value=features[0],
                    clearable=False,
                ),
            ],
            style=dict(width="50%", margin="auto"),
        ),
        # Time-series plot; its figure is produced by the callback on this id
        dcc.Graph(
            id="shap-time-series",
            config=dict(displayModeBar=True),
            style=dict(marginTop="20px"),
        ),
        # Caption
        html.Div(
            children="This visualization shows how SHAP values for the selected feature evolve over time. Use the dropdown to switch between features.",
            style=dict(textAlign="center", marginTop="10px", fontSize="16px"),
        ),
    ]
)

# Redraw the time-series figure whenever the dropdown selection changes
@app.callback(
    Output("shap-time-series", "figure"),
    [Input("feature-dropdown", "value")]
)
def update_time_series(selected_feature):
    """Return a line chart of the SHAP values recorded for ``selected_feature``.

    Rows of the module-level ``df_shap`` whose "Feature" equals the selection are
    plotted against "Date".
    """
    feature_rows = df_shap.loc[df_shap["Feature"] == selected_feature]

    chart = px.line(
        feature_rows,
        x="Date",
        y="SHAP Value",
        title=f"Time-Series SHAP Values for {selected_feature}",
        markers=True,
    )

    # Thicker solid line and larger markers than the Plotly defaults
    chart.update_traces(line={"width": 3, "dash": "solid"}, marker={"size": 8})

    layout_options = {
        "xaxis_title": "Date",
        "yaxis_title": "SHAP Value",
        "template": "plotly_white",
        "hovermode": "x unified",
    }
    chart.update_layout(**layout_options)
    return chart

# Run the Dash app
# `app.run` is the supported entry point; `app.run_server` is deprecated and no longer
# available in Dash 3, which an unpinned `pip install --upgrade dash` can pull in.
if __name__ == "__main__":
    app.run(debug=True)


'M' is deprecated and will be removed in a future version, please use 'ME' instead.



<IPython.core.display.Javascript object>

3. This enhanced time-series SHAP explanation visualization provides a dynamic and interactive approach to understanding how the contributions of multiple features to a machine learning model’s predictions evolve over time. By leveraging advanced Explainable AI (XAI) techniques, this tool allows users to explore SHAP values—a well-established metric for feature importance—across custom date ranges and for multiple features simultaneously. Users can intuitively select features such as “Trust,” “Political Alignment,” or “Risk-Taking” from a multi-select dropdown menu, and refine their analysis by specifying a desired date range via an interactive date picker. This combination of flexibility and granularity empowers stakeholders to uncover temporal patterns, shifts, or anomalies in feature importance that might otherwise remain obscured. Using advanced tools for visualization, the dashboard ensures clarity and usability through color-coded time-series plots, unified hover modes, and adjustable markers. These features enhance interpretability and enable users to trace feature dynamics across different time scales, supporting nuanced analyses. For instance, a user might observe that “Social Media Engagement” exhibits a spike in SHAP values during specific time periods, suggesting heightened influence on the model during those intervals.


In [4]:
import pandas as pd
import requests
import json
import plotly.express as px
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import numpy as np

# === Data Loading ===
# NOTE(review): none of the frames loaded here are referenced by the rest of this cell,
# which plots simulated values; they are kept so later cells can still rely on them.

# Load Main.csv dataset containing engagement data by country
main_url = "https://raw.githubusercontent.com/AidaCPL/INFOSCI301_Final_Project/main/Data/Main.csv"
df_main = pd.read_csv(main_url)

# Load Climate-FEVER dataset with climate misinformation claims and evidence
climate_url = "https://raw.githubusercontent.com/AidaCPL/INFOSCI301_Final_Project/main/Data/climate-fever-dataset-r1.jsonl"
# The context manager releases the streamed connection, the timeout prevents an indefinite
# hang, and raise_for_status() reports an HTTP error instead of a confusing JSON parse failure.
with requests.get(climate_url, stream=True, timeout=30) as response:
    response.raise_for_status()
    # One JSON document per non-empty line (JSON Lines)
    climate_data = [json.loads(line) for line in response.iter_lines() if line]
df_climate = pd.DataFrame(climate_data)

# Load Politifact fake and real news datasets
fake_url = "https://raw.githubusercontent.com/AidaCPL/INFOSCI301_Final_Project/main/Data/politifact_fake.csv"
real_url = "https://raw.githubusercontent.com/AidaCPL/INFOSCI301_Final_Project/main/Data/politifact_real.csv"
df_fake = pd.read_csv(fake_url)
df_real = pd.read_csv(real_url)

# === Simulated SHAP Time-Series Data ===
# Simulate daily SHAP values for multiple features (placeholder values, not model output)
# 730 consecutive days starting 2023-01-01, i.e. through 2024-12-30 (2024 is a leap year)
dates = pd.date_range(start="2023-01-01", periods=730, freq="D")
features = ['Trust', 'Political_Alignment', 'Social_Media_Engagement', 'Risk_Taking', 'Education', 'Age']
data = {
    # Every date is repeated once per feature, and the feature list is tiled once per date,
    # so consecutive rows cover all features for one date before moving to the next date.
    "Date": np.repeat(dates, len(features)),
    "Feature": features * len(dates),
    # Seeded generator so the plotted series is identical on every run
    "SHAP Value": np.random.default_rng(42).random(len(dates) * len(features))  # Random SHAP values
}
df_shap = pd.DataFrame(data)

# === Dash App Setup ===
app = dash.Dash(__name__)
app.title = "Enhanced Time-Series SHAP Explanations"

# Page structure: heading, multi-feature selector, date-range picker, graph, caption
app.layout = html.Div(
    children=[
        html.H1(
            children="Enhanced Time-Series SHAP Explanations",
            style=dict(textAlign="center"),
        ),
        # Multi-select feature dropdown, pre-set to the first two features
        html.Div(
            children=[
                html.Label(children="Select Features:", style=dict(marginTop="20px")),
                dcc.Dropdown(
                    id='feature-dropdown',
                    options=[dict(label=name, value=name) for name in features],
                    value=features[:2],
                    multi=True,
                    clearable=True,
                ),
            ],
            style=dict(width="60%", margin="auto"),
        ),
        # Date range picker, initialised to the full span of the simulated dates
        html.Div(
            children=[
                html.Label(children="Select Date Range:", style=dict(marginTop="20px")),
                dcc.DatePickerRange(
                    id='date-picker',
                    start_date=str(dates.min()),
                    end_date=str(dates.max()),
                    display_format="YYYY-MM-DD",
                    style=dict(margin="auto"),
                ),
            ],
            style=dict(width="60%", margin="auto", textAlign="center"),
        ),
        # Time-series plot; its figure is produced by the callback on this id
        dcc.Graph(
            id="shap-time-series",
            config=dict(displayModeBar=True),
            style=dict(marginTop="20px"),
        ),
        # Caption
        html.Div(
            children="This enhanced visualization allows you to select multiple features and a custom date range to analyze SHAP values over time.",
            style=dict(textAlign="center", marginTop="10px", fontSize="16px"),
        ),
    ]
)

# Callback to update the time-series visualization based on the selected features and date range
@app.callback(
    Output("shap-time-series", "figure"),
    [Input("feature-dropdown", "value"),
     Input("date-picker", "start_date"),
     Input("date-picker", "end_date")]
)
def update_time_series(selected_features, start_date, end_date):
    """Return a multi-line chart of SHAP values for the chosen features and dates.

    Args:
        selected_features: Feature names from the multi-select dropdown. ``None`` or
            an empty list selects nothing and produces an empty chart.
        start_date: Inclusive lower date bound, or ``None`` for no lower bound.
        end_date: Inclusive upper date bound, or ``None`` for no upper bound.

    Returns:
        A Plotly figure with one coloured line per selected feature.
    """
    # A cleared control can deliver None; `isin(None)` and a comparison against None both
    # raise, which would leave the dashboard showing a callback error instead of a chart.
    selected_features = selected_features or []
    in_scope = df_shap["Feature"].isin(selected_features)
    if start_date is not None:
        in_scope = in_scope & (df_shap["Date"] >= start_date)
    if end_date is not None:
        in_scope = in_scope & (df_shap["Date"] <= end_date)
    filtered_df = df_shap[in_scope]

    # Generate a line plot using Plotly
    fig = px.line(
        filtered_df,
        x="Date",
        y="SHAP Value",
        color="Feature",
        title="Time-Series SHAP Values",
        markers=True
    )

    # Add additional elements for advanced visualization
    fig.update_traces(line=dict(width=2), marker=dict(size=6))
    fig.update_layout(
        xaxis_title="Date",
        yaxis_title="SHAP Value",
        template="plotly_white",
        hovermode="x unified"
    )
    return fig

# Run the Dash app
# `app.run` is the supported entry point; `app.run_server` is deprecated and no longer
# available in Dash 3, which an unpinned `pip install --upgrade dash` can pull in.
if __name__ == "__main__":
    app.run(debug=True)

<IPython.core.display.Javascript object>

4. This visualization leverages Explainable AI (XAI) principles and advanced interactive tools to provide a comprehensive analysis of SHAP (SHapley Additive exPlanations) values over time, facilitating an intuitive understanding of feature contributions to model predictions. The dashboard is structured around three interconnected components: a time-series line plot, a feature importance bar chart, and a statistical summary table, all dynamically responding to user inputs such as selected features and custom date ranges. The time-series plot illustrates how SHAP values, which represent the marginal impact of features, evolve daily over two years, enabling users to identify temporal trends, fluctuations, and periods of high feature influence. The bar chart aggregates these SHAP values within the selected range to highlight the relative importance of each feature, providing a concise snapshot of feature rankings. Meanwhile, the statistical table presents detailed metrics, including minimum, maximum, mean, and standard deviation, further enhancing interpretability by quantifying the variability and stability of feature contributions. This visualization embodies advanced interaction capabilities, allowing users to seamlessly filter by multiple features and adjust the date range to explore localized insights or broad patterns. The use of interactive elements, such as tooltips and hover modes, makes it easy to access granular details on feature behavior while maintaining a clean and professional layout. By integrating these tools with SHAP values, the visualization bridges the gap between complex AI models and human decision-making, offering a transparent and actionable exploration of model behavior. It serves as an invaluable resource for data scientists, domain experts, and stakeholders seeking to validate model outputs, understand feature dynamics, and ensure alignment with real-world expectations.


In [5]:
import pandas as pd
import requests
import json
import plotly.express as px
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import numpy as np

# === Simulated SHAP Time-Series Data ===
# Simulate daily SHAP values for multiple features (placeholder values, not model output)
# 730 consecutive days starting 2023-01-01, i.e. through 2024-12-30 (2024 is a leap year)
dates = pd.date_range(start="2023-01-01", periods=730, freq="D")
features = ['Trust', 'Political_Alignment', 'Social_Media_Engagement', 'Risk_Taking', 'Education', 'Age']
data = {
    # Every date is repeated once per feature, and the feature list is tiled once per date,
    # so consecutive rows cover all features for one date before moving to the next date.
    "Date": np.repeat(dates, len(features)),
    "Feature": features * len(dates),
    # Seeded generator so the charts and statistics are identical on every run
    "SHAP Value": np.random.default_rng(42).random(len(dates) * len(features))  # Random SHAP values
}
df_shap = pd.DataFrame(data)

# === Dash App Setup ===
app = dash.Dash(__name__)
app.title = "Advanced Explainable AI Time-Series"

# Page structure: heading, feature selector, date-range picker, line plot,
# bar chart, statistics table, caption
app.layout = html.Div(
    children=[
        html.H1(
            children="Explainable AI - Advanced Time-Series SHAP Visualizations",
            style=dict(textAlign="center"),
        ),
        # Multi-select feature dropdown, pre-set to the first three features
        html.Div(
            children=[
                html.Label(children="Select Features:", style=dict(marginTop="20px")),
                dcc.Dropdown(
                    id='feature-dropdown',
                    options=[dict(label=name, value=name) for name in features],
                    value=features[:3],
                    multi=True,
                    clearable=True,
                ),
            ],
            style=dict(width="60%", margin="auto"),
        ),
        # Date range picker, initialised to the full span of the simulated dates
        html.Div(
            children=[
                html.Label(children="Select Date Range:", style=dict(marginTop="20px")),
                dcc.DatePickerRange(
                    id='date-picker',
                    start_date=str(dates.min()),
                    end_date=str(dates.max()),
                    display_format="YYYY-MM-DD",
                    style=dict(margin="auto"),
                ),
            ],
            style=dict(width="60%", margin="auto", textAlign="center"),
        ),
        # SHAP time-series line plot (mode bar shown)
        dcc.Graph(
            id="shap-time-series",
            config=dict(displayModeBar=True),
            style=dict(marginTop="20px"),
        ),
        # SHAP feature-importance bar chart (mode bar hidden)
        dcc.Graph(
            id="shap-feature-importance",
            config=dict(displayModeBar=False),
            style=dict(marginTop="20px"),
        ),
        # Statistics table; its rows are supplied by the callback through "children"
        html.Div(
            children=[
                html.H3(
                    children="Feature Statistics Summary",
                    style=dict(textAlign="center", marginTop="20px"),
                ),
                html.Table(
                    id="feature-stats-table",
                    style=dict(margin="auto", width="80%", border="1px solid black"),
                ),
            ]
        ),
        # Caption
        html.Div(
            children="Explore SHAP time-series explanations with dynamic feature selection, date filtering, and feature statistics.",
            style=dict(textAlign="center", marginTop="10px", fontSize="16px"),
        ),
    ]
)

# Callback to update the time-series line plot, feature importance bar chart, and statistics table
@app.callback(
    [Output("shap-time-series", "figure"),
     Output("shap-feature-importance", "figure"),
     Output("feature-stats-table", "children")],
    [Input("feature-dropdown", "value"),
     Input("date-picker", "start_date"),
     Input("date-picker", "end_date")]
)
def update_visualizations(selected_features, start_date, end_date):
    """Rebuild the line plot, bar chart and statistics table for the current filters.

    Args:
        selected_features: Feature names from the multi-select dropdown. ``None`` or
            an empty list selects nothing.
        start_date: Inclusive lower date bound, or ``None`` for no lower bound.
        end_date: Inclusive upper date bound, or ``None`` for no upper bound.

    Returns:
        ``(line_fig, bar_fig, stats_table)`` where ``stats_table`` is a list of
        ``html.Tr`` rows used as the children of the statistics table.
    """
    # A cleared control can deliver None; `isin(None)` and a comparison against None both
    # raise, which would leave the dashboard showing a callback error instead of the
    # "no data" state handled below.
    selected_features = selected_features or []
    in_scope = df_shap["Feature"].isin(selected_features)
    if start_date is not None:
        in_scope = in_scope & (df_shap["Date"] >= start_date)
    if end_date is not None:
        in_scope = in_scope & (df_shap["Date"] <= end_date)
    filtered_df = df_shap[in_scope]

    # === SHAP Time-Series Line Plot ===
    line_fig = px.line(
        filtered_df,
        x="Date",
        y="SHAP Value",
        color="Feature",
        title="Time-Series SHAP Values",
        markers=True
    )
    line_fig.update_traces(line=dict(width=2), marker=dict(size=6))
    line_fig.update_layout(
        xaxis_title="Date",
        yaxis_title="SHAP Value",
        template="plotly_white",
        hovermode="x unified"
    )

    # === SHAP Feature Importance Bar Chart ===
    # Mean SHAP value per feature within the filtered rows
    feature_importance = filtered_df.groupby("Feature")["SHAP Value"].mean().reset_index()
    bar_fig = px.bar(
        feature_importance,
        x="SHAP Value",
        y="Feature",
        orientation="h",
        title="Feature Importance (Average SHAP Values)",
        color="Feature",
        text="SHAP Value"
    )
    bar_fig.update_layout(
        xaxis_title="Average SHAP Value",
        yaxis_title="Feature",
        template="plotly_white"
    )
    bar_fig.update_traces(texttemplate='%{text:.2f}', textposition='inside')

    # === Feature Statistics Table ===
    stats_table = []
    if not filtered_df.empty:
        stats = filtered_df.groupby("Feature")["SHAP Value"].agg(["min", "max", "mean", "std"]).reset_index()
        # Table header
        stats_table.append(html.Tr([html.Th(col) for col in stats.columns]))
        # Table rows: the feature name followed by its statistics, rounded to three
        # decimals so the summary stays readable instead of showing full float precision
        for row in stats.itertuples(index=False):
            cells = [html.Td(row[0])]
            cells.extend(html.Td(f"{value:.3f}") for value in row[1:])
            stats_table.append(html.Tr(cells))
    else:
        stats_table.append(html.Tr([html.Td("No data available for selected range or features.")]))

    return line_fig, bar_fig, stats_table

# Run the Dash app
# `app.run` is the supported entry point; `app.run_server` is deprecated and no longer
# available in Dash 3, which an unpinned `pip install --upgrade dash` can pull in.
if __name__ == "__main__":
    app.run(debug=True)

<IPython.core.display.Javascript object>

5. This visualization represents an enhanced interactive causal graph, which integrates Explainable AI concepts to reveal the relationships between key components of a recommendation system. The graph is structured as a multi-layered network, where nodes represent distinct entities such as user features (e.g., “User Interests,” “Social Network”), interaction points (e.g., “Content Engagement”), system processes (e.g., “Recommendation Algorithm”), and output metrics (e.g., “CTR,” “Recommended Content”). Edges between nodes indicate causal relationships, with their thickness and color intensity signifying the strength of influence, as quantified by weighted values. Users can dynamically filter the graph by adjusting a slider to set a minimum edge weight threshold, thereby focusing on the most significant causal interactions. Additionally, a layer checklist allows the exploration of specific layers, enabling targeted analysis of user features, system processes, or interactions separately or in combination. The tool employs advanced visualization techniques to enhance interpretability and user experience. Nodes are color-coded by their respective layers and sized according to their connectivity, providing an intuitive representation of their importance in the network. Hover functionality enriches the graph with detailed information about each node’s description and the weight of each edge. By dynamically adjusting filters and exploring various layers, stakeholders can analyze how user behaviors and system features interact to shape outputs, such as click-through rates or feedback. This interactive approach, powered by NetworkX and Plotly, underscores the transparency and modularity of Explainable AI, making complex relationships in data-driven systems accessible and actionable for both technical and non-technical audiences.



In [6]:
import pandas as pd
import networkx as nx
import plotly.graph_objects as go
import dash
from dash import dcc, html
from dash.dependencies import Input, Output

# === Extended Causal Data ===
# Graph vertices, written as (id, layer, description) rows and expanded into dicts.
nodes = [
    {"id": node_id, "layer": layer, "description": description}
    for node_id, layer, description in (
        ("User Interests", "User Features", "Interest categories of the user."),
        ("Social Network", "User Features", "User's social connections."),
        ("Age", "User Features", "User's age group."),
        ("Content Engagement", "Interaction", "User's engagement with content."),
        ("Recommendation Algorithm", "System", "System's recommendation algorithm."),
        ("Recommended Content", "Output", "Content recommended to the user."),
        ("User Feedback", "Output", "Feedback provided by the user."),
        ("CTR (Click-Through Rate)", "System", "Click-through rate of recommendations."),
    )
]

# Directed, weighted links, written as (source, target, weight) rows and expanded into dicts.
edges = [
    {"source": source, "target": target, "weight": weight}
    for source, target, weight in (
        ("User Interests", "Content Engagement", 0.8),
        ("Social Network", "Content Engagement", 0.6),
        ("Age", "Content Engagement", 0.4),
        ("Content Engagement", "Recommendation Algorithm", 0.9),
        ("Recommendation Algorithm", "Recommended Content", 0.95),
        ("Recommended Content", "User Feedback", 0.7),
        ("User Feedback", "CTR (Click-Through Rate)", 0.6),
        ("CTR (Click-Through Rate)", "Recommendation Algorithm", 0.5),
    )
]

# === Dash App Setup ===
app = dash.Dash(__name__)
app.title = "Advanced Interactive Recommendation Graph"

# Page structure: heading, layer checklist, weight slider, graph, explanation text
app.layout = html.Div(
    children=[
        html.H1(
            children="Enhanced Recommendation System Graph",
            style=dict(textAlign="center"),
        ),
        # Layer checklist; every layer is ticked initially
        html.Div(
            children=[
                html.Label(children="Select Layers to Display:", style=dict(marginTop="20px")),
                dcc.Checklist(
                    id="layer-checklist",
                    options=[
                        dict(label=layer_name, value=layer_name)
                        for layer_name in ("User Features", "Interaction", "System", "Output")
                    ],
                    value=["User Features", "Interaction", "System", "Output"],
                    inline=True,
                ),
            ],
            style=dict(textAlign="center", marginBottom="20px"),
        ),
        # Edge-weight threshold slider: 0 to 1 in steps of 0.1, starting at 0.5
        html.Div(
            children=[
                html.Label(
                    children="Adjust Causal Effect Threshold (Edge Weight):",
                    style=dict(marginTop="20px"),
                ),
                dcc.Slider(
                    id="weight-slider",
                    min=0,
                    max=1,
                    step=0.1,
                    value=0.5,
                    marks={tick / 10: f"{tick / 10:.1f}" for tick in range(11)},
                    tooltip=dict(placement="bottom", always_visible=True),
                ),
            ],
            style=dict(width="60%", margin="auto"),
        ),
        # Graph visualization (mode bar hidden)
        dcc.Graph(
            id="causal-graph",
            config=dict(displayModeBar=False),
            style=dict(marginTop="30px"),
        ),
        # Explanation text, filled in by the callback
        html.Div(
            id="explanation-section",
            style=dict(textAlign="center", marginTop="20px", fontSize="16px"),
        ),
    ]
)

# Callback to update the graph and explanation
@app.callback(
    [Output("causal-graph", "figure"),
     Output("explanation-section", "children")],
    [Input("weight-slider", "value"),
     Input("layer-checklist", "value")]
)
def update_graph(weight_threshold, visible_layers):
    """Rebuild the causal graph for the chosen edge-weight threshold and layers.

    Args:
        weight_threshold: Minimum weight an edge needs in order to be drawn.
        visible_layers: Names of the layers whose nodes are shown; ``None`` or an
            empty list shows nothing.

    Returns:
        ``(figure, explanation)``: the Plotly figure and a one-sentence summary. When
        no edge survives the filters an empty figure with a notice is returned, and
        any exception is reported through the explanation text with an empty figure.
    """
    # Left-to-right column of each layer. The layout is keyed on this numeric rank rather
    # than on the layer name, so columns follow the causal flow instead of name ordering.
    layer_order = {"User Features": 0, "Interaction": 1, "System": 2, "Output": 3}
    node_colors = {"User Features": "blue", "Interaction": "green", "System": "orange", "Output": "red"}

    try:
        visible_layers = visible_layers or []

        # Create a directed graph containing only nodes from the visible layers
        G = nx.DiGraph()
        for node in nodes:
            if node["layer"] in visible_layers:
                G.add_node(
                    node["id"],
                    layer=node["layer"],
                    layer_order=layer_order[node["layer"]],
                    description=node["description"],
                )

        # Add edges that pass the weight filter and whose endpoints are both visible
        for edge in edges:
            if edge["weight"] >= weight_threshold and G.has_node(edge["source"]) and G.has_node(edge["target"]):
                G.add_edge(edge["source"], edge["target"], weight=edge["weight"])

        # Handle empty graph
        if G.number_of_edges() == 0:
            fig = go.Figure()
            fig.update_layout(
                title="No Edges Meet the Current Threshold",
                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                template="plotly_white"
            )
            explanation = "No causal relationships meet the selected threshold."
            return fig, explanation

        # One column per layer, ordered by the numeric rank assigned above
        pos = nx.multipartite_layout(G, subset_key="layer_order")

        # Create edge traces: line width and colour opacity both grow with the weight
        edge_traces = []
        for source, target, attrs in G.edges(data=True):
            weight = attrs["weight"]
            x0, y0 = pos[source]
            x1, y1 = pos[target]
            edge_traces.append(go.Scatter(
                x=[x0, x1, None],
                y=[y0, y1, None],
                line=dict(width=weight * 5, color=f"rgba(0, 0, 255, {0.25 + 0.75 * weight:.2f})"),
                hoverinfo="text",
                mode="lines",
                text=f"{source} → {target}<br>Weight: {weight:.2f}",
                name=f"{source} → {target}"  # Add meaningful legend names
            ))

        # Create node traces
        node_x = []
        node_y = []
        node_text = []
        node_sizes = []
        node_fill = []
        for name, attrs in G.nodes(data=True):
            x, y = pos[name]
            node_x.append(x)
            node_y.append(y)
            node_text.append(f"{name}<br>{attrs['description']}")
            # Size by total degree: on a DiGraph `neighbors()` yields successors only,
            # which would ignore every incoming edge of a node.
            node_sizes.append(G.degree(name) * 5 + 20)
            node_fill.append(node_colors[attrs["layer"]])

        node_trace = go.Scatter(
            x=node_x,
            y=node_y,
            mode="markers",
            hoverinfo="text",
            text=node_text,
            name="Nodes",  # otherwise the legend shows an auto-generated "trace N" label
            marker=dict(
                size=node_sizes,
                color=node_fill,
                line=dict(width=2, color="darkgray")
            )
        )

        # Combine traces
        fig = go.Figure(data=edge_traces + [node_trace])
        fig.update_layout(
            title="Enhanced Recommendation Graph with Detailed Nodes and Edges",
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            template="plotly_white"
        )

        explanation = f"Displaying causal relationships with weights ≥ {weight_threshold:.1f} in selected layers: {', '.join(visible_layers)}."
        return fig, explanation

    except Exception as e:
        # Surface the failure in the page instead of leaving the callback in an error state
        fig = go.Figure()
        explanation = f"An error occurred: {e}. Please try again."
        return fig, explanation

# `app.run` is the supported entry point; `app.run_server` is deprecated and no longer
# available in Dash 3, which an unpinned `pip install --upgrade dash` can pull in.
if __name__ == "__main__":
    app.run(debug=True)

<IPython.core.display.Javascript object>

6. This visualization provides an advanced exploration of text embeddings for dynamic topic tracking, showcasing the synergy of Explainable AI and state-of-the-art tools such as UMAP and KMeans clustering. By projecting high-dimensional text embeddings into a 2D space, the scatter plot effectively captures and visualizes the relationships between claims from the Climate-FEVER dataset. Each point represents a claim, with its position determined by UMAP’s dimensionality reduction, which preserves semantic proximity, and its color coded by KMeans clusters to highlight distinct topic groupings. The point size, dynamically adjustable via the slider, is linked to an importance metric, allowing for the emphasis of more significant claims within the dataset. The dropdown feature facilitates filtering by cluster or label, offering targeted insights into claims categorized as true, false, or a mixture. Hover functionality enhances interactivity, providing rich contextual details such as the claim text, its cluster, and its label, directly on the graph. This level of granularity empowers users to explore the underlying structures and relationships within the data. The combination of interactive filters and scalable point sizes ensures the visualization remains adaptable to diverse analysis needs, making it a valuable tool for researchers and decision-makers aiming to understand complex textual datasets in the context of misinformation and topic dynamics. This visualization exemplifies the application of Explainable AI by rendering intricate machine learning outputs interpretable and actionable for both technical and non-technical audiences.

In [7]:
import pandas as pd
import requests
import json
import plotly.express as px
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
import umap

# === Data Loading ===

# Load Climate-FEVER dataset with climate misinformation claims and evidence
climate_url = "https://raw.githubusercontent.com/AidaCPL/INFOSCI301_Final_Project/main/Data/climate-fever-dataset-r1.jsonl"
climate_data = []
# The context manager releases the streamed connection even if parsing fails,
# and the timeout keeps a stalled download from hanging the kernel forever.
with requests.get(climate_url, stream=True, timeout=30) as response:
    # Fail loudly on a 4xx/5xx reply instead of handing an error page to
    # json.loads and surfacing a confusing JSONDecodeError.
    response.raise_for_status()
    for line in response.iter_lines():
        if line:  # iter_lines can yield empty lines; skip them
            climate_data.append(json.loads(line))
df_climate = pd.DataFrame(climate_data)

# Extract claims and their labels for clustering
if "claim" in df_climate and "claim_label" in df_climate:
    claims = df_climate["claim"]
    claim_labels = df_climate["claim_label"]
else:
    # Fallback data: 100 placeholder claims with 100 matching labels
    # (33 repetitions of the three labels plus one extra "true").
    claims = ["Example claim " + str(i) for i in range(100)]  # Fallback data
    claim_labels = ["true", "false", "mixture"] * 33 + ["true"]

# === Text Embedding and Clustering ===

# Turn every claim into a TF-IDF vector (vocabulary capped at 500 features)
vectorizer = TfidfVectorizer(max_features=500)
X = vectorizer.fit_transform(claims)

# Partition the TF-IDF vectors into five KMeans clusters (seeded)
kmeans = KMeans(random_state=42, n_clusters=5)
clusters = kmeans.fit_predict(X)

# Reduce the densified TF-IDF matrix to two UMAP coordinates (seeded)
umap_model = umap.UMAP(random_state=42, n_neighbors=15, min_dist=0.1)
umap_embeddings = umap_model.fit_transform(X.toarray())

# Assemble the plotting frame in one expression: UMAP coordinates first, then
# the per-claim metadata columns in the order cluster, label, claim, importance.
df_embeddings = pd.DataFrame(umap_embeddings, columns=["x", "y"]).assign(
    cluster=clusters,
    label=claim_labels,
    claim=claims,
    # Simulated feature for importance: claim length folded into the range 1..10
    importance=[(len(text) % 10) + 1 for text in claims],
)

# === Dash App Setup ===
app = dash.Dash(__name__)
app.title = "Enhanced Dynamic Topic Tracking"

# Build the filter options from the data actually loaded instead of
# hard-coding them: a fixed list of label values need not match the labels
# present in df_embeddings, in which case those filters would select nothing.
# Cluster values keep the "cluster_<n>" form and label values are the raw label
# (assumed to be strings), which is what the update callback compares against.
cluster_options = [
    {"label": f"Cluster {int(cluster_id)}", "value": f"cluster_{int(cluster_id)}"}
    for cluster_id in sorted(df_embeddings["cluster"].unique())
]
label_options = [
    {"label": f"Label: {str(label_value).capitalize()}", "value": label_value}
    for label_value in sorted(df_embeddings["label"].dropna().unique(), key=str)
]
filter_options = (
    [{"label": "All Clusters", "value": "all"}] + cluster_options + label_options
)

# App layout
app.layout = html.Div([
    html.H1("Enhanced Dynamic Topic Tracking with Word Embedding Clustering", style={"textAlign": "center"}),

    # Dropdown for selecting cluster or label filter
    html.Div([
        html.Label("Filter by Cluster or Label:", style={"marginTop": "20px"}),
        dcc.Dropdown(
            id="filter-dropdown",
            options=filter_options,
            value="all",
            clearable=False
        )
    ], style={"width": "60%", "margin": "auto"}),

    # Slider to adjust point size (passed to the plot as the maximum marker size)
    html.Div([
        html.Label("Adjust Point Size:", style={"marginTop": "20px"}),
        dcc.Slider(
            id="size-slider",
            min=5,
            max=50,
            step=1,
            value=15,
            marks={i: str(i) for i in range(5, 51, 5)},
        )
    ], style={"width": "60%", "margin": "auto"}),

    # Visualization
    dcc.Graph(id="embedding-visualization", style={"marginTop": "30px"}),

    # Explanation Section
    html.Div(id="explanation-section", style={"textAlign": "center", "marginTop": "20px", "fontSize": "16px"})
])

# Callback to update the embedding visualization and explanation dynamically
@app.callback(
    [Output("embedding-visualization", "figure"),
     Output("explanation-section", "children")],
    [Input("filter-dropdown", "value"),
     Input("size-slider", "value")]
)
def update_visualization(filter_value, point_size):
    """Redraw the embedding scatter plot for the chosen filter and point size.

    Args:
        filter_value: Dropdown selection. "all" (or None) shows every claim,
            "cluster_<n>" keeps the rows whose cluster equals n, and any other
            value is matched against the "label" column.
        point_size: Slider value, used as the maximum marker size.

    Returns:
        A (figure, explanation) pair for the graph and the explanation text.
    """
    # Filter data based on selection
    if filter_value is None or filter_value == "all":
        # None is treated like "all" so a cleared dropdown cannot crash the
        # callback on `.startswith`.
        filtered_data = df_embeddings
        title = "All Clusters and Labels"
    elif isinstance(filter_value, str) and filter_value.startswith("cluster_"):
        cluster_number = int(filter_value.split("_")[1])
        filtered_data = df_embeddings[df_embeddings["cluster"] == cluster_number]
        title = f"Cluster {cluster_number}"
    else:
        filtered_data = df_embeddings[df_embeddings["label"] == filter_value]
        title = f"Label: {str(filter_value).capitalize()}"

    # Pin the colour range to the full data set so that a given cluster keeps
    # the same colour no matter which subset is currently displayed.
    cluster_range = [
        int(df_embeddings["cluster"].min()),
        int(df_embeddings["cluster"].max()),
    ]

    # Create scatter plot with dynamic size and color gradient
    fig = px.scatter(
        filtered_data,
        x="x",
        y="y",
        size="importance",
        color="cluster",
        hover_data={"claim": True, "label": True, "x": False, "y": False},
        title=title,
        labels={"cluster": "Cluster"},
        size_max=point_size,
        range_color=cluster_range
    )

    # Explanation
    if filtered_data.empty:
        # Say so explicitly rather than claiming to display an empty selection.
        explanation = (
            f"No claims match {title}. "
            "Choose a different cluster or label."
        )
    else:
        explanation = (
            f"Displaying {title} with point size adjusted to {point_size}. "
            "Hover over points to see individual claims, labels, and cluster information."
        )

    return fig, explanation


if __name__ == "__main__":
    # `run_server` is the legacy spelling and is removed in Dash 3, which the
    # unpinned `pip install dash --upgrade` at the top of the notebook can pull
    # in; `run` is the supported entry point and accepts the same arguments.
    app.run(debug=True)


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.



<IPython.core.display.Javascript object>