# In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import skfuzzy as fuzz
import skfuzzy.membership as mf

# Load dataset
file_path = "Buzzfeed_Shuffel_full.csv"  # Update with actual dataset path
df = pd.read_csv(file_path)

# Convert the numeric 'target' column to string labels: 0 = fake, 1 = real.
# Series.map turns every value that is missing from the mapping into NaN;
# left unchecked, that NaN only surfaces later as an obscure AttributeError
# when the label is stripped, so fail fast and name the offending values.
label_names = {0: "fake", 1: "real"}
mapped_target = df["target"].map(label_names)
unmapped_mask = mapped_target.isna()
if unmapped_mask.any():
    bad_values = df.loc[unmapped_mask, "target"].unique().tolist()
    raise ValueError(
        "'target' column must contain only 0 (fake) or 1 (real); "
        f"found unexpected values: {bad_values}"
    )
df["target"] = mapped_target

# Create a graph
G = nx.Graph()

# Add nodes and edges: authors and posts are both nodes, and an edge links an
# author to each post they wrote.
# NOTE(review): authors and posts share one node namespace, so an author whose
# name equals a post id would collapse into the same node -- confirm that ids
# and author names cannot collide in this dataset.
for index, row in df.iterrows():
    raw_authors = row["authors"]
    # A missing author cell is read as NaN; str(NaN) would otherwise create a
    # bogus shared author called "nan" connected to every such post.
    if pd.isna(raw_authors):
        continue

    authors = str(raw_authors).split(",")  # Split multiple authors
    post_id = str(row["id"])  # Each post is a node
    post_label = row["target"].strip().lower()  # Ensure 'fake' or 'real'

    for author in authors:
        author = author.strip()
        if not author:
            # Empty fragments come from stray or trailing commas.
            continue
        G.add_node(author, type="author")
        G.add_node(post_id, type="post", label=post_label)  # Store real/fake label
        G.add_edge(author, post_id)  # Connect author to their post

# Node influence: the number of neighbours each node has in the graph
node_degrees = {node: len(G[node]) for node in G}

# Fake-news score per author node (fake posts carry extra weight)
node_fake_scores = {}
for node, node_type in G.nodes(data="type"):
    if node_type != "author":
        continue  # only authors receive a score

    connected_posts = list(G.neighbors(node))
    post_labels = [G.nodes[p]["label"] for p in connected_posts]
    fake_count = post_labels.count("fake")
    # real_count is tallied alongside fake_count but does not enter the score.
    real_count = post_labels.count("real")
    total_posts = len(connected_posts)

    # **Amplified Fake News Influence**
    if total_posts > 0:
        fake_ratio = fake_count / total_posts
    else:
        fake_ratio = 0
    fake_weight = 2.5  # Fake news has 2.5x influence
    node_fake_scores[node] = (fake_ratio * fake_weight) * node_degrees[node]

# Define fuzzy membership functions (Bias towards fake news spreaders)
# The "high" trapezoid has its shoulder at 15 and trapmf requires
# a <= b <= c <= d, so the universe must reach at least 15 even when every
# score is smaller (or when there are no scores at all). Without this floor
# the largest universe value falls below 15 and trapmf rejects the parameters.
high_shoulder = 15
max_score = max(node_fake_scores.values(), default=1)
x = np.arange(0, max(max_score, high_shoulder) + 1, 1)
x_max = x[-1]  # x is increasing, so the last element is its maximum
low = mf.trapmf(x, [0, 0, 2, 4])  # Lower risk (mostly real news)
medium = mf.trimf(x, [3, 7, 12])  # Medium risk (some fake spread)
high = mf.trapmf(x, [10, high_shoulder, x_max, x_max])  # High risk (super-spreader)

# Assign fuzzy epidemiological states to every scored node
node_states = {}
for node, score in node_fake_scores.items():
    # Degree of membership of this score in each fuzzy set
    S = fuzz.interp_membership(x, low, score)
    E = fuzz.interp_membership(x, medium, score)
    I = fuzz.interp_membership(x, high, score)

    # Count the real posts of *this* node. A counter carried over from an
    # earlier loop would hold the value of whichever node was processed last
    # and would apply it to every node.
    real_count = sum(
        1 for post in G.neighbors(node) if G.nodes[post].get("label") == "real"
    )

    # **New Category: Non-Susceptible**
    if node_degrees[node] == 1 and real_count == 1:
        node_states[node] = "Non-Susceptible"  # Only one post, only real news
    elif I > E and I > S:
        node_states[node] = "Infected"  # High fake news spreader
    elif E > S:
        node_states[node] = "Exposed"   # Potential spreader
    else:
        node_states[node] = "Susceptible"  # Mostly real news, non-spreader

# Visualization
plt.figure(figsize=(12, 8))
pos = nx.spring_layout(G, seed=42)  # Fixed seed keeps the layout reproducible

# Epidemiological state -> node colour
colors = {
    "Susceptible": "blue",
    "Exposed": "orange",
    "Infected": "red",
    "Non-Susceptible": "black",
}

# Nodes that have no entry in node_states fall back to the Susceptible colour.
node_colors = []
for node in G.nodes():
    state = node_states.get(node, "Susceptible")
    node_colors.append(colors[state])

draw_options = {
    "with_labels": False,
    "node_size": 50,
    "edge_color": "gray",
    "alpha": 0.7,
    "node_color": node_colors,
}
nx.draw(G, pos, **draw_options)
plt.title("Fuzzy Epidemiological Model (Including Non-Susceptible)")

# Write the figure to disk before showing it
plt.savefig("Buzzfed_graph", dpi=300)
plt.show()