# Sentiment Network Analysis For Gephi

### Importing Libraries

In [None]:
import pandas as pd

## Adding Engagement Column

In [18]:
# Read the merged thesis dataset and append the engagement metric in one pass.
# Engagement_Score = Likes + 2 × Replies_Count (a reply is weighted twice as
# heavily as a like).
df = pd.read_csv("Final_Thesis_Merged.csv").assign(
    Engagement_Score=lambda frame: frame["Likes"] + 2 * frame["Replies_Count"]
)

# Persist the enriched table; the later cells reload it from this file.
df.to_csv("Final_Thesis_With_Engagement.csv", index=False)

  df = pd.read_csv("Final_Thesis_With_Engagement.csv")


## Creating Edges data for Gephi

In [20]:
# Read the engagement-enriched dataset, keeping only rows where both endpoints
# of an edge (the comment and its video) are known.
df = pd.read_csv("Final_Thesis_With_Engagement.csv").dropna(
    subset=["Comment_ID", "Video_ID"]
)

# One directed, unit-weight edge per row: comment (Source) -> video (Target).
edges = (
    df[["Comment_ID", "Video_ID"]]
    .rename(columns={"Comment_ID": "Source", "Video_ID": "Target"})
    .assign(Type="Directed", Weight=1)
)

# Write the edge table for import into Gephi.
edges.to_csv("edges_gephi.csv", index=False)


  df = pd.read_csv("Final_Thesis_With_Engagement.csv")


## Creating Nodes data for Gephi

In [13]:
# Build the Gephi node table for the sentiment network: one node per comment
# (carrying its attributes) plus one node per video.
df = pd.read_csv("Final_Thesis_With_Engagement.csv")

# Normalise Sentiment to a float: take the first "-1", "1" or "0" found in the
# cell's text. Cells with no match (including missing values, which stringify
# to "nan") become NaN. expand=False makes extract return a Series, so this is
# a plain column-to-column assignment rather than assigning a DataFrame.
df["Sentiment"] = (
    df["Sentiment"]
    .astype(str)
    .str.extract(r'(-?1|0)', expand=False)
    .astype(float)
)

# Attributes exported for every comment node.
comment_columns = [
    "Comment_ID",
    "Sentiment",
    "Emotion_Class",
    "Claim_Detection",
    "Engagement_Score",
    "Event",
    "Country",
    "Agreed_with_Transcript",
    "Influenced_by_Transcript",
    "Category",
    "Factual_or_Opinion",
]

# Creating comment nodes.
# - Rows without a Comment_ID are dropped: they would become nodes with an
#   empty Id, and the edge list never references them.
# - Repeated Comment_IDs are collapsed to their first occurrence so that every
#   node Id appears once in the exported table.
comment_nodes = (
    df.dropna(subset=["Comment_ID"])[comment_columns]
    .rename(columns={
        "Comment_ID": "Id",
        "Claim_Detection": "Claim_Verification",
    })
    .drop_duplicates(subset="Id", keep="first")
    .assign(Label=lambda nodes: nodes["Id"], Type="Comment")
)

# Creating video nodes: one per distinct, non-missing Video_ID. They carry no
# comment attributes, so those columns are left empty after the concat below.
video_ids = df["Video_ID"].dropna().unique()
video_nodes = pd.DataFrame({
    "Id": video_ids,
    "Label": video_ids,
    "Type": "Video"
})

# Combining and exporting
all_nodes = pd.concat([comment_nodes, video_nodes], ignore_index=True)
all_nodes.to_csv("nodes_gephi.csv", index=False)


  df = pd.read_csv("Final_Thesis_With_Engagement.csv")


# Claim Verification Network Analysis

Joining each claim to its comment's likes and replies count to calculate the engagement score (Likes + 2 × Replies_Count).

In [19]:
# Attach Likes / Replies_Count from the merged thesis dataset to each claim and
# derive an engagement score. Claims are matched to comments by exact text
# (Claim_Text == "Rewritten Comment").

# Loading the files and dropping rows whose join key is missing
claims_df = pd.read_csv("claims_comments_Results.csv").dropna(subset=["Claim_Text"])
merged_df = pd.read_csv("Final_Thesis_Merged.csv").dropna(subset=["Rewritten Comment"])

# Keep a single engagement record per comment text. Without this, a comment
# text that occurs more than once in the merged dataset would duplicate every
# matching claim row in the left join below.
# NOTE(review): the first occurrence wins; confirm that is the desired rule
# when identical texts carry different Likes / Replies_Count.
engagement_lookup = merged_df[
    ["Rewritten Comment", "Likes", "Replies_Count"]
].drop_duplicates(subset="Rewritten Comment", keep="first")

# Left join so every claim is kept even when no comment text matches.
# validate="many_to_one" makes pandas raise if the lookup keys are not unique.
merged = claims_df.merge(
    engagement_lookup,
    left_on="Claim_Text",
    right_on="Rewritten Comment",
    how="left",
    validate="many_to_one",
).drop(columns=["Rewritten Comment"])  # redundant copy of Claim_Text

# The join must not add or remove claims.
assert len(merged) == len(claims_df), "merge changed the number of claim rows"

# Claims with no matching comment get zero likes and zero replies
merged = merged.fillna({"Likes": 0, "Replies_Count": 0})

# Engagement_Score = Likes + 2 × Replies_Count
merged["Engagement_Score"] = merged["Likes"] + 2 * merged["Replies_Count"]

merged.to_csv("claims_with_engagement.csv", index=False)


  merged_df = pd.read_csv("Final_Thesis_Merged.csv")


### Generating a Directed Edge List for Claim–Video Network in Gephi


In [39]:
# Read the claims table (with engagement info) and discard rows that lack
# either endpoint of an edge.
df = pd.read_csv("claims_with_engagement.csv").dropna(
    subset=["Claim_ID", "Video_ID"]
)

# Edge list: claim (Source) -> video (Target), one row per distinct pair,
# tagged with the Gephi edge columns Type and Weight.
edges = (
    df[["Claim_ID", "Video_ID"]]
    .rename(columns={"Claim_ID": "Source", "Video_ID": "Target"})
    .drop_duplicates(subset=["Source", "Target"])
    .assign(Type="Directed", Weight=1)
)

# Write the edge table for import into Gephi.
edges.to_csv("edges_gephi_claim_verification.csv", index=False)


### Generating Gephi Nodes for Claims (Comments) and Videos


In [1]:
# Build the Gephi node table for the claim-verification network: one node per
# claim (exported with Type "Comment") plus one node per video.
df = pd.read_csv("claims_with_engagement.csv")

# Creating claim (comment) nodes.
# - Rows without a Claim_ID are dropped: they would become nodes with an empty
#   Id, and the claim edge list already excludes them.
# - Repeated Claim_IDs are collapsed to their first occurrence so that every
#   node Id appears once in the exported table.
# NOTE(review): first occurrence wins; confirm that is acceptable if the same
# Claim_ID can carry different Result / Engagement_Score values.
comment_nodes = (
    df.dropna(subset=["Claim_ID"])[["Claim_ID", "Result", "Engagement_Score"]]
    .rename(columns={
        "Claim_ID": "Id",
        "Result": "Claim_Verification",
    })
    .drop_duplicates(subset="Id", keep="first")
    .assign(Label=lambda nodes: nodes["Id"], Type="Comment")
)

# Creating video nodes: one per distinct, non-missing Video_ID. Videos have no
# verification outcome or engagement of their own, so placeholders are used.
video_ids = df["Video_ID"].dropna().unique()
video_nodes = pd.DataFrame({
    "Id": video_ids,
    "Label": video_ids,
    "Type": "Video",
    "Claim_Verification": "N/A",
    "Engagement_Score": 0
})

# Combining and exporting
all_nodes = pd.concat([comment_nodes, video_nodes], ignore_index=True)
all_nodes.to_csv("nodes_gephi_claim_verification.csv", index=False)
