# North Macedonian 2024 Election Misinformation Assessment
---
This notebook is part of the North Macedonian 2024 Election Misinformation Assessment project. It contains code to analyze and visualize misinformation related to the elections in North Macedonia.

##### Libraries

In [4]:
from dotenv import load_dotenv
import requests
import json
from urllib.parse import quote_plus
import tqdm
import json
import os

##### Constants

In [6]:
load_dotenv()
llama3_url = "http://localhost:11434/api/generate"
google_api = os.getenv("GOOGLE-API-KEY")
cx = os.getenv("CX")

## Data Collection

In [3]:
text = """Himni dhe simbolet shtetërore kanë vendin dhe kohën e vet se ku përdoren dhe nuk duhet të shërbejnë për t’i bërë karshillëk askujt. 
Propozimi i kreut të VMRO-DPMNE Hristijan Mickoski që të obligon fëmijët çdo të hënë të këndojnë himnin shtetëror pasqyron mendësinë nacionaliste të VMRO-DPMNE për të ushqyer fëmijët me nacionalizëm dhe me këtë të motivohet politika e tyre drejt izolim shoqëror dhe shtetëror. 
Kam një propozim më kreativ se çka duhet të dëgjojnë fëmijët në këtë shtet dhe atë jo çdo të hënë por çdo ditë:
- të dëgjojnë se si VMRO-DPMNE shet nacionalizëm të rrejshëm ndërsa bën argatin e agjendave anti-evropiane!
- se si VMRO-DPMNE nxit urrejtje dhe ndasi!
-se si VMRO-DPMNE po bllokon të ardhmen evropiane të gjeneratave të reja!
-se si VMRO-DPMNE po izolon shtetin!
- se si VMRO-DPMNE ushtron pushtetin kundër kombit shqiptar në komunat që udhëheqin kuadrot e saj!
Me qasje të këtij lloji, shteti rrezikon shumë prandaj është i nevojshëm një bllok më i madh me orientim të qartë për të ardhmen evropiane të vendit."""

## Data Preprocessing

### Extract Claims

In [4]:

def extract_claims(text):
    promptQuestion= """For the bellow text, extract the claims and give to me in the format of a json. You should only return the json. Do not add any other text. The json should have only 1 key `Claims` and the value should be a list of claims."""
    
    extract_claim_payload = {
    "model": "llama3",
    "prompt": promptQuestion + text,
    }
    
    extract_claim_response = requests.post(llama3_url, json=extract_claim_payload)
    
    if extract_claim_response.status_code == 200:
        lines = extract_claim_response.text.splitlines()
        output = ""
        for line in lines:
            try:
                # Parse each line as JSON
                line_data = json.loads(line)
                # Concatenate the "response" field
                output += line_data.get("response", "")
            except json.JSONDecodeError:
                # Skip lines that are not valid JSON
                continue
        # Parse the final concatenated output as JSON
        try:
            final_output = json.loads(output)
            print("Extracted Claims:", final_output)
            return final_output
        except json.JSONDecodeError:
            # Add a } to the end of the output
            output += "}"
            try:
                final_output = json.loads(output)
                print("Extracted Claims:", final_output)
                return final_output
            except json.JSONDecodeError:
                print("Final output is not valid JSON:", output)
                return None
    else:
        print(f"Error: {extract_claim_response.status_code}")
        print(extract_claim_response.text)

In [67]:
claims = extract_claims(text)
claims = claims.get("Claims", [])
print("Number of claims extracted:", len(claims))

Extracted Claims: {'Claims': ['Himni dhe simbolet shtetërore kanë vendin dhe kohën e vet se ku përdoren', 'The proposal of the leader of VMRO-DPMNE Hristijan Mickoski to make children sing the national anthem every month reflects the nationalist mindset of VMRO-DPMNE', 'VMRO-DPMNE spreads unjust nationalism while making excuses for anti-European agendas', 'VMRO-DPMNE fosters hatred and division', 'VMRO-DPMNE blocks the European future of young generations', 'VMRO-DPMNE isolates the country', 'VMRO-DPMNE exercises power against Albanian nation in the municipalities that lead its quotas']}
Number of claims extracted: 7


### Claim to Questions

In [72]:
def claim_to_question(claim):
    prompt_question = f""" For the bellow claim generate me 3 neural fact-checking questions and give to me in the format of a json. You should only return the json. The json should have only 1 key `Questions` and the value should be a list of questions.
    Claim: "{claim}"""
    
    extract_claim_payload = {
    "model": "llama3",
    "prompt": prompt_question + text,
    }
    
    question_response = requests.post(llama3_url, json=extract_claim_payload)
    
    if question_response.status_code == 200:
        # Process the streamed response
        lines = question_response.text.splitlines()
        output = ""

        for line in lines:
            try:
                # Parse each line as JSON
                line_data = json.loads(line)
                # Concatenate the "response" field
                output += line_data.get("response", "")
            except json.JSONDecodeError:
                # Skip lines that are not valid JSON
                continue

        # Parse the final concatenated output as JSON
        try:
            final_output = json.loads(output)
            print("Generated Questions:", final_output)
            return final_output
        except json.JSONDecodeError:
            # Add a } to the end of the output
            output += "}"
            try:
                final_output = json.loads(output)
                print("Generated Questions:", final_output)
                return final_output
            except json.JSONDecodeError:
                print("Final output is not valid JSON:", output)
                return None
    else:
        print(f"Error: {question_response.status_code}")
        print(question_response.text)

In [74]:
questions = []
claim_to_question_dic = {}
for claim in claims:
    question = claim_to_question(claim)
    # put the question in the dictionary
    claim_to_question_dic[claim] = question.get("Questions", [])
    print(claim_to_question_dic)

Generated Questions: {'Questions': ["What is the purpose of singing national anthems according to Hristijan Mickoski's proposal?", 'Is it true that VMRO-DPMNE promotes nationalism and anti-European agendas, as claimed by the author?', 'Does the author suggest that VMRO-DPMNE isolates the country and hinders its future prospects in Europe?']}
{'Himni dhe simbolet shtetërore kanë vendin dhe kohën e vet se ku përdoren': ["What is the purpose of singing national anthems according to Hristijan Mickoski's proposal?", 'Is it true that VMRO-DPMNE promotes nationalism and anti-European agendas, as claimed by the author?', 'Does the author suggest that VMRO-DPMNE isolates the country and hinders its future prospects in Europe?']}
Generated Questions: {'Questions': ['Is the proposal to make children sing the national anthem every month a common practice in any country?', 'What is the current state of education in North Macedonia regarding national anthems, and is it true that VMRO-DPMNE is trying

## Google Search (Evidence) Retrieval

In [91]:
def google_search(query, api_key, cse_id, num=3):
    query = quote_plus(query)
    url = f"https://www.googleapis.com/customsearch/v1?q={query}&key={api_key}&cx={cse_id}&num={num}"
    response = requests.get(url).json()
    
    results = []
    for item in response.get('items', []):
        results.append({
            'title': item.get('title'),
            'link': item.get('link'),
            'snippet': item.get('snippet')
        })
    return results[:5] 

In [94]:
Evidence = {}
Claims_and_Evidence_dict = {}
for claim, questions in claim_to_question_dic.items():
    print(claim)
    print(questions)
    for question in questions:
        search_results = google_search(question, google_api, cx)
        Evidence[question] = search_results
    Claims_and_Evidence_dict[claim] = Evidence
    Evidence = {}
    
    
    
    # for question in questions:
    #     search_results = google_search(question, google_api, cx)
    #     Evidence[question] = search_results
    # Claims_and_Evidence_dict[claim] = Evidence
    # Evidence = {}
    # print(Claims_and_Evidence_dict)

Himni dhe simbolet shtetërore kanë vendin dhe kohën e vet se ku përdoren
["What is the purpose of singing national anthems according to Hristijan Mickoski's proposal?", 'Is it true that VMRO-DPMNE promotes nationalism and anti-European agendas, as claimed by the author?', 'Does the author suggest that VMRO-DPMNE isolates the country and hinders its future prospects in Europe?']
The proposal of the leader of VMRO-DPMNE Hristijan Mickoski to make children sing the national anthem every month reflects the nationalist mindset of VMRO-DPMNE
['Is the proposal to make children sing the national anthem every month a common practice in any country?', 'What is the current state of education in North Macedonia regarding national anthems, and is it true that VMRO-DPMNE is trying to impose its nationalist mindset on children?', "Does Hristijan Mickoski's proposal reflect a broader nationalistic agenda by VMRO-DPMNE, and if so, what are the implications for the country's future integration into Euro

In [None]:
# Print Claims_and_Evidence_dict in a pretty format
print(json.dumps(Claims_and_Evidence_dict, indent=4))

{
    "Himni dhe simbolet shtet\u00ebrore kan\u00eb vendin dhe koh\u00ebn e vet se ku p\u00ebrdoren": {
        "What is the purpose of singing national anthems according to Hristijan Mickoski's proposal?": [
            {
                "title": "North Macedonia's parliament approves a new center-right ...",
                "link": "https://apnews.com/article/north-macedonia-mickoski-government-8021f259cd9b8fd3f128f35f533ab481",
                "snippet": "Jun 23, 2024 ... ... proposed by a center-right party that won May's national elections. ... Hristijan Mickoski, 46, faces significant challenges in his four\u00a0..."
            },
            {
                "title": "MONOETHNIC ANTHEM: Education under the claws of nationalism ...",
                "link": "https://koha.mk/en/himni-monoetnik-arsimi-nen-kthetrat-e-nacionalizmit/",
                "snippet": "Dec 16, 2024 ... The law on secondary education that foresees the singing of the national anthem in schools is the result

## Identify Stance of Evidence

In [96]:
def detect_stance(claim, evidence):
    prompt = f"""Analyze whether this evidence supports or refutes the claim.
    Claim: "{claim}"
    Evidence: "{evidence}"
    Respond only with on of the following options: Completely Supports, Partially Support, Refute, Irrelevant, or No Evidence.
    """
    extract_stance_payload = {
    "model": "llama3",
    "prompt": prompt,
    }
    stance_response = requests.post(llama3_url, json=extract_stance_payload)
    if stance_response.status_code == 200:
        # Process the streamed response
        lines = stance_response.text.splitlines()
        output = ""

        for line in lines:
            try:
                # Parse each line as JSON
                line_data = json.loads(line)
                # Concatenate the "response" field
                output += line_data.get("response", "")
            except json.JSONDecodeError:
                # Skip lines that are not valid JSON
                continue

        # Parse the final concatenated output as JSON
        # Remove everything after the newline character
        output = output.split('\n')[0]
        return output
    else:
        print(f"Error: {stance_response.status_code}")
        print(stance_response.text)
    return None

In [105]:
stance_results = {}


for claim, evidence in tqdm.tqdm(Claims_and_Evidence_dict.items()):
    for question, evidences in tqdm.tqdm(evidence.items()):
        for evidence in tqdm.tqdm(evidences):
            stance = detect_stance(claim, evidence)
            if claim not in stance_results:
                stance_results[claim] = {}
            if question not in stance_results[claim]:
                stance_results[claim][question] = []
            stance_results[claim][question].append({
                'evidence': evidence,
                'stance': stance
            })

100%|██████████| 3/3 [00:47<00:00, 15.90s/it]
100%|██████████| 3/3 [01:00<00:00, 20.25s/it]
100%|██████████| 3/3 [01:19<00:00, 26.34s/it]
100%|██████████| 3/3 [00:56<00:00, 18.95s/it]
100%|██████████| 3/3 [01:02<00:00, 20.90s/it]
100%|██████████| 3/3 [01:14<00:00, 24.84s/it]
100%|██████████| 3/3 [00:45<00:00, 15.18s/it]
100%|██████████| 7/7 [07:07<00:00, 61.02s/it]


In [106]:
# Print stance_results in a pretty format
print(json.dumps(stance_results, indent=4))

{
    "Himni dhe simbolet shtet\u00ebrore kan\u00eb vendin dhe koh\u00ebn e vet se ku p\u00ebrdoren": {
        "What is the purpose of singing national anthems according to Hristijan Mickoski's proposal?": [
            {
                "evidence": {
                    "title": "North Macedonia's parliament approves a new center-right ...",
                    "link": "https://apnews.com/article/north-macedonia-mickoski-government-8021f259cd9b8fd3f128f35f533ab481",
                    "snippet": "Jun 23, 2024 ... ... proposed by a center-right party that won May's national elections. ... Hristijan Mickoski, 46, faces significant challenges in his four\u00a0..."
                },
                "stance": "Partially Supports"
            },
            {
                "evidence": {
                    "title": "MONOETHNIC ANTHEM: Education under the claws of nationalism ...",
                    "link": "https://koha.mk/en/himni-monoetnik-arsimi-nen-kthetrat-e-nacionalizmit/",
   

## Determine Correction

In [None]:
def determine_verdict(stance_results):
    verdicts = []
    for claim, questions in stance_results.items():
        for question, evidences in questions.items():
            # Make sure that all the stance results are in the same format
            for evidence in evidences:
                for key in evidence:
                    if key == 'stance':
                        stance = evidence['stance']
                        
                        if stance != 'Completely Supports' and stance != 'Partially Support' and stance != 'Refute':
                            evidence['stance'] = 'Irrelevant'
                            # Search if the sentence contains any of the words
                            if 'Completely Supports' in stance:
                                evidence['stance'] = 'Completely Supports'
                            elif 'Partially Support' in stance:
                                evidence['stance'] = 'Partially Support'
                            elif 'Refute' in stance:
                                evidence['stance'] = 'Refute'
                            elif 'Irrelevant' in stance:
                                evidence['stance'] = 'Irrelevant'
                            elif 'No Evidence' in stance:
                                evidence['stance'] = 'No Evidence'
                        print(stance)
                    
            
            supports = sum(1 for evidence in evidences if evidence['stance'] == 'Completely Supports' or evidence['stance'] == 'Partially Support')
            refutes = sum(1 for evidence in evidences if evidence['stance'] == 'Refute')
            if supports > refutes:
                verdicts.append((claim, question, "Supports"))
            elif refutes > supports:
                verdicts.append((claim, question, "Refutes"))
            else:
                verdicts.append((claim, question, "Neutral"))
                
            
            
    return verdicts

In [124]:
determine_verdict(stance_results)

Partially Support
Partially Support
Refute
Partially Support
Partially Support
Partially Support
Partially Support
No Evidence
Irrelevant
Refute
Refute
Refute
Refute
Refute
Refute
Refute
Refute
Partially Support
Partially Support
Partially Support
No Evidence
Partially Support
Partially Support
Partially Support
Partially Support
Irrelevant
Refute
No Evidence
Partially Support
Partially Support
Irrelevant
Irrelevant
Irrelevant
Partially Support
Partially Support
Partially Support
Irrelevant
Partially Support
Partially Support
Partially Support
Partially Support
Partially Support
No Evidence
Partially Support
Partially Support
No Evidence
Partially Support
No Evidence
No Evidence
Partially Support
Partially Support
No Evidence
Partially Support
Partially Support
No Evidence
Irrelevant
Irrelevant


[('Himni dhe simbolet shtetërore kanë vendin dhe kohën e vet se ku përdoren',
  "What is the purpose of singing national anthems according to Hristijan Mickoski's proposal?",
  'Supports'),
 ('Himni dhe simbolet shtetërore kanë vendin dhe kohën e vet se ku përdoren',
  'Is it true that VMRO-DPMNE promotes nationalism and anti-European agendas, as claimed by the author?',
  'Supports'),
 ('Himni dhe simbolet shtetërore kanë vendin dhe kohën e vet se ku përdoren',
  'Does the author suggest that VMRO-DPMNE isolates the country and hinders its future prospects in Europe?',
  'Supports'),
 ('The proposal of the leader of VMRO-DPMNE Hristijan Mickoski to make children sing the national anthem every month reflects the nationalist mindset of VMRO-DPMNE',
  'Is the proposal to make children sing the national anthem every month a common practice in any country?',
  'Refutes'),
 ('The proposal of the leader of VMRO-DPMNE Hristijan Mickoski to make children sing the national anthem every month re

In [125]:
def claims_verdict(claims, stance_results):
    verdicts = []
    for claim in claims:
        supports = sum(1 for question, evidences in stance_results[claim].items() if any(evidence['stance'] == 'Completely Supports' or evidence['stance'] == 'Partially Support' for evidence in evidences))
        refutes = sum(1 for question, evidences in stance_results[claim].items() if any(evidence['stance'] == 'Refute' for evidence in evidences))
        if supports > refutes:
            verdicts.append((claim, "Supports"))
        elif refutes > supports:
            verdicts.append((claim, "Refutes"))
        else:
            verdicts.append((claim, "Neutral"))
    return verdicts
claims_verdict(claims, stance_results)

[('Himni dhe simbolet shtetërore kanë vendin dhe kohën e vet se ku përdoren',
  'Supports'),
 ('The proposal of the leader of VMRO-DPMNE Hristijan Mickoski to make children sing the national anthem every month reflects the nationalist mindset of VMRO-DPMNE',
  'Refutes'),
 ('VMRO-DPMNE spreads unjust nationalism while making excuses for anti-European agendas',
  'Supports'),
 ('VMRO-DPMNE fosters hatred and division', 'Supports'),
 ('VMRO-DPMNE blocks the European future of young generations', 'Supports'),
 ('VMRO-DPMNE isolates the country', 'Supports'),
 ('VMRO-DPMNE exercises power against Albanian nation in the municipalities that lead its quotas',
  'Supports')]