# Classification

## Extract key figures

In [30]:
import re
import requests

def extract_politicans(data, url='http://100.68.153.101:11434/api/generate',
                       model='llama3', timeout=600):
    """Ask the LLM for the key people named in an article and return their names.

    The article's headline, description and body are embedded in a prompt that
    is POSTed to ``url`` as JSON (``model``, ``prompt``, ``stream=False``); the
    generated text is read from the ``response`` field of the JSON reply.
    The reply is expected to contain numbered lines of the form
    ``1. First Last, Suffix``; only the text between the number and the first
    comma is kept.

    Args:
        data: Mapping with optional ``title``, ``description`` and ``maintext``
            keys. Missing or ``None`` fields are sent as empty strings so the
            literal text "None" never reaches the model.
        url: Generate endpoint to POST the prompt to.
        model: Model name passed through in the request body.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Generous by default because the whole completion is returned in
            one piece (``stream`` is off).

    Returns:
        Alphabetically sorted list of unique names parsed from the reply;
        empty when the reply contains no numbered lines.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: No answer arrived within ``timeout`` seconds.
    """
    title = data.get('title') or ''
    description = data.get('description') or ''
    maintext = data.get('maintext') or ''

    content = f"""
        <instructions>
            List the names of the most important persons mentioned in the article.
            Output only the names, no need to provide any additional information.
            
            <output_format>
                1. <FirstName> <LastName>, <Suffix>
                2. <FirstName> <LastName>, <Suffix>
                ...
            </output_format>

        </instructions>
        <headline>{title}</headline>
        <description>{description}</description>
        <content>{maintext}</content>
    """

    response = requests.post(
        url,
        json={'model': model, 'prompt': content, 'stream': False},
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of a confusing KeyError on 'response'.
    response.raise_for_status()
    reply = response.json()['response']

    # "<number>. <name>[, <suffix>]" -- capture everything up to the first comma.
    # Lines without a leading number (preambles, blank lines) are ignored.
    numbered_name = re.compile(r'^\s*\d+\.\s*([^,\n]+)')
    unique_names = set()
    for line in reply.splitlines():
        found = numbered_name.match(line)
        if not found:
            continue
        name = found.group(1).strip()
        if name:  # "1. , Suffix" would otherwise yield an empty name
            unique_names.add(name)

    return sorted(unique_names)

    
# Accumulates one record per article processed by main(); read by later cells.
dataset_with_politicians = []
def main(input_file):
    """Read one article per line from ``input_file`` and tag it with its key people.

    Each non-blank line must be a Python dict literal. Lines are parsed with
    ``ast.literal_eval`` so that the file is treated as data: nothing in it
    can execute code. For every article the names returned by
    ``extract_politicans`` are printed and a record is appended to the
    module-level ``dataset_with_politicians`` list.

    Args:
        input_file: Path of a UTF-8 text file with one dict literal per line.

    Raises:
        ValueError: A non-blank line is not a dict literal; the message
            carries the file name and line number.
    """
    import ast  # local so this cell does not depend on edits to the import cell

    with open(input_file, 'r', encoding='utf-8') as infile:
        for line_number, line in enumerate(infile, start=1):
            record = line.strip()
            if not record:
                # Lines keep their trailing newline, so blank lines must be
                # detected after stripping.
                continue

            try:
                data = ast.literal_eval(record)
            except (ValueError, SyntaxError, TypeError) as exc:
                raise ValueError(
                    f"{input_file}:{line_number}: not a valid dict literal"
                ) from exc
            if not isinstance(data, dict):
                raise ValueError(
                    f"{input_file}:{line_number}: expected a dict, got {type(data).__name__}"
                )

            persons = extract_politicans(data)

            for name in persons:
                print(name)

            dataset_with_politicians.append({
                'title': data.get('title'),
                'description': data.get('description'),
                'maintext': data.get('maintext'),
                'politicians': persons,
                # Articles without a source domain are attributed to this outlet.
                'source_domain': data.get('source_domain') or 'smninewschannel.com'
            })

# Run the extraction over the article dump. Every line of the file triggers one
# LLM request, and the results accumulate in dataset_with_politicians.
main("files/articles.txt")


Ferdinand Marcos Jr.
Ferdinand Marcos Jr.
Lucas Bersamin
Diana Mackey
Donald Trump
Ferdinand Marcos Jr.
Kamala Harris
Kiefer Ravena
LeBron James
Sara Duterte
Alejandro Tengco
Ferdinand Marcos Jr.
Juan Ponce Enrile
Katrina Ponce Enrile
Menardo Guevarra
Winston Casio
Ferdinand Marcos Jr.
Sara Duterte
Chiz Escudero
Ferdinand Marcos Jr.
Imee Marcos
Sara Duterte
Bato Dela Rosa
Chiz Escudero
Ferdinand Marcos Jr
Israelito Torreon
J-Hope
Leni Robredo
Liam Payne
Lorraine Badoy
Rodrigo Duterte
Sara Duterte
Jonathan Keith Flores
Luistro
Michael Poa
Nolasco Mempin
Sara Duterte
Ernesto Dionisio Jr.
Gloria Camora
Joel Chua
Sara Duterte
Joel Chua
Sara Duterte
Bam Aquino
Leni Robredo
Benigno Paolo "Bam" Aquino
Camille Villar
Chiz Escudero
Ferdinand Marcos Jr.
Leni Robredo
Lito Lapid
Tito Sotto
Analiza Tabujara-Soriano
Eugenio Jose "Bong" Lacson
Ferdenand "Bongbong Gorilla" Diego
Gabriel Bordado Jr.
John Barry Tayam
John Rey Tabujara
Jurry Nabaja
Leni Robredo
Paul Octaviano
Vicente Perez
Bong Revilla
F

In [31]:

def get_sentiment(data, politician, url='http://100.68.153.101:11434/api/generate',
                  model='llama3', timeout=600):
    """Ask the LLM how an article portrays one person.

    The article and the person's name are embedded in a prompt that is POSTed
    to ``url`` as JSON (``model``, ``prompt``, ``stream=False``); the generated
    text is read from the ``response`` field of the JSON reply.

    Args:
        data: Mapping with optional ``title``, ``description`` and ``maintext``
            keys. Missing or ``None`` fields are sent as empty strings so the
            literal text "None" never reaches the model.
        politician: Name of the person to assess.
        url: Generate endpoint to POST the prompt to.
        model: Model name passed through in the request body.
        timeout: Seconds ``requests`` waits on the server before giving up.

    Returns:
        ``'POSITIVE'``, ``'NEGATIVE'`` or ``'NEUTRAL'`` when the reply is one
        of those labels (ignoring case, surrounding whitespace and simple
        punctuation). Any other reply is returned whitespace-stripped but
        otherwise unchanged, so unexpected answers stay visible to the caller.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: No answer arrived within ``timeout`` seconds.
    """
    title = data.get('title') or ''
    description = data.get('description') or ''
    maintext = data.get('maintext') or ''

    content = f"""
        <instructions>
            Determine whether the article presents the politician
            in a positive or negative light. 

            Output only the sentiment, no need to provide any additional information.

            <output_format>
                POSITIVE
            </output_format>

            <output_format>
                NEGATIVE
            </output_format>

            <output_format>
                NEUTRAL
            </output_format>
        </instructions>
        <politician>{politician}</politician>
        <article>
            <headline>{title}</headline>
            <description>{description}</description>
            <content>{maintext}</content>
        </article>
    """

    response = requests.post(
        url,
        json={'model': model, 'prompt': content, 'stream': False},
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of a confusing KeyError on 'response'.
    response.raise_for_status()

    reply = response.json()['response'].strip()
    # Canonicalise answers like " Positive.\n" so downstream grouping sees
    # exactly three label values.
    label = reply.strip(' \t\r\n.!"\'*`').upper()
    if label in ('POSITIVE', 'NEGATIVE', 'NEUTRAL'):
        return label
    return reply

# Score every (article, person) pair and keep the results next to the article
# fields. Top-level names are kept as-is because other cells share this
# namespace.
dataset_with_sentiments = []
for data in dataset_with_politicians:
    sentiments = []
    for politician in data['politicians']:
        sentiment = get_sentiment(data, politician)
        print(f"{politician}: {sentiment}")
        sentiments.append((politician, sentiment))

    # Copy the article fields through unchanged, then attach the scores.
    dataset_with_sentiments.append({
        **{
            field: data.get(field)
            for field in ('title', 'description', 'maintext', 'source_domain')
        },
        'sentiments': sentiments,
    })



Ferdinand Marcos Jr.: NEUTRAL
Ferdinand Marcos Jr.: POSITIVE
Lucas Bersamin: POSITIVE
Diana Mackey: NEUTRAL
Donald Trump: NEGATIVE
Ferdinand Marcos Jr.: POSITIVE
Kamala Harris: POSITIVE
Kiefer Ravena: NEUTRAL
LeBron James: POSITIVE
Sara Duterte: NEGATIVE
Alejandro Tengco: POSITIVE
Ferdinand Marcos Jr.: POSITIVE
Juan Ponce Enrile: NEUTRAL
Katrina Ponce Enrile: POSITIVE
Menardo Guevarra: POSITIVE
Winston Casio: POSITIVE
Ferdinand Marcos Jr.: NEGATIVE
Sara Duterte: NEGATIVE
Chiz Escudero: NEGATIVE
Ferdinand Marcos Jr.: NEGATIVE
Imee Marcos: NEGATIVE
Sara Duterte: NEGATIVE
Bato Dela Rosa: NEUTRAL
Chiz Escudero: NEUTRAL
Ferdinand Marcos Jr: NEUTRAL
Israelito Torreon: NEGATIVE
J-Hope: POSITIVE
Leni Robredo: NEUTRAL
Liam Payne: NEGATIVE
Lorraine Badoy: NEGATIVE
Rodrigo Duterte: POSITIVE
Sara Duterte: NEGATIVE
Jonathan Keith Flores: NEGATIVE
Luistro: NEGATIVE
Michael Poa: NEGATIVE
Nolasco Mempin: NEGATIVE
Sara Duterte: NEGATIVE
Ernesto Dionisio Jr.: NEGATIVE
Gloria Camora: NEGATIVE
Joel Chua: 

# Visualization


In [32]:
import matplotlib.pyplot as plt
from collections import Counter

# Flatten to one row per (article, politician) pair for export.
sentiments = []
for data in dataset_with_sentiments:
    sentiments.extend(
        {
            'title': data.get('title'),
            'description': data.get('description'),
            'sentiment': sentiment,
            'source': data.get('source_domain'),
            'politician': politician,
        }
        for politician, sentiment in data['sentiments']
    )


sentiments

[{'title': "Reality check? Marcos admits flood control infra 'not enough'",
  'description': 'After saying that over 5,500 flood control projects have been completed at the time of his third SONA, Marcos has to face the flooding problem squarely in the wake of Severe Tropical Storm Kristine',
  'sentiment': 'NEUTRAL',
  'source': 'www.rappler.com',
  'politician': 'Ferdinand Marcos Jr.'},
 {'title': 'Marcos declares November 4 as day of national mourning for Kristine victims',
  'description': "'The national flag shall be flown at half-mast from sunrise to sunset on all government buildings and installations across the country and abroad,' the Presidential Communications Office says",
  'sentiment': 'POSITIVE',
  'source': 'www.rappler.com',
  'politician': 'Ferdinand Marcos Jr.'},
 {'title': 'Marcos declares November 4 as day of national mourning for Kristine victims',
  'description': "'The national flag shall be flown at half-mast from sunrise to sunset on all government buildings a

In [33]:
import csv

# Destination for the flattened (article, politician, sentiment) rows.
csv_file = 'files/politician_sentiments.csv'

# Columns are declared up front instead of being read from sentiments[0]:
# with an empty result the old lookup raised IndexError *after* open() had
# already truncated the file. Now an empty run writes a header-only CSV.
fieldnames = ['title', 'description', 'sentiment', 'source', 'politician']

# newline='' lets the csv module control line endings itself.
with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(sentiments)

print(f'Data saved to {csv_file}')

Data saved to files/politician_sentiments.csv
