In [7]:
import os
import json
import re
import string
from pprint import pprint
from pathlib import Path
from collections import defaultdict

from dotenv import load_dotenv
from langchain.llms import OpenAI

load_dotenv()

True

In [8]:
data_root = os.path.join("naija_highlights", "data", "bronze")
punch_root = os.path.join(data_root, "punchng")
sun_root = os.path.join(data_root, "Sunnewsonline")

def get_data_paths(scraped_data_root):
    """ get paths to data"""
    data_paths = defaultdict(list)
    for root, dirs, files in os.walk(scraped_data_root):
        if "items.json" in files:
            key = Path(root).stem
            for f in files:
                value = os.path.join(root, f)
                data_paths[key].append(value)
    return data_paths


def read_data(path):
    """ Read Json Data"""
    data = []
    with open(path, "r") as f:
        for line in f:
            data.append(json.loads(line))
    
    return data

In [9]:
punch_data_paths = get_data_paths(punch_root)
sun_data_paths = get_data_paths(sun_root)
punch_data_paths

defaultdict(list,
            {'day=26': ['naija_highlights/data/bronze/punchng/year=2019/month=6/day=26/items.json'],
             'day=24': ['naija_highlights/data/bronze/punchng/year=2019/month=6/day=24/items.json'],
             'day=25': ['naija_highlights/data/bronze/punchng/year=2019/month=6/day=25/items.json'],
             'day=1': ['naija_highlights/data/bronze/punchng/year=2023/month=7/day=1/items.json'],
             'day=27': ['naija_highlights/data/bronze/punchng/year=2023/month=6/day=27/items.json'],
             'day=29': ['naija_highlights/data/bronze/punchng/year=2023/month=6/day=29/items.json'],
             'day=28': ['naija_highlights/data/bronze/punchng/year=2023/month=6/day=28/items.json'],
             'day=30': ['naija_highlights/data/bronze/punchng/year=2023/month=6/day=30/items.json']})

## Punch

In [10]:
sample_number = 9
sample_data = read_data(punch_data_paths["day=29"][0])[sample_number]
sample_data

{'weblink': 'https://punchng.com/fintiri-bans-tree-burning-for-charcoal-in-adamawa/',
 'title': ' Fintiri bans tree burning for charcoal in Adamawa ',
 'postdate': [29, 6, 2023],
 'thumbnaillink': 'https://cdn.punchng.com/wp-content/uploads/2023/04/18180001/Adamawa-State-Governor-Ahmadu-Fintiri.jpg',
 'author': 'Hindi Livinus',
 'body': ['Governor of Adamawa State Ahmadu Umaru Fintiri has outlawed the felling of trees for charcoal fuel to curb deforestation and climate change.',
  'Fintiri issued the ban while hosting a delegation of traditional leaders in the state led by the Presidential Candidate of the Peoples Democratic Party Former Vice President Atiku Abubakar who represented the Lamido of Adamawa Dr. Barkindo Mustapha Chairman of the Adamawa Council of Emirs and Chiefs on Sallah homage at the Presidential Lounge Yola Government House.',
  'Governor Fintiri who thanked the traditional rulers for finding it worthy to felicitate with him on the auspicious occasion sued for their c

## Sun Paper

In [11]:
sample_number = 30
sample_data = read_data(sun_data_paths["day=27"][0])[sample_number]
sample_data

{'weblink': 'https://sunnewsonline.com/the-chase-for-credible-leadership-title/',
 'title': 'The chase for credible leadership title',
 'postdate': [27, 6, 2023],
 'thumbnaillink': 'https://assets.sunnewsonline.com/2022/02/Insights.jpg',
 'author': 'Anonymous',
 'body': ['',
  '',
  '',
  '',
  '',
  'The race for leadership credibility is on in Nigeria. Former and current Presidents and military dictators have argued repeatedly over the person who achieved the most during their tenures. The rush for national recognition of feats achieved or hyped by a President began when Ibrahim Babangida compared his military government with that of Olusegun Obasanjo. In late January 2008 Obasanjo who was recovering from official and unofficial public exposure of his personal and public life claimed that during his two terms as President he achieved the best for Nigeria. The jury is still out on that claim.',
  '',
  'Now claims are being made about the rocket-style speed with which Bola Ahmed Tinub

# Highlights - Select Article

In [12]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from pprint import pprint

def create_chain(prompt:str, temperature:float):
    """generate an highlight template of the article"""
    llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"), 
                 temperature=temperature)
    template = prompt +  " {newsarticle}"
    prompt = PromptTemplate(
                input_variables=["newsarticle"],
                template=template)

    print(f"sample prompt is: {prompt.format(newsarticle='Abacha was the president')}")
    chain = LLMChain(llm=llm, prompt=prompt)
    return chain


def select_article(
    news_agency,
    day,
    sample):
    """selects a crawled article """

    data_paths = sun_data_paths if news_agency == "sun" else punch_data_paths
    article = read_data(data_paths[f"day={day}"][0])[sample]
    title = article["title"]
    article = " ".join(article["body"])
    return article, title
    

In [22]:
news_agency="sun"
day=30
sample=6
article, title = select_article(news_agency, day, sample)
print("-- Title: ", title, "--")
pprint(article)

-- Title:  Abia Panel of Inquiry: Only looters ‘ll condemn Otti – Apugo, APC Chieftain --
('Speaking in Umuahia Apugo said officials of the previous administrations '
 'looted the state so blind that no Governor who would want to develop the '
 'state and the people take him seriously will not want to recover such '
 'properties and funds. “These people looted the state blind and no Governor '
 'who will not only want to rebuild the state but want to have the trust and '
 'confidence of his people will not want to recover the looted funds”. On the '
 'recovery of Government properties Apugo urged the Panel to beam its '
 'searchlight proper on the activities of the government before that of '
 'Ikpeazu’s. “That Government did some many things wrong hiding under PIU they '
 'forcefully collected people’s land without compensation and in turn '
 'allocated them free to their cronies. “To perfect their atrocities the '
 'Governor then removed a qualified Town Planner which the law establi

## Text Summarization

In [23]:
prompt = "Summarize the news article in a clear and succinct manner, keeping it within 80 words. News article: "
temperature=0.4

highlight_chain = create_chain(prompt, temperature)
output = highlight_chain.run(article)

pprint(output)

sample prompt is: Summarize the news article in a clear and succinct manner, keeping it within 80 words. News article:  Abacha was the president
('\n'
 '\n'
 "Governor Okezie Ikpeazu's administration is being probed for looting the "
 'state blind, and Apugo urges the panel to also investigate the previous '
 'administrations. He also encourages Governor Otti not to listen to those '
 'threatening hell and brimstone over the probe, and to suspend all Permanent '
 'Secretaries and HoS in the state. Apugo believes this is the right thing to '
 "do, citing former President Muhammadu Buhari's mistake of not removing "
 'Godwin Emefiele as CBN Governor.')


## NER

In [24]:
prompt=""" Extract the important entities mentioned in the text below. First extract all company names, then extract all people names, and finally extract general overarching themes
Desired format: Company names: <comma_separated_list_of_company_names> People names: -||- General themes: -||-  Text:" """
temperature=0.4

ner_chain = create_chain(prompt, temperature)
output = ner_chain.run(article)

print("\n")
print(output)

sample prompt is:  Extract the important entities mentioned in the text below. First extract all company names, then extract all people names, and finally extract general overarching themes
Desired format: Company names: <comma_separated_list_of_company_names> People names: -||- General themes: -||-  Text:"  Abacha was the president



Company names: None 
People names: T.A Orji, Okezie Ikpeazu, Gov Otti, Godwin Emefiele 
General themes: Looting, Recovery of Government Properties, Suspension of Permanent Secretaries and HoS


## Emotion Detection

In [26]:
prompt = """ Give the major two emotional tones of the text and rate the intensity of the emotion on a scale of 1 to 10. 
Desired format:
emotional tone: <comma_separated_list_of_emotional_tone>
intensity: <number> Text:"""
temperature=1

emotion_chain = create_chain(prompt, temperature)
output = emotion_chain.run(article)

pprint("\n")
print(output)

sample prompt is:  Give the major two emotional tones of the text and rate the intensity of the emotion on a scale of 1 to 10. 
Desired format:
emotional tone: <comma_separated_list_of_emotional_tone>
intensity: <number> Text: Abacha was the president
'\n'


emotional tone: Anger, disappointment
intensity: 8
