In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install pyngrok worldnewsapi

Collecting pyngrok
  Downloading pyngrok-7.1.6-py3-none-any.whl (22 kB)
Collecting worldnewsapi
  Downloading worldnewsapi-1.0.11-py3-none-any.whl (31 kB)
Installing collected packages: pyngrok, worldnewsapi
Successfully installed pyngrok-7.1.6 worldnewsapi-1.0.11


In [3]:
%%capture
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers trl peft accelerate bitsandbytes

In [4]:
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048
dtype = None
load_in_4bit = True

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [5]:
from unsloth import FastLanguageModel
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "/content/drive/MyDrive/NLP-NewsSummary/Llama3_cnn_dailymail",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

config.json:   0%|          | 0.00/1.20k [00:00<?, ?B/s]

==((====))==  Unsloth: Fast Llama patching release 2024.5
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.0+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. Xformers = 0.0.26.post1. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/172 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Unsloth 2024.5 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [6]:
def summarize(text):
    inputs = tokenizer(
    [
        alpaca_prompt.format(
            "Summarize the following news", # instruction
            text,
            "", # output
        )
    ], return_tensors = "pt").to("cuda")

    outputs = model.generate(**inputs, max_new_tokens = 256, use_cache = True)
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0].split("Response:\n", 1)[-1]

In [7]:
import worldnewsapi
from worldnewsapi.rest import ApiException
import os
from datetime import date, timedelta
from flask import Flask, jsonify
from pyngrok import ngrok
from dotenv import load_dotenv

In [8]:
load_dotenv()

API_KEY = os.getenv('NEWS_API_KEY')
AUTH_TOKEN = os.getenv('NGROK_AUTH_TOKEN')

In [14]:
newsapi_configuration = worldnewsapi.Configuration(api_key={'apiKey': API_KEY})

In [23]:
try:
    newsapi_instance = worldnewsapi.NewsApi(worldnewsapi.ApiClient(newsapi_configuration))

    max_results = 10
    offset = 0
    all_results = []

    while len(all_results) < max_results:
        request_count = min(100, max_results - len(all_results))

        response = newsapi_instance.search_news(
            text='',
            source_countries='us',
            language='en',
            earliest_publish_date=str(date.today() - timedelta(1)),
            latest_publish_date=str(date.today()),
            sort="publish-time",
            sort_direction="desc",
            min_sentiment=-0.8,
            max_sentiment=0.8,
            offset=offset,
            number=request_count)

        print(f"Retrieved {len(response.news)} articles. Offset: {offset}/{max_results}. Total available: {response.available}.")

        for article in response.news:
            summarized_text = summarize(article.text)
            all_results.append({
                "Title": article.title,
                "Author": article.author,
                "URL": article.url,
                "Text": summarized_text,
                "Image": article.image
            })

        offset += 100

except worldnewsapi.ApiException as e:
    print(f"Exception when calling NewsApi->search_news: {e}\n")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Retrieved 10 articles. Offset: 0/10. Total available: 5119.


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [25]:
app = Flask(__name__)
current_index = 0

@app.route('/', methods=['GET'])
def index():
    global current_index

    if current_index >= len(all_results):
        current_index = 0

    article = all_results[current_index]
    current_index += 1

    return jsonify(article)

if __name__ == '__main__':
    !ngrok authtoken AUTH_TOKEN

    public_url = ngrok.connect(5000)
    print(f"ngrok tunnel opened at {public_url}")


    app.run()

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
ngrok tunnel opened at NgrokTunnel: "https://2122-34-125-246-140.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [31/May/2024 16:43:58] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [31/May/2024 16:44:00] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [31/May/2024 16:44:03] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [31/May/2024 16:44:05] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [31/May/2024 16:44:08] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [31/May/2024 16:44:11] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [31/May/2024 16:44:55] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [31/May/2024 16:45:04] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [31/May/2024 16:45:05] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [31/May/2024 16:45:08] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [31/May/2024 16:45:10] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [31/May/2024 16:45:13] "GET / HTTP/1.1" 200 -
INFO:werkzeu