In [66]:
# Standard library
import re
import sys
from pathlib import Path

# Third-party
import pandas as pd
from sqlalchemy import TextClause, Row

# Make the sibling project folders importable. The entries are built with
# pathlib so the separator matches the host OS: a hard-coded "..\models"
# only resolves on Windows, while this yields the same string there and a
# valid path on Linux/macOS. Both entries are relative to the kernel's
# working directory.
sys.path.insert(1, str(Path("..") / "models"))
sys.path.insert(2, str(Path("..") / "etl"))

# Project-local modules (found through the sys.path entries above, so these
# imports must stay below the inserts).
from lstm_model import load_model_weights_from_checkpoint
from connect_db import connect_to_grad_db

In [2]:
# Build the model object through the project's checkpoint loader (imported
# from lstm_model). Called with no arguments, so whichever checkpoint is used
# is decided inside that helper -- NOTE(review): confirm its default there.
model = load_model_weights_from_checkpoint() 

In [4]:
# Obtain the database handle from the project's connect_db helper. It is used
# below via `engine.connect()`, so it is presumably a SQLAlchemy Engine --
# verify against connect_db.
engine = connect_to_grad_db()

In [12]:
# One-column query: the `content` value of every row in news_api.
query = TextClause("select content from news_api")

# fetchall() materialises every row before the with-block closes the
# connection, so `content` remains usable in later cells.
with engine.connect() as conn:
    content = conn.execute(query).fetchall()

In [35]:
# Peek at the raw text of the second fetched row. The row is indexed
# positionally (column 0 is `content`, the only selected column) instead of
# reaching into the Row's private `_data` attribute.
content[1][0]

"\n\n\n\n\n\n\n\n\n\nDuangpetch Phromthep speaks at a news conference in Thailand following his rescue from a cave in July 2018. \n\nSoe Zeya Tun/Reuters\n\n\n\n\n\n\n\nLondon\nCNN\n        \xa0—\xa0\n    \n\n\nDuangphet Phromthep, one of the 12 boys rescued from a flooded Thai cave after a weekslong operation that drew global attention in 2018, has died in the UK, British and Thai officials announced Wednesday. \n  \n\n      Phromthep, who was enrolled in a soccer academy in Leicestershire, England, died after being rushed to hospital on Sunday, Leicestershire Police said in a statement to CNN. \n  \n\n      Th northern regional branch of the Thai government’s public relations arm said on Facebook that Phromthep, 17, died due to an accident, without providing more details. \n  \n\n      “The atmosphere at his house in Chiang Rai province was full of sorrow,” PR Thailand’s statement said. \n  \n\n      Zico Foundation, a Thai non-profit organization which had helped Phromthep study in 

In [73]:
def ready_text_for_pred(text):
    """Normalise a text fragment into lowercase, space-separated ASCII words.

    Every character that is not an ASCII letter is turned into a space, the
    result is lowercased and trimmed, and runs of spaces are collapsed to a
    single space.

    Args:
        text: The string to normalise.

    Returns:
        The normalised string; empty if `text` contains no ASCII letters.
    """
    # Anything outside a-z / A-Z (digits, punctuation, accents, newlines)
    # becomes a plain space.
    letters_only = re.sub(r"[^a-zA-Z]", " ", text)

    trimmed = letters_only.lower().strip()

    # Collapse the space runs left behind by the substitution above.
    return re.sub(r" +", " ", trimmed)

def preprocess_article_content(content):
    """Split raw article text into cleaned sentences ready for prediction.

    The text is split on '.', '!' and '?', and every fragment is normalised
    with `ready_text_for_pred`. Fragments that are empty after normalisation
    (whitespace-only or digit-only pieces, e.g. the remainder after a
    trailing ". ") are dropped so no empty string is handed to the model.

    Args:
        content: Raw article text. `None` or an empty string is accepted.

    Returns:
        A list of non-empty, normalised sentence strings, in document order.
    """
    # A missing or empty article body has nothing to split.
    if not content:
        return []

    content = content.strip("\n")

    # Remove all special characters except sentence defining punctuation.
    # \w still lets digits and underscores through here; they are removed
    # later by ready_text_for_pred.
    cleaned_content = re.sub(r"[^.!?\w]", " ", content)

    # Remove multiple spaces
    cleaned_content = re.sub(r"[ ]+", " ", cleaned_content)

    # Split on sentence punctuation
    split_sentences = re.split(r"[.!?]", cleaned_content)

    # Filter AFTER cleaning: a fragment such as " " or " 2018 " is not the
    # empty string before normalisation but becomes one afterwards.
    cleaned_sentences = [ready_text_for_pred(sentence) for sentence in split_sentences]

    return [sentence for sentence in cleaned_sentences if sentence]

In [74]:
# Sentence-split and normalise the second fetched article. The row is indexed
# positionally (column 0 is `content`) rather than through the Row's private
# `_data` attribute.
sentences = preprocess_article_content(content[1][0])

In [75]:
# Score every cleaned sentence with the loaded model. The result is later
# passed through `.flatten()` to give one value per sentence, so it is
# presumably a NumPy-style array -- NOTE(review): confirm shape and the
# meaning of the score against lstm_model.
preds = model.predict(sentences)



In [63]:
# Sentence -> prediction lookup. Keys are the sentence strings, so if the
# same sentence appears more than once only its last prediction is kept.
stats = dict(zip(sentences, preds))

In [76]:
# Tabulate each sentence next to its prediction. `preds` is flattened so the
# prediction column is one-dimensional, one value per sentence.
df = pd.DataFrame(dict(sentence=sentences, prediction=preds.flatten()))

In [77]:
# Render the sentence/prediction table as this cell's output.
df

Unnamed: 0,sentence,prediction
0,duangpetch phromthep speaks at a news conferen...,2.382278
1,soe zeya tun reuters london cnn duangphet phro...,3.167635
2,phromthep who was enrolled in a soccer academy...,1.757906
3,th northern regional branch of the thai govern...,2.639367
4,the atmosphere at his house in chiang rai prov...,1.809712
5,zico foundation a thai non profit organization...,1.808854
6,relatives of duangphet phromthep greet him fol...,2.400391
7,sakchai lalit ap a daring rescue phromthep was...,3.235713
8,the boys and their coach became trapped when r...,2.747595
9,divers involved in the rescue described treach...,2.171447
