In [None]:
import re

import pandas as pd
import sys
import keras
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

import numpy as np
from sqlalchemy import TextClause

# Make the sibling project folders importable so `connect_db` (imported below)
# can be resolved. Forward slashes are used because they are accepted on
# Windows as well as POSIX, whereas a backslash is only a path separator on
# Windows. Paths are relative to the notebook's working directory.
sys.path.insert(1, "../models")
sys.path.insert(2, "../etl")

from connect_db import connect_to_grad_db

In [None]:
# Load the trained Keras model from its saved-model directory. The path is
# relative, so it resolves against the notebook's working directory.
model = keras.models.load_model('../../saved_models/2dcnn_32_16_32_250k_rows/')

In [None]:
# Build the database handle through the project helper. It is used below with
# `.connect()` and handed to pandas, so it is presumably a SQLAlchemy engine
# (confirm in connect_db).
engine = connect_to_grad_db()

In [None]:
# Raw SQL selecting only the article body column from the news_api table.
query = TextClause("select content from news_api")

# Run the query on a short-lived connection and materialise every row; the
# `with` block closes the connection as soon as the rows have been fetched.
with engine.connect() as conn:
    content = conn.execute(query).fetchall()

In [None]:
def ready_text_for_pred(text):
    """Normalise a sentence into the plain form passed to the model.

    Every character that is not an ASCII letter becomes a space, the result is
    lower-cased and trimmed, and runs of spaces are collapsed to one.

    Args:
        text: The sentence to normalise.

    Returns:
        The lower-case, letters-and-single-spaces version of ``text``.
    """
    letters_only = re.sub(r"[^a-zA-Z]", " ", text)
    trimmed = letters_only.lower().strip()
    return re.sub(r" +", " ", trimmed)

def preprocess_article_content(content):
    """Split raw article text into a list of cleaned sentences.

    Characters other than word characters and the sentence terminators
    ``.``, ``!`` and ``?`` are replaced by single spaces, then the text is
    split on those terminators. Each fragment is trimmed, and fragments that
    are empty (for example the piece after the final full stop, or the gaps
    inside an ellipsis) are dropped so callers never receive blank sentences.

    Args:
        content: The article body. Anything that is not a string (such as
            ``None`` or NaN coming from a NULL database value) is treated as
            an article with no sentences.

    Returns:
        A list of non-empty sentence strings without terminating punctuation;
        empty when there is nothing to split.
    """
    if not isinstance(content, str):
        return []

    # Replacing each *run* of unwanted characters with one space is the same
    # as replacing them one by one and then collapsing repeated spaces,
    # because a space is itself one of the unwanted characters.
    cleaned_content = re.sub(r"[^.!?\w]+", " ", content)

    # Split on sentence punctuation.
    fragments = (piece.strip() for piece in re.split(r"[.!?]", cleaned_content))

    return [piece for piece in fragments if piece]

In [None]:
def create_preds(article_content, min_pred=0, max_pred=4):
    """Score each sentence of an article and summarise the scores.

    The article is split with ``preprocess_article_content``; every sentence
    is normalised with ``ready_text_for_pred`` and passed to the global
    ``model`` one at a time. Each prediction is clamped to
    ``[min_pred, max_pred]`` and the *clamped* value is used both for the
    per-sentence annotation and for the mean, so the two always agree.

    Sentences that contain no letters after normalisation are skipped: they
    carry no text for the model and would only skew the mean.

    Args:
        article_content: Raw article text.
        min_pred: Lowest score a sentence may receive (default 0).
        max_pred: Highest score a sentence may receive (default 4).

    Returns:
        A tuple ``(text_w_preds, mean_pred)`` where ``text_w_preds`` is the
        sentences, each followed by its score, joined with ``". "``, and
        ``mean_pred`` is the mean of the clamped scores (``nan`` when no
        sentence could be scored).

    Raises:
        ValueError: If the model returns more than one value for a sentence.
    """
    sentences_w_preds = []
    clamped_preds = []

    for sentence in preprocess_article_content(article_content):
        cleaned_sentence = ready_text_for_pred(sentence)
        if not cleaned_sentence:
            continue

        # The model is called exactly as before (a one-element list). The
        # result is collapsed to a scalar so the annotation reads "3.1"
        # rather than a nested-array repr; reshape(()) raises if the model
        # unexpectedly returns more than one value.
        raw_pred = np.asarray(model.predict([cleaned_sentence])).reshape(())
        pred = np.clip(raw_pred, min_pred, max_pred)

        clamped_preds.append(pred)
        sentences_w_preds.append(sentence + " " + str(pred))

    # np.mean of an empty list warns and yields nan; return nan explicitly.
    mean_pred = np.mean(clamped_preds) if clamped_preds else np.nan

    return ". ".join(sentences_w_preds), mean_pred

In [None]:
# Read the whole news_api table into a DataFrame. The connection is opened in
# a `with` block so it is closed once the read finishes instead of being left
# open for the lifetime of the kernel.
with engine.connect() as conn:
    df = pd.read_sql_table("news_api", conn)

In [None]:
preds = df["content"].apply(create_preds)

In [None]:
# `preds` is a Series whose elements are (annotated_text, mean_score) tuples.
# pandas cannot spread such a Series across two columns directly (it raises
# "Columns must be same length as key"), so the tuples are first expanded
# into a two-column frame aligned on df's index. The score column is named
# `mean_preds` to match the column this notebook persists further down.
df[["text_w_preds", "mean_preds"]] = pd.DataFrame(
    preds.tolist(),
    index=df.index,
    columns=["text_w_preds", "mean_preds"],
)

In [None]:
# Split the (annotated_text, mean_score) pairs held in `preds` into two
# parallel lists, keeping the original row order.
texts_w_preds = [pair[0] for pair in preds]
mean_preds = [pair[1] for pair in preds]

In [None]:
# Attach the per-article mean score as a new column (values are positional,
# one per row of df).
df["mean_preds"] = mean_preds

In [None]:
# Attach the sentence-by-sentence annotated text as a new column (values are
# positional, one per row of df).
df["text_w_preds"] = texts_w_preds

In [None]:
# Create a fresh database handle before writing the results.
# NOTE(review): an engine was already created earlier in the notebook;
# presumably it is rebuilt because the prediction step runs for a long time.
# Confirm whether this is actually needed.
engine = connect_to_grad_db()

In [None]:
# Persist the scored frame to the news_api_w_preds table. The DataFrame index
# is written as a column (index=True) and any existing table with this name
# is dropped and recreated (if_exists="replace").
df.to_sql("news_api_w_preds", engine, index=True, if_exists="replace")