In [52]:
# importing all the dependencies
import pandas as pd
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer as Summarizer
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.stemmers import Stemmer
import csv
from load_data import get_last_n_days_data

import os 
from dotenv import load_dotenv
load_dotenv()

from sqlalchemy import create_engine, MetaData, Table
from sqlalchemy.sql import select
from sqlalchemy import and_,or_,not_
from sqlalchemy import insert

In [53]:
def get_data(n=3):
    """Fetch the most recent scraped articles and split them into columns.

    Delegates to ``get_last_n_days_data`` and pulls four fields out of its
    result by key.

    Parameters
    ----------
    n : int, optional
        Number of days to look back; forwarded to ``get_last_n_days_data``.
        Defaults to 3.

    Returns
    -------
    tuple
        A 4-tuple ``(urls, headlines, articles, date_published)`` holding the
        ``'url'``, ``'headline'``, ``'article'`` and ``'date_published'``
        entries of the loaded data, in that order.
        NOTE(review): presumably these are pandas Series (columns of a
        DataFrame) -- confirm against ``load_data.get_last_n_days_data``.
    """
    data = get_last_n_days_data(n=n)
    articles = data['article']
    headlines = data['headline']
    urls = data['url']
    date_published = data['date_published']
    return urls, headlines, articles, date_published

In [54]:
urls, headlines, articles, date_published = get_data()

# Number of sentences the LSA summarizer is asked to return per article.
SENTENCES_PER_SUMMARY = 2

stemmer = Stemmer("english")
summerizer = Summarizer(stemmer)
# The tokenizer does not depend on the article, so build it once instead of
# once per loop iteration.
tokenizer = Tokenizer("english")

# One summary string per article, in the same order as `articles`; every
# selected sentence is followed by a newline.
summeries = []
for article in articles:
    parser = PlaintextParser.from_string(article, tokenizer)
    summery = "".join(
        f"{sentence}\n"
        for sentence in summerizer(parser.document, SENTENCES_PER_SUMMARY)
    )
    summeries.append(summery)

In [55]:
# Sanity check: one summary string was appended per article in the loop above.
len(summeries)

124

In [56]:
# Database setup: engine, reflected `summeries` table, and a shared connection.
# Credentials are read from the environment variables dbUSERNAME / dbPASSWORD.
# NOTE(review): they are interpolated into the URL verbatim, so a password
# containing characters such as '@', ':' or '/' must already be URL-encoded.
engine = create_engine(
    f"postgresql+psycopg2://{os.environ['dbUSERNAME']}:{os.environ['dbPASSWORD']}@localhost:5432/ScrapedData"
)
# Keep the MetaData unbound: binding it to an engine (`MetaData(engine)`) is
# deprecated in SQLAlchemy 1.4 and unsupported in 2.0, where the first
# positional argument is `schema`.
metadata = MetaData()

# Reflect the existing table definition from the database. `autoload_with`
# implies autoload and replaces the `autoload=True` flag removed in 2.0.
summery_table = Table('summeries', metadata, autoload_with=engine)

connection = engine.connect()

In [57]:
# Persist one row per article into the `summeries` table.
# zip() pairs the values positionally, matching the order in which
# `summeries` was built, so it does not depend on the index labels of
# `urls` / `headlines` / `date_published`.
rows = [
    {
        "url": url,
        # Store the article's headline here (previously the summary text was
        # written into this column by mistake).
        "headline": headline,
        "date_published": published,
        "summery": summary_text,
    }
    for url, headline, published, summary_text in zip(
        urls, headlines, date_published, summeries
    )
]

# Skip the round trip entirely when there is nothing to insert.
if rows:
    # engine.begin() opens a transaction that commits on success and rolls
    # back on error, so either all rows are stored or none are.
    with engine.begin() as conn:
        # Passing a list of dicts runs the INSERT once per row as a single
        # executemany call.
        conn.execute(insert(summery_table), rows)