In [1]:
import sys
import os
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from tqdm import tqdm
from sqlalchemy.exc import SQLAlchemyError
sys.path.append("..")  # Adds higher directory to python modules path.
from models import Link,Post, Base
from parse_utils import get_full_text


In [3]:
# Setup SQLAlchemy
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

database_url = os.getenv("DATABASE_URL")
if not database_url:
    # Fail early with an actionable message instead of handing None to
    # create_engine, whose error does not mention the missing variable.
    raise RuntimeError(
        "DATABASE_URL is not set; define it in the environment or in a .env file"
    )

engine = create_engine(database_url)

# Create the tables declared on Base's metadata that are not present yet.
Base.metadata.create_all(engine)

# Session factory used by the helper functions in the following cells.
Session = sessionmaker(bind=engine)


In [4]:
def create_link(company, url):
    """Store a new Link row for `company` pointing at `url`.

    A fresh session is opened for the insert and closed before returning.
    If the commit fails, the error is printed and the transaction is rolled
    back; the Link object is returned in either case.

    Args:
        company: Value stored in the Link's `company` field.
        url: Value stored in the Link's `link` field.

    Returns:
        The Link instance that was built, whether or not it was saved.
    """
    db = Session()
    new_link = Link(company=company, link=url)
    db.add(new_link)
    try:
        db.commit()
        print(f"Link created for {company} with ID: {new_link.id}")
    except Exception as err:
        print(f"Error occurred while saving link to database: {err}")
        db.rollback()
    finally:
        db.close()
    return new_link

def get_all_links():
    """Return every Link row returned by an unfiltered query.

    A short-lived session is opened for the query and is closed afterwards,
    including when the query raises.

    Returns:
        list: The Link objects produced by the query (empty if there are none).
    """
    session = Session()
    try:
        return session.query(Link).all()
    finally:
        # Always release the session, even if the query fails.
        session.close()

def get_all_posts():
    """Return every Post row returned by an unfiltered query.

    A short-lived session is opened for the query and is closed afterwards,
    including when the query raises.

    Returns:
        list: The Post objects produced by the query (empty if there are none).
    """
    session = Session()
    try:
        return session.query(Post).all()
    finally:
        # Always release the session, even if the query fails.
        session.close()

def delete_link(url):
    """Delete the first Link row whose `link` field equals `url`.

    Prints a message describing the outcome: not found, deleted, or the
    database error that occurred. The session is closed on every path,
    including the not-found early return.

    Args:
        url: Value matched against the Link's `link` field.

    Returns:
        None.
    """
    session = Session()
    try:
        # The lookup lives inside the try block so that `finally` closes the
        # session on the not-found path and so that query errors are reported
        # the same way as delete/commit errors.
        link = session.query(Link).filter_by(link=url).first()
        if link is None:
            print(f"No link found for url {url}")
            return

        session.delete(link)
        session.commit()
        print(f"Link deleted for url {url}")
    except SQLAlchemyError as e:
        session.rollback()  # Roll back the transaction on error
        print(f"Error occurred while deleting link from database: {e}")
    finally:
        session.close()  # Always close the session when you're done with it
        
        
def delete_post(title):
    """Delete the first Post row whose `title` field equals `title`.

    Prints a message describing the outcome: not found, deleted, or the
    database error that occurred. The session is closed on every path.

    Args:
        title: Value matched against the Post's `title` field.

    Returns:
        None.
    """
    session = Session()
    try:
        post = session.query(Post).filter_by(title=title).first()
        if post is None:
            # Nothing matched; avoid passing None to session.delete().
            print(f"No post found for title {title}")
            return

        session.delete(post)
        session.commit()
        # Only the single matching post is removed, so report exactly that.
        print(f"Post deleted for title {title}")
    except SQLAlchemyError as e:
        session.rollback()  # Roll back the transaction on error
        print(f"Error occurred while deleting posts from database: {e}")
    finally:
        session.close()  # Always close the session when you're done with it




In [19]:
# List of URLs and companies
data = [
    {
        "company": "databricks",
        "url": "https://www.databricks.com/blog/category/industries/financial-services/feed",
    },
]

# Register a Link row for every feed listed above.
# (Fetching the full text with get_full_text is currently switched off.)
for item in data:
    link = create_link(company=item["company"], url=item["url"])


Link created for databricks with ID: 11


<models.Link at 0x118df8640>

In [6]:
posts = get_all_posts()
links = get_all_links()

# Report how many rows of each kind are stored.
print(f"posts: {len(posts)}", f"links: {len(links)}", sep="\n")

# Dump the full text of every stored post (no limit is applied).
for post in posts:
    print(post.fulltext)


posts: 10
links: 3
Discover how to build and manage all your data, analytics and AI use cases with the Databricks Lakehouse Platform Report

Tap the potential of AI
Explore recent findings from 600 CIOs across 14 industries in this MIT Technology Review report Missed Data + AI Summit?   Data + AI Summit is over, but you can still watch the keynotes and 250+ sessions from the event on demand. Connect with validated partner solutions in just a few clicks. See why Gartner named Databricks a Leader for the second consecutive year July 7, 2023 in Data Strategy This is part six of a multi-part series to share key insights and tactics with Senior Executives leading data and AI transformation initiatives. You can read part five of the series here. Beginning in 1987, Southwest Airlines famously standardized on flying a single airplane type — the Boeing 737 class of aircraft. This decision allowed the airline to save on both operations and maintenance — requiring only one type of simulator to tr

All posts deleted.
