In [49]:
import sys
import os
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from tqdm import tqdm
from sqlalchemy.exc import SQLAlchemyError
sys.path.append("..")  # Adds higher directory to python modules path.
from models import Link,Post, Base
from parse_utils import get_full_text


In [26]:
# Setup SQLAlchemy
engine = create_engine(os.getenv("DATABASE_URL"))

Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)


In [60]:
def create_link(company, url):
    session = Session()
    link = Link(company=company, link=url)
    session.add(link)
    try:
        session.commit()
        print(f"Link created for {company} with ID: {link.id}")
    except Exception as e:
        print(f"Error occurred while saving link to database: {e}")
        session.rollback()
    finally:
        session.close()
    return link

def get_all_links():
    session = Session()
    links = session.query(Link).all()
    session.close()
    return links

def get_all_posts():
    session = Session()
    posts = session.query(Post).all()
    session.close()
    return posts

def delete_link(url):
    session = Session()
    link = session.query(Link).filter_by(link=url).first()
    if link is None:
        print(f"No link found for url {url}")
        return

    try:
        session.delete(link)
        session.commit()
        print(f"Link deleted for url {url}")
    except SQLAlchemyError as e:
        session.rollback()  # Roll back the transaction on error
        print(f"Error occurred while deleting link from database: {e}")
    finally:
        session.close()  # Always close the session when you're done with it
        
        
def delete_posts():
    session = Session()
    try:
        session.query(Post).delete()
        session.commit()
        print("All posts deleted.")
    except SQLAlchemyError as e:
        session.rollback()  # Roll back the transaction on error
        print(f"Error occurred while deleting posts from database: {e}")
    finally:
        session.close()  # Always close the session when you're done with it




In [41]:
# List of URLs and companies
data = [
    {"company": "databricks", "url": "https://www.databricks.com/blog/category/data-strategy/feed"},
    {"company": "databricks", "url": "https://www.databricks.com/blog/category/best-practices/feed"}
]

# Create Link objects and fetch full text for each URL
for item in data:
    link = create_link(item["company"], item["url"])
#     full_text = get_full_text(item["url"])


Link created for databricks with ID: 4
Link created for databricks with ID: 5


<models.Link at 0x118df8640>

In [69]:
posts = get_all_posts()
link = get_all_links()

print(f'posts: {len(posts)}')
print(f'links: {len(links)}')

# # posts = [post for post in posts]

# # # Print first 10...
for post in posts[:10]:
    print(post.fulltext)

posts: 10
links: 2


AttributeError: 'Post' object has no attribute 'fulltext'

In [61]:
delete_posts()

All posts deleted.
