In [None]:
import pandas as pd
import sqlalchemy
from fastavro import writer
import pyarrow.parquet as pq
import pyarrow as pa
import io
import os
from sqlalchemy import create_engine, inspect
from apscheduler.schedulers.blocking import BlockingScheduler
import time

# Connect to the database.
# The connection URL is taken from the DATABASE_URL environment variable so
# that real credentials never have to be written into the notebook; the
# placeholder URL is kept as a fallback so the cell behaves as before when the
# variable is not set.
db_connection_str = os.environ.get(
    'DATABASE_URL',
    'postgresql://username:password@host:port/database_name',
)
engine = create_engine(db_connection_str)

In [None]:
# Pull the whole table into a DataFrame and dump it as CSV (no index column).
query = "SELECT * FROM your_table_name"
df = pd.read_sql(sql=query, con=engine)
df.to_csv(path_or_buf='output.csv', index=False)

In [None]:
# Re-run the query, convert the frame to an Arrow table and write it as Parquet.
df = pd.read_sql(sql=query, con=engine)
table = pa.Table.from_pandas(df=df)
pq.write_table(table, where='output.parquet')

In [None]:
# Export the query result as Avro with every column typed as nullable string.
df = pd.read_sql(query, engine)
avro_schema = {
    "type": "record",
    "name": "your_table_name",
    "fields": [{"name": col, "type": ["null", "string"]} for col in df.columns]
}
# The schema only admits null or string, so each value has to be converted to
# match it: missing scalars (None/NaN/NaT) become None and everything else is
# stringified. Without this, numeric or timestamp columns fail schema
# validation when written. Non-scalar values (e.g. lists) are stringified
# without a missing-value check because pd.isna would return an array for them.
records = [
    {
        col: None if (pd.api.types.is_scalar(value) and pd.isna(value)) else str(value)
        for col, value in row.items()
    }
    for row in df.to_dict(orient='records')
]
with open('output.avro', 'wb') as f:
    writer(f, avro_schema, records)

In [None]:
def job():
    """Export the query result to CSV, Parquet and Avro files.

    Reads the module-level ``query`` through the module-level ``engine`` and
    writes ``output_scheduled.csv``, ``output_scheduled.parquet`` and
    ``output_scheduled.avro`` into the current working directory, overwriting
    any previous files, then prints a confirmation line.

    The Avro schema is derived from the freshly loaded frame on every run, so
    the function does not depend on another cell having defined a schema and
    always matches the current columns. Every field is a nullable string, and
    the record values are converted accordingly.
    """
    df = pd.read_sql(query, engine)
    df.to_csv('output_scheduled.csv', index=False)

    table = pa.Table.from_pandas(df)
    pq.write_table(table, 'output_scheduled.parquet')

    schema = {
        "type": "record",
        "name": "your_table_name",
        "fields": [{"name": col, "type": ["null", "string"]} for col in df.columns],
    }
    # Values must be None or str to satisfy the ["null", "string"] union;
    # missing scalars (None/NaN/NaT) map to None, everything else to str.
    records = [
        {
            col: None if (pd.api.types.is_scalar(value) and pd.isna(value)) else str(value)
            for col, value in row.items()
        }
        for row in df.to_dict(orient='records')
    ]
    with open('output_scheduled.avro', 'wb') as f:
        writer(f, schema, records)

    print("Job executed")

# Register `job` to run once a minute and hand control to the scheduler.
# NOTE: a BlockingScheduler is expected to keep this cell running until the
# kernel is interrupted, so cells below will not execute while it is active.
scheduler = BlockingScheduler()
scheduler.add_job(job, trigger='interval', minutes=1)
try:
    scheduler.start()
except (SystemExit, KeyboardInterrupt):
    # Interrupting the kernel is the normal way to end the schedule.
    pass

In [None]:
# Source and destination connections. Each URL can be overridden through an
# environment variable so credentials do not need to live in the notebook; the
# placeholder URLs remain as fallbacks.
src_engine = create_engine(os.environ.get(
    'SOURCE_DATABASE_URL',
    'postgresql://username:password@host:port/source_database',
))
dest_engine = create_engine(os.environ.get(
    'DEST_DATABASE_URL',
    'postgresql://username:password@host:port/destination_database',
))

inspector = inspect(src_engine)
tables = inspector.get_table_names()

# Copy every table from the source into the destination, replacing any table
# of the same name that already exists there.
for table in tables:
    # read_sql_table lets SQLAlchemy build the SELECT, so table names that need
    # quoting (mixed case, reserved words, special characters) are handled
    # instead of being pasted unquoted into an SQL string.
    df = pd.read_sql_table(table, src_engine)
    df.to_sql(table, dest_engine, if_exists='replace', index=False)

In [None]:
# Mapping of table name -> list of columns to copy from that table.
selective_tables = {
    'table1': ['column1', 'column2'],
    'table2': ['column3', 'column4']
}

# Copy only the listed columns of each table into the destination, replacing
# any existing destination table of the same name.
for table, columns in selective_tables.items():
    # Let SQLAlchemy generate the SELECT so that table and column identifiers
    # are quoted correctly rather than joined into a raw SQL string.
    df = pd.read_sql_table(table, src_engine, columns=columns)
    df.to_sql(table, dest_engine, if_exists='replace', index=False)

In [None]:
def on_modified(event):
    """Run the export job in response to a file-system event.

    The ``event`` argument is accepted but not inspected; every call simply
    invokes ``job()``.
    """
    job()

from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

class Handler(FileSystemEventHandler):
    """Event handler that triggers the export when the watched path changes."""

    def on_modified(self, event):
        """Forward a modification event to the module-level ``on_modified``.

        Only events whose ``src_path`` is exactly ``"path/to/watch"`` are
        forwarded; all other events are ignored.
        """
        if event.src_path != "path/to/watch":
            return
        # Inside the method body this name resolves to the module-level
        # function, not to this method.
        on_modified(event)

# Watch 'path/to/watch' (non-recursively) and dispatch events to Handler.
observer = Observer()
observer.schedule(Handler(), path='path/to/watch', recursive=False)
observer.start()

# Keep the cell alive while the observer thread works in the background.
try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    # Interrupting the kernel is the expected way to stop watching.
    pass
finally:
    # Always stop and wait for the observer thread, including when the loop is
    # left through an unexpected exception, so it is not left running.
    observer.stop()
    observer.join()