/
pipeline.py
38 lines (27 loc) · 897 Bytes
/
pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import os
import dlt
from yato import Yato
from ghost import ghost_source, remove_columns
if __name__ == "__main__":
    # Single source of truth for the DuckDB file shared by yato (SQL
    # transformations + S3 backup/restore) and the dlt pipeline destination.
    # Previously the literal was duplicated in the Yato() call, so renaming
    # the file could silently desynchronize the two tools.
    DATABASE_FILE_NAME = "ghostdb.duckdb"

    # yato drives the SQL transformation layer and persists the DuckDB file
    # to S3. All S3 settings come from the environment; unset variables
    # yield None — NOTE(review): assumes Yato tolerates missing S3 config,
    # confirm against the yato docs.
    yato = Yato(
        database_path=DATABASE_FILE_NAME,
        sql_folder="transform/sql/",
        s3_bucket=os.getenv("S3_BUCKET"),
        s3_access_key=os.getenv("S3_ACCESS_KEY"),
        s3_secret_key=os.getenv("S3_SECRET_KEY"),
        s3_endpoint_url=os.getenv("S3_ENDPOINT"),
        s3_region_name=os.getenv("S3_REGION_NAME"),
    )

    # Pull the previous database state from S3 before loading, so the dlt
    # pipeline appends to existing data rather than starting from scratch.
    yato.restore(overwrite=True)

    # dlt loads the Ghost API data into the same DuckDB file, under the
    # "ghost" dataset (schema).
    pipeline = dlt.pipeline(
        pipeline_name="ghostdb",
        destination=dlt.destinations.duckdb(credentials=DATABASE_FILE_NAME),
        dataset_name="ghost",
    )

    # Strip unwanted columns from the activities resource before loading.
    data = ghost_source()
    data = data.get_activities.add_map(remove_columns)

    load_info = pipeline.run(data)
    print(load_info)

    # Persist the freshly loaded raw data to S3, then run the SQL
    # transformations on top of it.
    yato.backup()
    db = yato.run()  # connection/result kept for interactive inspection