Skip to content

Commit 00eaefe

Browse files
committed
refactor
1 parent 0761966 commit 00eaefe

File tree

19 files changed

+108
-82
lines changed

19 files changed

+108
-82
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ HackersAndSlackers-d2a47db89384.json
109109
HackersAndSlackers-4893023543f3.json
110110
HackersAndSlackers-59ed81beb2ea.json
111111
HackersAndSlackers-aec129ee8154.json
112+
hackersandslackers-204807-a78d7cf1d0d8.json
113+
*.json
112114

113115
# .DS_Store
114116
.DS_Store

Pipfile.lock

Lines changed: 26 additions & 26 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
from src.sources.bigquery import bqc
2-
from src.sources.database import dbc
3-
from src.queries import queries
1+
from biquery_sql_etl.sources.bigquery import bqc
2+
from biquery_sql_etl.sources.sqldatabase import dbc
3+
from biquery_sql_etl.queries import sql_queries
44

55

66
def main():
77
"""Move data between sources."""
8-
for k, v in queries.items():
8+
for k, v in sql_queries.items():
99
fetched_rows = bqc.fetch_rows(v)
1010
inserted_rows = dbc.insert_rows(fetched_rows, k, replace=True)
1111
print(inserted_rows)

biquery_sql_etl/queries.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
"""Read SQL queries from local files."""
2+
from os import listdir
3+
from os.path import isfile, join
4+
from config import local_sql_folder
5+
6+
7+
def get_local_sql_files():
8+
"""Fetch all SQL query files in folder."""
9+
files = [local_sql_folder + '/' + f for f in listdir(local_sql_folder) if isfile(join(local_sql_folder, f)) if '.sql' in f]
10+
return files
11+
12+
13+
def read_sql_queries():
14+
"""Read SQL query from .sql file."""
15+
files = get_local_sql_files()
16+
file_contents = []
17+
for file in files:
18+
fd = open(file, 'r')
19+
query = fd.read()
20+
file_contents.append(query)
21+
fd.close()
22+
return file_contents
23+
24+
25+
def create_query_dict():
26+
"""Create dictionary of queries for logging purposes."""
27+
file_names = listdir(local_sql_folder)
28+
file_contents = read_sql_queries()
29+
table_names = get_table_name(file_names)
30+
files = dict(zip(table_names, file_contents))
31+
return files
32+
33+
34+
def get_table_name(file_names):
35+
"""Assume that table names of queries are [FILE_NAME]_stats."""
36+
tables = [f"{file.split('.')[0]}_stats" for file in file_names]
37+
return tables
38+
39+
40+
sql_queries = create_query_dict()
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from .bigquery import bqc
2+
from .sqldatabase import dbc

src/sources/bigquery/__init__.py renamed to biquery_sql_etl/sources/bigquery/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
from config.bigquery_config import (gcp_credentials,
2-
gcp_project,
3-
bigquery_dataset,
4-
bigquery_table)
1+
from config import (gcp_credentials,
2+
gcp_project,
3+
bigquery_dataset,
4+
bigquery_table)
55
from .bigquery_client import BigQueryClient
66

77
bqc = BigQueryClient(project=gcp_project,

src/sources/bigquery/bigquery_client.py renamed to biquery_sql_etl/sources/bigquery/bigquery_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""BigQuery Client."""
22
from sqlalchemy.engine import create_engine
33
from sqlalchemy import MetaData
4-
from src.sources.client import BaseClient
4+
from biquery_sql_etl.sources.client import BaseClient
55

66

77
class BigQueryClient(BaseClient):
Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,26 +11,27 @@
1111
class BaseClient:
1212

1313
def __init__(self, engine=None, metadata=None, table=None):
14-
# self.table = table
1514
self.engine = engine
1615
self.metadata = metadata
16+
self.table = table
1717

1818
def insert_rows(self, rows, table, replace=None):
1919
"""Insert rows into table."""
2020
if replace:
2121
self.engine.execute(f'TRUNCATE TABLE {table}')
2222
table = Table(table, self.metadata, autoload=True)
2323
self.engine.execute(table.insert(), rows)
24-
return self.__construct_response(rows)
24+
return self.__construct_response(table, rows)
2525

2626
def fetch_rows(self, query):
2727
"""Fetch all rows via query."""
2828
rows = self.engine.execute(query).fetchall()
2929
return rows
3030

31-
def __construct_response(self, rows):
31+
@staticmethod
32+
def __construct_response(table, rows):
3233
"""Summarize results of an executed query."""
3334
columns = rows[0].keys()
3435
column_names = ", ".join(columns)
3536
num_rows = len(rows)
36-
return f'Inserted {num_rows} rows into `{self.table}` with {len(columns)} columns: {column_names}'
37+
return f'Inserted {num_rows} rows into `{table}` with {len(columns)} columns: {column_names}'

src/sources/database/__init__.py renamed to biquery_sql_etl/sources/sqldatabase/__init__.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
from config.sql_config import (database_username,
2-
database_password,
3-
database_host,
4-
database_port,
5-
database_table,
6-
database_name)
1+
from config import (database_username,
2+
database_password,
3+
database_host,
4+
database_port,
5+
database_table,
6+
database_name)
77
from .database_client import Database
88

99
dbc = Database('mysql',
Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,30 @@
11
"""SQL Database Client."""
22
from sqlalchemy.engine import create_engine
33
from sqlalchemy import MetaData
4-
from src.sources.client import BaseClient
4+
from biquery_sql_etl.sources.client import BaseClient
55

66

77
class Database(BaseClient):
88

99
def __init__(self,
10-
type,
10+
db_type,
1111
username=None,
1212
password=None,
1313
host=None,
1414
port=None,
1515
table=None,
1616
db_name=None):
1717
self.table = table
18-
self.type = self.__conection_type(type)
18+
self.type = self.__connection_type(db_type)
1919
self.engine = create_engine(f'{self.type}://{username}:{password}@{host}:{port}/{db_name}')
2020
super().__init__(engine=self.engine,
2121
metadata=MetaData(bind=self.engine),
2222
table=table)
2323

2424
@staticmethod
25-
def __conection_type(type):
26-
if type.lower() == 'mysql':
25+
def __connection_type(db_type):
26+
if db_type.lower() == 'mysql':
2727
return 'mysql+pymysql'
28-
if type.lower() == 'postgres':
28+
if db_type.lower() == 'postgres':
2929
return 'postgresql+psycopg2'
3030
return None

0 commit comments

Comments
 (0)