This repository has been archived by the owner on Sep 14, 2019. It is now read-only.
/
test_load_table.py
150 lines (113 loc) · 4.64 KB
/
test_load_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import config
import utils
import pytest
from sqlalchemy import create_engine, inspect, MetaData
from sqlalchemy.sql import select
def assert_column_type(column, expected_type):
assert column.type.python_type == expected_type
def assert_columns_have_non_unique_indexes(engine, table_name, *columns):
inspection = inspect(engine)
non_unique_1_column_indexes = [
index['column_names'][0]
for index in inspection.get_indexes(table_name)
if not index['unique'] and len(index['column_names']) == 1
]
assert set(columns) <= set(non_unique_1_column_indexes)
class TestLoadTable():
def setup_method(self, method):
self.engine = create_engine(config.SQLA_URI)
self.meta = MetaData(bind=self.engine)
def teardown_method(self, method):
self.meta.drop_all()
def load_table(self, tmpdir, csv_str, table_name, **kwargs):
file_name = '{}.csv'.format(table_name)
csv_file = tmpdir.join(file_name)
csv_file.write(csv_str)
utils.load_table(str(csv_file), table_name, **kwargs)
self.meta.reflect()
def test_good_csv(self, tmpdir):
CSV = """name, dob, number_of_pets
Tom, 1980-02-26, 0
Dick, 1982-03-14, 3
Harry, 1972-11-24, 2
"""
self.load_table(tmpdir, CSV, 'people')
assert 'people' in self.meta.tables
people = self.meta.tables['people']
assert_column_type(people.columns['index'], int)
assert_column_type(people.columns['name'], str)
assert_column_type(people.columns['dob'], str)
assert_column_type(people.columns['number_of_pets'], str)
assert people.columns['index'].primary_key is True
assert people.columns['name'].primary_key is False
assert_columns_have_non_unique_indexes(self.engine, 'people', 'name',
'dob', 'number_of_pets')
connection = self.engine.connect()
results = connection.execute(
select([people]).order_by(people.c.index)).fetchall()
assert (0, 'Tom', '1980-02-26', '0') == results[0]
assert (1, 'Dick', '1982-03-14', '3') == results[1]
assert (2, 'Harry', '1972-11-24', '2') == results[2]
def test_load_with_chunking(self, tmpdir):
CSV = """name, dob, number_of_pets
Tom, 1980-02-26, 0
Dick, 1982-03-14, 3
Harry, 1972-11-24, 2
Chris, 1991-08-03, 0
Jill, 1990-04-15, 1
Lorenzo, 2003-01-01, 0
Alyssa, 2005-02-02, 1
Andrew, 2008-03-03, 1
Javi, 2008-04-04, 2
"""
self.load_table(tmpdir, CSV, 'people', chunksize=4)
people = self.meta.tables['people']
connection = self.engine.connect()
results = connection.execute(
select([people]).order_by(people.c.index)).fetchall()
assert (0, 'Tom', '1980-02-26', '0') == results[0]
assert (1, 'Dick', '1982-03-14', '3') == results[1]
assert (2, 'Harry', '1972-11-24', '2') == results[2]
assert (3, 'Chris', '1991-08-03', '0') == results[3]
assert (4, 'Jill', '1990-04-15', '1') == results[4]
assert (5, 'Lorenzo', '2003-01-01', '0') == results[5]
assert (6, 'Alyssa', '2005-02-02', '1') == results[6]
assert (7, 'Andrew', '2008-03-03', '1') == results[7]
assert (8, 'Javi', '2008-04-04', '2') == results[8]
def test_empty_csv(self, tmpdir):
file_name = 'empty.csv'
csv_file = tmpdir.join(file_name)
csv_file.ensure()
with pytest.raises(Exception):
utils.load_table(str(csv_file), 'empty')
def test_embedded_space_in_columnname(self, tmpdir):
CSV = """name, number of pets
Tom, 0
Dick, 3
Harry, 2
"""
self.load_table(tmpdir, CSV, 'people')
people = self.meta.tables['people']
assert 'number of pets' in people.columns
def test_null_empty_string_handling(self, tmpdir):
CSV = """name, number_of_pets
Tom, 0
"", 3
, 2
"""
self.load_table(tmpdir, CSV, 'people')
people = self.meta.tables['people']
connection = self.engine.connect()
results = connection.execute(
select([people]).order_by(people.c.index)).fetchall()
assert (0, 'Tom', '0') == results[0]
assert (1, None, '3') == results[1]
assert (2, None, '2') == results[2]
def test_embedded_space_in_tablename_and_filename(self, tmpdir):
CSV = """name, number_of_pets
Tom, 0
Dick, 3
Harry, 2
"""
self.load_table(tmpdir, CSV, 'We the people')
assert 'We the people' in self.meta.tables