/
sqoop_command.txt
279 lines (144 loc) · 12.7 KB
/
sqoop_command.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
--list database and tables
sqoop list-databases --connect jdbc:mysql://localhost --username root
sqoop list-tables --connect jdbc:mysql://localhost/retail_db --username root
--eval to evaluate a query
sqoop eval --connect jdbc:mysql://localhost/retail_db --username root --query "select count(*) from orders"
sqoop eval --connect jdbc:mysql://localhost/retail_db --username root --query "insert into orders values(10000,'2018-02-24 00:00:00.0',1233,'DUMMY')"
sqoop eval --connect jdbc:mysql://localhost/retail_export --username root --query "create table dummy(i int)"
--import and split-by
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice --split-by order_id --table orders --num-mappers 1
--append --to append in the directory
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice --split-by order_id --table orders --num-mappers 1 --append
--delete-target-dir(delete target directory )
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice --split-by order_id --table orders --delete-target-dir
autoreset-to-one-mapper: if there is no primary key we can use this option
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice --autoreset-to-one-mapper --table orders --delete-target-dir
--file format
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice/avro --autoreset-to-one-mapper --table orders --delete-target-dir \
--as-avrodatafile
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice/parquet --autoreset-to-one-mapper --table orders --delete-target-dir \
--as-parquetfile
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice/sequence --autoreset-to-one-mapper --table orders --delete-target-dir \
--as-sequencefile
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice/text --autoreset-to-one-mapper --table orders --delete-target-dir \
--as-textfile
----You can not import from sqoop directly as orc file need to use hcatalog hive table option will create file under /user/training/warehouse/my_table_orc
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice/orc --autoreset-to-one-mapper --table orders --delete-target-dir \
--hcatalog-database default --hcatalog-table my_table_orc --create-hcatalog-table --hcatalog-storage-stanza "stored as orcfile"
--use hcatalog for json file as well
--compression
--gzip
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice/text_gzip --autoreset-to-one-mapper --table orders --delete-target-dir \
--as-textfile --compress --compression-codec org.apache.hadoop.io.compress.GzipCodec
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice/gzip --autoreset-to-one-mapper --table orders --delete-target-dir \
--as-parquetfile --compress --compression-codec org.apache.hadoop.io.compress.GzipCodec
--snappy
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice/text_snappy --autoreset-to-one-mapper --table orders --delete-target-dir \
--as-textfile --compress --compression-codec org.apache.hadoop.io.compress.SnappyCodec
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice/parquetsnappy --autoreset-to-one-mapper --table orders --delete-target-dir \
--as-parquetfile --compress --compression-codec org.apache.hadoop.io.compress.SnappyCodec
---to view the compressed file
hdfs dfs -get /user/training/sarvesh_practice/text_gzip/part-m-00000.gz
gunzip part-m-00000.gz
tail part-m-00000
or
hdfs dfs -text /user/training/sarvesh_practice/text_gzip/part-m-00000.gz
--boundary-query :Boundary query to use for creating splits
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice/text_boundary --split-by order_id --table orders --delete-target-dir --as-textfile --boundary-query "select min(order_id),max(order_id) from orders where order_id<10000"
**************where****************
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice/text_where --split-by order_id --table orders --delete-target-dir --as-textfile --where "order_id<10"
*********query****************
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice/text_query --split-by order_id --query "select * from orders where order_id <100 and \$CONDITIONS" --delete-target-dir --as-textfile
***--null-string and --null-non-string****************
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice/text_query_null --split-by order_id --query "select * from orders where order_id <100 and \$CONDITIONS" --delete-target-dir --as-textfile --null-string "NA" --null-non-string -1
********incremental import************
--check-column (col) Specifies the column to be examined when determining which rows to import. (the column should not be of type CHAR/NCHAR/VARCHAR/VARNCHAR/ LONGVARCHAR/LONGNVARCHAR)
--incremental (mode) Specifies how Sqoop determines which rows are new. Legal values for mode include append and lastmodified.
--last-value (value) Specifies the maximum value of the check column from the previous import.
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice/incremental --split-by order_id --table orders \
--incremental append --check-column order_id --last-value 0
Better to create a sqoop job if you want --last-value to be updated automatically for the next run; the job metastore takes care of it.
sqoop job --create "import_orders" -- import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice/incremental --split-by order_id --incremental append --check-column order_id --last-value 0 --table orders
sqoop job --exec import_orders (--last value used will be 0)
sqoop job --exec import_orders (--last value used will be highest value used in last job execution above)
sqoop job --create "import_orders_lastmodified" -- import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice/incremental_lastmodified --split-by order_id --incremental lastmodified --check-column order_date --table orders
sqoop job --exec import_orders_lastmodified
sqoop job --create test_2 -- import --connect jdbc:mysql://localhost/retail_db --username root --target-dir /user/training/sarvesh_practice_incremental/incrementals --split-by order_id --incremental lastmodified --check-column order_date --last-value '2014-07-22 00:00:00' --table orders
sqoop job --exec test_2
************hive-import********************
hive>create database test_hive_import;
hive>create table orders(
order_id int,
order_date string,
order_customer_id int ,
order_status string
)
stored as textfile ;
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --hive-import --hive-database test_hive_import --hive-table orders --table orders -m 1
**********hive-overwrite: Overwrite if table exists**************
sqoop import \
--connect jdbc:mysql://localhost/retail_db \
--username root \
--table order_items \
--split-by order_id \
--hive-import \
--hive-database test_hive_import \
--hive-table order_items \
--num-mappers 1 \
--hive-overwrite
******create-hive-table****************
sqoop import-all-tables --connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" --username retail_dba --password cloudera --warehouse-dir /user/hive/warehouse/problem6.db --hive-import --hive-database problem6 --create-hive-table --as-textfile;
sqoop import --connect jdbc:mysql://localhost/retail_db --username root --table order_items --split-by order_id --hive-import --hive-database test_hive_import --hive-table order_items_hive --num-mappers 1 --create-hive-table --hive-overwrite --target-dir /user/training/order_items_hive
***import-all-tables***********
sqoop import-all-tables --connect jdbc:mysql://localhost/test --username training --m 1 --warehouse-dir=/user/training/sqoop_import
*****outdir to generate java code****
sqoop import-all-tables --num-mappers 1 --connect "jdbc:mysql://localhost/retail_db" --username=root --hive-import --create-hive-table --outdir java_files --hive-database test_hive_import
**********Validate by running this query on shell prompt*********
hive -e "USE test_hive_import; SHOW TABLES; SELECT * FROM orders;"
*************sqoop merge****************
1:Using sqoop merge data available in /user/training/problem5/products-text-part1 and /user/training/problem5/products-text-part2
to produce a new set of files in /user/training/problem5/products-text-both-parts
sqoop merge --class-name products_replica \
--jar-file /tmp/sqoop-training/compile/58eef185483e6f0c39751a4fa136561d/products_replica.jar \
--new-data /user/training/problem5/products-text-part2 \
--onto /user/training/problem5/products-text-part1 \
--target-dir /user/training/problem5/products-text-both-parts \
--merge-key product_id
*********stage table*************
sqoop export --connect jdbc:mysql://localhost/retail_db \
--username root \
--table departments \
--export-dir /user/hive/warehouse/retail_ods.db/departments \
--input-fields-terminated-by '|' \
--input-lines-terminated-by '\n' \
--staging-table department_stage \
--clear-staging-table
*********sqoop export******************
--update-mode: legal values are updateonly and allowinsert
mysql>create table order_items_export as select * from retail_db.order_items;
mysql>alter table order_items_export add primary key (order_item_id);
sqoop export --connect jdbc:mysql://localhost/retail_export --username root --export-dir /user/hive/warehouse/test_hive_import.db/order_items --table order_items_export --update-mode allowinsert --update-key order_item_id -m 1 --input-fields-terminated-by '\001' --input-null-non-string "null" --input-null-string "null" --columns "order_item_id,order_item_order_id,order_item_product_id,order_item_quantity,order_item_subtotal,order_item_product_price"
mysql> select count(*) from order_items_export;
+----------+
| count(*) |
+----------+
| 172201 |
+----------+
hive (test_hive_import)> insert into order_items values(172202,68883,502,1,50,50);
sqoop export --connect jdbc:mysql://localhost/retail_export --username root --export-dir /user/hive/warehouse/test_hive_import.db/order_items --table order_items_export --update-mode allowinsert --update-key order_item_id -m 1 --input-fields-terminated-by '\001' --input-null-non-string "null" --input-null-string "null" --columns "order_item_id,order_item_order_id,order_item_product_id,order_item_quantity,order_item_subtotal,order_item_product_price"
mysql> select count(*) from order_items_export;
+----------+
| count(*) |
+----------+
| 172202 |
+----------+
create table order_item_dtl
(order_id Int,
order_date date,
order_status varchar(45),
order_item_subtotal Float
);
load data local inpath "/home/training/ord_item_dtl" into table order_item_dtl;
sqoop export --connect jdbc:mysql://localhost/retail_database --username root --export-dir /user/hive/warehouse/final_exam_practice.db/order_item_dtl --update-mode updateonly --update-key order_id -m 1 --table order_item_dtl
sqoop export --connect jdbc:mysql://localhost/retail_database --username root --export-dir /user/hive/warehouse/final_exam_practice.db/order_item_dtl1 --update-mode allowinsert --update-key order_id -m 1 --table order_item_dtl --input-fields-terminated-by '\001'
sqoop export --connect jdbc:mysql://localhost/retail_export --username root --table order_items --input-fields-terminated-by '\001' --export-dir /user/hive/warehouse/test_hive_import.db/order_items --columns "order_item_id,order_item_order_id,order_item_product_id,order_item_quantity,order_item_subtotal,order_item_product_price"