Programación Avanzada
===

In [2]:
%run bd.py
%pig_init




## flatten

In [3]:
%%writefile data.tsv
A	10	(1, 2)
B	20	(3, 4)
C	30	(5, 6)
D	40	(7, 8)

Writing data.tsv


In [4]:
%%pig
-- Read the tab-separated file with the default loader; the third column
-- is declared as a tuple holding two ints.
u = LOAD 'data.tsv' AS (
        f1:chararray,
        f2:int,
        f3:tuple(p:int, q:int)
    );
DUMP u;

(A,10,(1,2))
(B,20,(3,4))
(C,30,(5,6))
(D,40,(7,8))


In [5]:
%%pig
-- FLATTEN applied to a tuple promotes its fields to top-level columns,
-- e.g. (A,(1,2)) becomes (A,1,2).
r = FOREACH u GENERATE f1, FLATTEN(f3);
DUMP r;

(A,1,2)
(B,3,4)
(C,5,6)
(D,7,8)


In [6]:
%%writefile data.tsv
A	10	{(1),(2)}
B	20	{(3),(4)}
C	30	{(5),(6)}
D	40	{(7),(8)}

Overwriting data.tsv


In [7]:
%%pig
-- data.tsv was overwritten: its third column now contains a bag of
-- one-field tuples, so f3 is declared as a bag.
u = LOAD 'data.tsv' AS (
        f1:chararray,
        f2:int,
        f3:bag{t:(p:int)}
    );
DUMP u;

(A,10,{(1),(2)})
(B,20,{(3),(4)})
(C,30,{(5),(6)})
(D,40,{(7),(8)})


In [8]:
%%pig
-- FLATTEN applied to a bag emits one output row per tuple in the bag,
-- repeating the other generated fields (f1) on every row.
r = FOREACH u GENERATE f1, FLATTEN(f3);
DUMP r;

(A,1)
(A,2)
(B,3)
(B,4)
(C,5)
(C,6)
(D,7)
(D,8)


In [9]:
%%pig
-- Flatten only the bag: every tuple of every bag becomes its own row.
r = FOREACH u GENERATE FLATTEN(f3);
DUMP r;

(1)
(2)
(3)
(4)
(5)
(6)
(7)
(8)


In [10]:
%%pig
-- Block form of FOREACH: several statements can be placed between
-- the braces. Here the block holds a single GENERATE.
r1 = FOREACH u {
    GENERATE FLATTEN(f3);
};
DUMP r1;

(1)
(2)
(3)
(4)
(5)
(6)
(7)
(8)


In [11]:
%%pig
-- $1 is a positional reference to the second field of u (f2, an int);
-- the cast turns it into a double.
r1 = FOREACH u GENERATE (double) $1;
DUMP r1;

(10.0)
(20.0)
(30.0)
(40.0)


## cogroup

In [13]:
%%writefile data0.tsv
A	10	1
B	20	2
C	30	3
D	40	4

Writing data0.tsv


In [14]:
%%writefile data1.tsv
A	50	5
B	60	6
C	70	7
D	80	8

Writing data1.tsv


In [16]:
%%pig
-- Load the two inputs; both carry the key column f1.
u = LOAD 'data0.tsv'
    AS (f1:chararray, f2:int, f3:int);
v = LOAD 'data1.tsv'
    AS (f1:chararray, f4:int, f5:int);
-- COGROUP groups both relations by f1: each output row is
-- (key, bag of matching rows from u, bag of matching rows from v).
s = COGROUP u BY f1, v BY f1;
DUMP s;

(A,{(A,10,1)},{(A,50,5)})
(B,{(B,20,2)},{(B,60,6)})
(C,{(C,30,3)},{(C,70,7)})
(D,{(D,40,4)},{(D,80,8)})


## union

In [17]:
%%pig
-- UNION concatenates the rows of u and v into one relation.
-- Row order is not guaranteed (the run shown lists the rows of v first).
s = UNION u, v;
DUMP s;

(A,50,5)
(B,60,6)
(C,70,7)
(D,80,8)
(A,10,1)
(B,20,2)
(C,30,3)
(D,40,4)


## cross

In [19]:
%%pig
-- CROSS builds the Cartesian product: every row of u is paired with
-- every row of v (4 x 4 = 16 rows here).
s = CROSS u, v;
DUMP s;

(D,40,4,D,80,8)
(D,40,4,C,70,7)
(D,40,4,B,60,6)
(D,40,4,A,50,5)
(C,30,3,D,80,8)
(C,30,3,C,70,7)
(C,30,3,B,60,6)
(C,30,3,A,50,5)
(B,20,2,D,80,8)
(B,20,2,C,70,7)
(B,20,2,B,60,6)
(B,20,2,A,50,5)
(A,10,1,D,80,8)
(A,10,1,C,70,7)
(A,10,1,B,60,6)
(A,10,1,A,50,5)


## rank

In [18]:
%%pig
-- RANK prepends a rank column to each row of u, ranking by f2.
s = RANK u BY f2;
DUMP s;

(1,A,10,1)
(2,B,20,2)
(3,C,30,3)
(4,D,40,4)


## stream

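Revise la documentación sobre este operador. `STREAM` envía los registros de una relación a un programa externo y carga la salida de ese programa como una nueva relación.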

In [20]:
%%pig
-- Rebuild s as the cross product of u and v (the rank section reassigned s);
-- this relation is the input of the STREAM statement that follows.
s = CROSS u, v;
DUMP s;

(D,40,4,D,80,8)
(D,40,4,C,70,7)
(D,40,4,B,60,6)
(D,40,4,A,50,5)
(C,30,3,D,80,8)
(C,30,3,C,70,7)
(C,30,3,B,60,6)
(C,30,3,A,50,5)
(B,20,2,D,80,8)
(B,20,2,C,70,7)
(B,20,2,B,60,6)
(B,20,2,A,50,5)
(A,10,1,D,80,8)
(A,10,1,C,70,7)
(A,10,1,B,60,6)
(A,10,1,A,50,5)


In [21]:
%%pig
-- Send every record of s to the external command given in backticks and
-- load the command's standard output as relation w. `tail -n 2` keeps only
-- the last two records it receives.
-- NOTE: relations are unordered unless ORDER BY is used, so which two rows
-- survive may vary between runs.
w = STREAM s THROUGH `tail -n 2 `;
DUMP w;

===== Task Information Header =====
Command: tail -n 2 (stdin-org.apache.pig.builtin.PigStreaming/stdout-org.apache.pig.builtin.PigStreaming)
Start time: Tue Oct 16 20:53:26 COT 2018
=====          * * *          =====
===== Task Information Footer =====
End time: Tue Oct 16 20:53:26 COT 2018
Exit code: 0
Input records: 16
Input bytes: 3712 bytes (stdin using org.apache.pig.builtin.PigStreaming)
Output records: 2
Output bytes: 560 bytes (stdout using org.apache.pig.builtin.PigStreaming)
=====          * * *          =====
(A,10,1,B,60,6)
(A,10,1,A,50,5)


## cube

In [22]:
%%pig 
-- Show the current contents of u before applying CUBE to it.
DUMP u;

(A,10,1)
(B,20,2)
(C,30,3)
(D,40,4)


In [23]:
%%pig 
-- CUBE aggregates over every combination of the dimensions f1 and f2;
-- a dimension that is rolled up appears as null (empty) in the output.
-- Each output row is (group-key tuple, bag of contributing rows).
s = CUBE u BY CUBE(f1, f2);
DUMP s;

((A,10),{(A,10,1)})
((A,),{(A,,1)})
((B,20),{(B,20,2)})
((B,),{(B,,2)})
((C,30),{(C,30,3)})
((C,),{(C,,3)})
((D,40),{(D,40,4)})
((D,),{(D,,4)})
((,10),{(,10,1)})
((,20),{(,20,2)})
((,30),{(,30,3)})
((,40),{(,40,4)})
((,),{(,,4),(,,3),(,,1),(,,2)})


## explain


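Revise la documentación sobre este operador. `EXPLAIN` muestra los planes de ejecución (lógico, físico y de MapReduce) que Pig genera para calcular una relación.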

## illustrate

In [24]:
%%pig
-- ILLUSTRATE walks sample rows step by step through the statements
-- that produce s.
ILLUSTRATE s;

(D,40,4)
---------------------------------------------------------
| u     | f1:chararray      | f2:int      | f3:int      | 
---------------------------------------------------------
|       | D                 | 40          | 4           | 
|       | C                 | 30          | 3           | 
---------------------------------------------------------
------------------------------------------------------------
| cube     | f1:chararray      | f2:int      | f3:int      | 
------------------------------------------------------------
|          | D                 | 40          | 4           | 
|          | D                 |             | 4           | 
|          |                   | 40          | 4           | 
|          |                   |             | 4           | 
|          | C                 | 30          | 3           | 
|          | C                 |             | 3           | 
|          |                   | 30          | 3           | 
|          |          

---