In [16]:
import logging
import argparse

import apache_beam as beam
import apache_beam.io.gcp.bigquery as bq

In [17]:
from apache_beam.runners.interactive.interactive_runner import InteractiveRunner
import apache_beam.runners.interactive.interactive_beam as ib

In [18]:
N=9
p = beam.Pipeline(InteractiveRunner())
squares = (p | "Create Elements" >> beam.Create(range(N))
             | "Squares" >> beam.Map(lambda x: x**2))

In [19]:
ib.show_graph(p)

In [20]:
ib.show(squares)

In [279]:
PROJECT_ID ="my-bq-demo"
DATASET_ID="input"
TABLE_ID="input_for_transpose"
INPUT_SCHEMA = {
    'fields': [{
        'name': 'ID', 
        'type': 'INTEGER', 
        'mode': 'REQUIRED'
    }, 
    {
        'name': 'CLASS', 
        'type': 'STRING', 
        'mode': 'NULLABLE'
    },
    {
        'name': 'SALES', 
        'type': 'FLOAT', 
        'mode': 'NULLABLE'
        }]
        }

OUTPUT_SCHEMA ={'fields': []}
key_field = ['ID']
pivot_field = ['CLASS']
value_field = ['SALES']
value_schema = {'SALES':'FLOAT'}
table_id = 'my-bq-demo:output.out111'

In [258]:
class GetPivotValues(beam.DoFn):
    def process(self, element):
        print(element)
        rt_elem = {}
        row = element
        for field in pivot_field:
            rt_elem = {field, row[field]}
            yield rt_elem

In [259]:
class UniqueList(beam.DoFn):
    def process(self, element):
        rt_elem =  list(set(element[1])) 
        yield rt_elem

In [287]:
class FoldPivotValues(beam.DoFn):
    def process(self, element):
        rt_dict={}
        rt_dict['mode'] = 'NULLABLE'
        for piv in element:
            for val in value_field:
                name = f"{piv}_{val}"
                rt_dict['name'] = name
                rt_dict['type'] = value_schema[val]
                # OUTPUT_SCHEMA['fields'].append(rt_dict)
                yield rt_dict
         

In [288]:
def getKeyFieldSchema(list_keys):
    rt_l = []
    for key in list_keys:
        for d in INPUT_SCHEMA['fields']:
            if (d['name'] == key):
                rt_l.append(d)
                # OUTPUT_SCHEMA['fields'].append(d)
    return rt_l

In [262]:
getKeyFieldSchema(key_field)

[{'name': 'ID', 'type': 'INTEGER', 'mode': 'REQUIRED'}]

In [275]:
def addToOutputSchema(element):
    OUTPUT_SCHEMA['fields'].append(element)

In [293]:

OUTPUT_SCHEMA ={'fields': []}
with beam.Pipeline(InteractiveRunner()) as p:
        
        input_table = ( p 
                    | "Read BigQuery table" >>  beam.Create(
                         [{"ID":123, "CLASS": "AAA", "SALES":101.44},
                        {"ID":123, "CLASS": "BBB", "SALES":345.44},
                        {"ID":1234, "CLASS": "AAA", "SALES":458.44}]
                        )
                    )
        pivoted_schema = ( input_table
                  | "Get Pivot Schema" >> beam.ParDo(GetPivotValues())        
                  | "Group by pivot field" >> beam.GroupByKey()
                  | "Get unique list" >> beam.ParDo(UniqueList())
                  | "Fold pivot values to columns" >> beam.ParDo(
                        FoldPivotValues())
                 )
        list_key_dict = getKeyFieldSchema(key_field)
        key_schema = (p 
                      | "Create Key Schema" >> beam.Create(list_key_dict)
                     )
        # merged = ((input1,input2) | 'Merge PCollections' >> beam.Flatten())
        dynamic_schema = ( (key_schema,pivoted_schema) 
                          | 'Merge Schema' >> beam.Flatten()
                          # | "Dynamic Schema" >> beam.CombineGlobally(beam.combiners.ToListCombineFn())
                )
        # (dynamic_schema  
        #             | "add to the output schema" >> beam.Map(addToOutputSchema)
        #                           )
        # print("=====")        
        # print(OUTPUT_SCHEMA)
        OUTPUT_SCHEMA['fields'] = beam.pvalue.AsList(dynamic_schema)        

{'ID': 123, 'CLASS': 'AAA', 'SALES': 101.44}
{'ID': 123, 'CLASS': 'BBB', 'SALES': 345.44}
{'ID': 1234, 'CLASS': 'AAA', 'SALES': 458.44}


In [294]:
print(OUTPUT_SCHEMA)

{'fields': <apache_beam.pvalue.AsList object at 0x1436f9a90>}


In [None]:
{'fields': [{'name': 'ID', 'type': 'INTEGER', 'mode': 'REQUIRED'},
            {'mode': 'NULLABLE', 'name': 'BBB_SALES', 'type': 'FLOAT'}, 
            {'mode': 'NULLABLE', 'name': 'BBB_SALES', 'type': 'FLOAT'}, 
            
            [{'name': 'ID', 'type': 'INTEGER', 'mode': 'REQUIRED'}, 
             {'mode': 'NULLABLE', 'name': 'AAA_SALES', 'type': 'FLOAT'}, 
             {'mode': 'NULLABLE', 'name': 'BBB_SALES', 'type': 'FLOAT'}]]}


In [295]:
ib.show(input_table)

In [296]:
ib.show(pivoted_schema)

In [297]:
ib.show(key_schema)

In [298]:
ib.show(dynamic_schema)

In [59]:
t = ("CLASS" , ['AAA', 'BBB', 'AAA'])

In [61]:
t[1]

['AAA', 'BBB', 'AAA']

In [99]:
import typing

In [None]:
class Transaction(typing.NamedTuple):
      bank: str
      purchase_amount: float
      
      def AddField(self, field_name, field_type):
        


In [116]:
class Transaction(typing.NamedTuple):
  bank: str
  purchase_amount: float

with beam.Pipeline(InteractiveRunner()) as p:
    
    input = (p | beam.Create([("AAA",23.2),("BBB",4587.4)]).with_output_types(Transaction))
            # )
    # pc = (input | beam.Map()

In [117]:
ib.show(input)

In [None]:
Transaction.AddFields(

In [None]:
# AddFields
with beam.Pipeline(InteractiveRunner()) as p:
    (p | beam.Create([("AAA",23.2)]).with_output_types(Transaction)
     

In [107]:
with beam.Pipeline(InteractiveRunner()) as p:
    lines = ( p
                | "Input File" >> beam.Create(
                    [
                        {"bank":"Adfj","purchase":234.45,"notes":"dsjkfhdsj"},
                        {"bank":"sdhf","purchase":56.45,"notes":"sdf"},
                        {"bank":"bbb","purchase":768.45,"notes":"ddfsj"},
                        {"bank":"ddd","purchase":245.45}
                    ]
                ))


In [108]:
ib.show(lines)

In [122]:
str_schema = 'ID:INTEGER,CLASS:STRING,SALES:FLOAT'

In [123]:
table_schema = bq.WriteToBigQuery.get_table_schema_from_string(str_schema)
print(table_schema)

<TableSchema
 fields: [<TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'ID'
 type: 'INTEGER'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'CLASS'
 type: 'STRING'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'SALES'
 type: 'FLOAT'>]>


In [124]:
bq.WriteToBigQuery.get_dict_table_schema(str_schema)

{'fields': [{'name': 'ID', 'type': 'INTEGER', 'mode': 'NULLABLE'},
  {'name': 'CLASS', 'type': 'STRING', 'mode': 'NULLABLE'},
  {'name': 'SALES', 'type': 'FLOAT', 'mode': 'NULLABLE'}]}

In [125]:
bq.WriteToBigQuery.get_dict_table_schema(table_schema)

{'fields': [{'name': 'ID', 'type': 'INTEGER', 'mode': 'NULLABLE'},
  {'name': 'CLASS', 'type': 'STRING', 'mode': 'NULLABLE'},
  {'name': 'SALES', 'type': 'FLOAT', 'mode': 'NULLABLE'}]}