In [None]:
!pip3 install apache_beam

In [None]:
import apache_beam as beam

### Initial example

In [None]:
# Read the grocery CSV, keep only the rows whose Item_Fat_Content is "Regular",
# and write the surviving records to a text file.
#
# Column layout, from the header row of grocery.txt:
#   0 Item_Identifier, 1 Item_Weight, 2 Item_Fat_Content, 3 Item_Visibility,
#   4 Item_Type, 5 Item_MRP, ...
# The fat-content flag lives in column 2. Column 5 is Item_MRP (a price), which
# never equals "Regular", so filtering on it produced an empty output file.
pipeline1 = beam.Pipeline()
grocery = (
    pipeline1
    |"Read from text" >> beam.io.ReadFromText("/content/grocery.txt",skip_header_lines=1)
    |"Split the record" >> beam.Map(lambda record: record.split(','))
    |"Filter regular" >> beam.Filter(lambda record: record[2]=="Regular")
    |"Write text" >>beam.io.WriteToText("/content/regular_filter.txt")
)

pipeline1.run()

<apache_beam.runners.portability.fn_api_runner.fn_runner.RunnerResult at 0x7f36b928fdc0>

In [None]:
# Print the output shard produced by WriteToText("/content/regular_filter.txt").
!cat /content/regular_filter.txt-00000-of-00001

In [None]:
# Read the grocery CSV, keep the "Regular" fat-content rows, and print them.
# The context manager runs the pipeline and waits for it on exit.
with beam.Pipeline() as pipeline:
  grocery = (
    pipeline
    |"Read from text" >> beam.io.ReadFromText("/content/grocery.txt",skip_header_lines=1)
    |"Split the record" >> beam.Map(lambda record: record.split(','))
    # Item_Fat_Content is column 2 of grocery.txt; column 5 is Item_MRP (a
    # price) and can never equal "Regular".
    |"Filter regular" >> beam.Filter(lambda record: record[2]=="Regular")
    |"Print" >> beam.Map(print)
  )

### Maps and Filter

In [None]:
# MapTuple unpacks each (icon, plant) pair into separate lambda arguments.
garden_pairs = [
    ('🍓', 'Strawberry'),
    ('🥕', 'Carrot'),
    ('🍆', 'Eggplant'),
    ('🍅', 'Tomato'),
    ('🥔', 'Potato'),
]

with beam.Pipeline() as pipeline:
  plants = (
      pipeline
      | 'Gardening plants' >> beam.Create(garden_pairs)
      # Emit "<plant>-<icon>", i.e. the pair in reversed order joined by a dash.
      | 'Format' >> beam.MapTuple(lambda icon, plant: f'{plant}-{icon}')
      | beam.Map(print))

Strawberry-🍓
Carrot-🥕
Eggplant-🍆
Tomato-🍅
Potato-🥔


In [None]:
def is_perennial(plant):
  """Tell whether a plant record is a perennial.

  Args:
    plant: Mapping that has a 'duration' key.

  Returns:
    True when plant['duration'] equals 'perennial', otherwise False.
  """
  duration = plant['duration']
  return duration == 'perennial'

# Plant records; only those whose 'duration' is 'perennial' pass the filter.
plant_records = [
    {'icon': '🍓', 'name': 'Strawberry', 'duration': 'perennial'},
    {'icon': '🥕', 'name': 'Carrot', 'duration': 'biennial'},
    {'icon': '🍆', 'name': 'Eggplant', 'duration': 'perennial'},
    {'icon': '🍅', 'name': 'Tomato', 'duration': 'annual'},
    {'icon': '🥔', 'name': 'Potato', 'duration': 'perennial'},
]

with beam.Pipeline() as pipeline:
  perennials = (
      pipeline
      | 'Gardening plants' >> beam.Create(plant_records)
      | 'Filter perennials' >> beam.Filter(is_perennial)
      | beam.Map(print))

### ParDo, Keys, KvSwap, Values, ToString Transforms

• ParDo is a Beam transform for generic parallel processing.

• The ParDo processing paradigm is similar to the “Map” phase of a Map/Shuffle/Reduce-style algorithm: a ParDo transform considers each element in the input PCollection, performs some processing function (your user code) on that element, and emits zero, one, or multiple elements to an output PCollection.

In [None]:
class SplitRow(beam.DoFn):
  """DoFn that splits a comma-separated line into a list of fields."""

  def process(self, element):
    # Wrap the field list in an outer list so the whole record is returned as
    # a single output item rather than one item per field.
    fields = element.split(',')
    return [fields]


class ComputeWordLengthFn(beam.DoFn):
  """DoFn that outputs len(element) for every input element."""

  def process(self, element):
    # One-item list: exactly one output (the length) per input element.
    length = len(element)
    return [length]
     

In [None]:
with beam.Pipeline() as pipeline:
  # Parse the students CSV into field lists. skip_header_lines is a line
  # COUNT, so pass 1 explicitly (True only worked because bool subclasses int).
  input_data = (pipeline
                | "read from text">> beam.io.ReadFromText("/content/students.txt", skip_header_lines=1)
                | "spliting the record" >> beam.ParDo(SplitRow()))

  # Column 5 is Result (Id,Student_name,City,Age,Marks,Result). This step keeps
  # the FAIL rows, so its label now says FAIL instead of the misleading PASS.
  count_data = (input_data
                |"filtering the data with FAIL" >> beam.Filter(lambda record : record[5]=="FAIL"))

  # ComputeWordLengthFn emits len(record), i.e. the number of fields in each
  # failing record, and prints it.
  word_lengths = (count_data
                 |"countof records" >> beam.ParDo(ComputeWordLengthFn())
                 |beam.Map(print))

  # Persist the failing records themselves under the "fail_data" prefix.
  output_data = (count_data
                 | "Write to Text" >> beam.io.WriteToText("fail_data"))


In [None]:
!{('head -n 10 fail_data-00000-of-00001')}

**Keys:** Takes a collection of key-value pairs and returns the key of each element.


**Values:**
Takes a collection of key-value pairs, and returns the value of each element.


In [None]:
# (icon, name) pairs; Keys() keeps only the first item of each pair.
garden_pairs = [
    ('🍓', 'Strawberry'),
    ('🥕', 'Carrot'),
    ('🍆', 'Eggplant'),
    ('🍅', 'Tomato'),
    ('🥔', 'Potato'),
]

with beam.Pipeline() as pipeline:
  icons = (
      pipeline
      | 'Garden plants' >> beam.Create(garden_pairs)
      | 'Keys' >> beam.Keys()
      | beam.Map(print))

In [None]:
# (icon, name) pairs; Values() keeps only the second item of each pair.
garden_pairs = [
    ('🍓', 'Strawberry'),
    ('🥕', 'Carrot'),
    ('🍆', 'Eggplant'),
    ('🍅', 'Tomato'),
    ('🥔', 'Potato'),
]

with beam.Pipeline() as pipeline:
  plants = (
      pipeline
      | 'Garden plants' >> beam.Create(garden_pairs)
      | 'Values' >> beam.Values()
      | beam.Map(print))

**ToString**

Transforms every element in an input collection to a string. Any non-string element can be converted to a string using standard Python functions and methods. Many I/O transforms, such as textio.WriteToText, expect their input elements to be strings.

1. Key-value pairs to string
2. Elements to string
3. Iterables to string




In [None]:

# (icon, name) pairs to be rendered as strings.
garden_pairs = [
    ('🍓', 'Strawberry'),
    ('🥕', 'Carrot'),
    ('🍆', 'Eggplant'),
    ('🍅', 'Tomato'),
    ('🥔', 'Potato'),
]

with beam.Pipeline() as pipeline:
  plants = (
      pipeline
      | 'Garden plants' >> beam.Create(garden_pairs)
      # Kvs() formats key-value pairs; the other variants listed above are
      # beam.ToString.Element() and beam.ToString.Iterables().
      | 'To string' >> beam.ToString.Kvs()
      | beam.Map(print))

**KvSwap:**

• Takes a collection of key-value pairs and returns a collection of key-value pairs which has each key and value swapped.



In [None]:
# (icon, name) pairs; KvSwap() turns each into (name, icon).
garden_pairs = [
    ('🍓', 'Strawberry'),
    ('🥕', 'Carrot'),
    ('🍆', 'Eggplant'),
    ('🍅', 'Tomato'),
    ('🥔', 'Potato'),
]

with beam.Pipeline() as pipeline:
  plants = (
      pipeline
      | 'Garden plants' >> beam.Create(garden_pairs)
      | 'Key-Value swap' >> beam.KvSwap()
      | beam.Map(print))

### GroupBy, GroupByKey, CoGroupByKey and GroupIntoBatches Transforms


GroupBy:

* Takes a collection of elements and produces a collection grouped by properties of those elements.

* Unlike GroupByKey, the key is dynamically created from the elements themselves.

In [None]:
# Fruit names to be grouped by their first letter.
fruits = ['strawberry', 'raspberry', 'blueberry', 'blackberry', 'banana']

with beam.Pipeline() as p:
  grouped = (
      p
      | beam.Create(fruits)
      # The grouping key is computed from each element: its first character.
      | beam.GroupBy(lambda s: s[0])
      | beam.Map(print))

('s', ['strawberry'])
('r', ['raspberry'])
('b', ['blueberry', 'blackberry', 'banana'])


GroupByKey

* Takes a keyed collection of elements and produces a collection where each element consists of a key and all values associated with that key.


In [None]:
# Input for the GroupByKey demo. Every name occurs twice: once with an
# [age, job] list and once with an employment-status string.
profiles = [
    ("vignesh", [27, "engineer"]),
    ("neethu", [27, "developer"]),
    ("farooqui", [26, "data analyst"]),
    ("sai", [29, "web developer"]),
    ("tinkle", [28, "fullstack developer"]),
]
statuses = [
    ("neethu", 'Employed'),
    ("sai", 'Unemployed'),
    ("tinkle", 'Employed'),
    ("farooqui", 'Employed'),
    ("vignesh", 'Unemployed'),
]
# Profiles first, then statuses, so the element order is unchanged.
records = profiles + statuses
     

In [None]:
with beam.Pipeline() as pipeline:
  # Turn the (name, value) records into a keyed PCollection.
  keyed_records = pipeline | 'Create produce counts' >> beam.Create(records)

  # Collect every value that shares a key into one (key, values) element.
  produce_counts = (
      keyed_records
      | 'Group counts per produce' >> beam.GroupByKey()
      | beam.Map(print))

('vignesh', [[27, 'engineer'], 'Unemployed'])
('neethu', [[27, 'developer'], 'Employed'])
('farooqui', [[26, 'data analyst'], 'Employed'])
('sai', [[29, 'web developer'], 'Unemployed'])
('tinkle', [[28, 'fullstack developer'], 'Employed'])


 CoGroupByKey 

* Aggregates all input elements by their key and allows downstream processing to consume all values associated with the key.
* GroupByKey performs this operation over a single input collection, and thus over a single type of input values.
* CoGroupByKey, by contrast, operates over multiple input collections. As a result, the result for each key is a tuple of the values associated with that key in each input collection.


In [None]:
# Two keyed inputs that share the student name as key.
student_cities = [
    ('vignesh', 'bangalore'),
    ('khaula', 'hyderabad'),
    ('neethu', 'malapur'),
    ('sai', 'chennai'),
]
student_marks = [
    ('vignesh', [15,"FAIL"]),
    ('khaula', [99,"PASS"]),
    ('neethu', [100,"PASS"]),
    ('sai',[ 37,"FAIL"]),
]

with beam.Pipeline() as pipeline:
  student_pairs = pipeline | 'Create icons' >> beam.Create(student_cities)
  student_result = pipeline | 'Create durations' >> beam.Create(student_marks)

  # The dict keys become the tags under which each input's values are
  # reported for every student.
  tagged_inputs = {'icons': student_pairs, 'durations': student_result}
  plants = (
      tagged_inputs
      | 'Merge' >> beam.CoGroupByKey()
      | beam.Map(print))

('vignesh', {'icons': ['bangalore'], 'durations': [[15, 'FAIL']]})
('khaula', {'icons': ['hyderabad'], 'durations': [[99, 'PASS']]})
('neethu', {'icons': ['malapur'], 'durations': [[100, 'PASS']]})
('sai', {'icons': ['chennai'], 'durations': [[37, 'FAIL']]})


In [None]:

# Marks and pass/fail results, both keyed by student name.
marks_by_student = [
    ('vignesh', 15),
    ('khaula', 99),
    ('neethu', 100),
    ('sai', 37),
]
result_by_student = [
    ('vignesh', "FAIL"),
    ('khaula',"PASS"),
    ('neethu',"PASS"),
    ('sai', "FAIL"),
]

with beam.Pipeline() as pipeline:
  student_pairs = pipeline | 'Create icons' >> beam.Create(marks_by_student)
  student_result = pipeline | 'Create durations' >> beam.Create(result_by_student)

  # Join the two inputs per student; values are reported under 'Marks' and
  # 'Result'.
  tagged_inputs = {'Marks': student_pairs, 'Result': student_result}
  plants = (
      tagged_inputs
      | 'Merge' >> beam.CoGroupByKey()
      | beam.Map(print))

('vignesh', {'Marks': [15], 'Result': ['FAIL']})
('khaula', {'Marks': [99], 'Result': ['PASS']})
('neethu', {'Marks': [100], 'Result': ['PASS']})
('sai', {'Marks': [37], 'Result': ['FAIL']})


GroupIntoBatches 

* Batches the input into desired batch size.




In [None]:
# (season, produce) pairs; batching happens independently per season key.
seasonal_produce = [
    ('spring', '🍓'),
    ('spring', '🥕'),
    ('spring', '🍆'),
    ('spring', '🍅'),
    ('summer', '🥕'),
    ('summer', '🍅'),
    ('summer', '🌽'),
    ('fall', '🥕'),
    ('fall', '🍅'),
    ('winter', '🍆'),
]

with beam.Pipeline() as pipeline:
  batches_with_keys = (
      pipeline
      | 'Create produce' >> beam.Create(seasonal_produce)
      # Batch size 4; try 3 or 2 to see how the groups split.
      | 'Group into batches' >> beam.GroupIntoBatches(4)
      | beam.Map(print))

('spring', ['🍓', '🥕', '🍆', '🍅'])
('summer', ['🥕', '🍅', '🌽'])
('fall', ['🥕', '🍅'])
('winter', ['🍆'])


  | 'Group into batches' >> beam.GroupIntoBatches(4)  #3, #2


### Flatten and Partition


Partition:

• Partition is a Beam transform for PCollection objects that store the same data type. It splits a single PCollection into a fixed number of smaller collections.

• Partition divides the elements of a PCollection according to a partitioning function that you provide.

• The partitioning function contains the logic that determines how to split up the elements of the input PCollection into each resulting partition PCollection.

• The number of partitions must be determined at graph construction time.

• Partition accepts a function that receives the number of partitions, and returns the index of the desired partition for the element. The number of partitions passed must be a positive integer, and it must return an integer in the range 0 to num_partitions-1.

In [None]:
import apache_beam as beam

# Input numbers for the partition demo (a set literal, passed to beam.Create).
number = {11,12,13,44,55,61,77,88,99}

# Pipeline object; it is executed explicitly with p.run().
p = beam.Pipeline()

# Assigns each number to a partition.
def partition_fn(element,num_partition):
  """Return the partition index for element: 0 if it is even, 1 otherwise.

  Args:
    element: The value being partitioned (must support the % operator).
    num_partition: Total number of partitions; not used by this rule.

  Returns:
    0 when element % 2 == 0, else 1.
  """
  if element % 2 == 0:
    return 0
  return 1


# Split the numbers into 2 partitions: Partition takes the function that picks
# a partition index and the number of partitions.
number_pc = (
    p
    | beam.Create(number)
    | beam.Partition(partition_fn, 2)
)

# Print only partition 0 (the even numbers, per partition_fn).
number_pc[0] | 'Printing first partition' >> beam.Map(print)

p.run()
     



12
44
88


<apache_beam.runners.portability.fn_api_runner.fn_runner.RunnerResult at 0x7fb801ffaa00>

• Flatten is a Beam transform for PCollection objects that store the same data type. Flatten merges multiple PCollection objects into a single logical PCollection.

• Kind of Union operation

In [None]:

with beam.Pipeline() as pipeline:
  # Two independent inputs holding the same element type.
  even_data = pipeline | "Create even data" >> beam.Create({2,4,6,8,10})
  odd_data = pipeline | "Create odd data" >> beam.Create({1,3,5,7,9,11})

  # Flatten merges both PCollections into one, which is then printed.
  merged = (even_data, odd_data) | beam.Flatten()
  result = merged | beam.Map(print)

1
3
5
7
9
11
2
4
6
8
10


### Latest, Max, Min, Sample, Sum and Top

Latest:
 
* Gets the element with the latest timestamp.
* we create a pipeline with a PCollection of produce with a timestamp for their harvest date. We use Latest to get the element with the latest timestamp from the PCollection.

In [None]:
import time

def to_unix_time(time_str, format='%Y-%m-%d %H:%M:%S'):
  """Convert a formatted date-time string to seconds since the epoch.

  Args:
    time_str: The date-time text to parse.
    format: strptime pattern describing time_str.

  Returns:
    float: the timestamp computed by time.mktime, which interprets the parsed
    value as local time.
  """
  parsed = time.strptime(time_str, format)
  return time.mktime(parsed)


# Crops with their harvest date; the date becomes each element's timestamp.
crops = [
    {'item': '🥬', 'harvest': '2020-02-24 00:00:00'},
    {'item': '🍓', 'harvest': '2020-06-16 00:00:00'},
    {'item': '🥕', 'harvest': '2020-07-17 00:00:00'},
    {'item': '🍆', 'harvest': '2020-10-26 00:00:00'},
    {'item': '🍅', 'harvest': '2020-10-01 00:00:00'},
]

with beam.Pipeline() as pipeline:
  latest_element = (
      pipeline
      | 'Create crops' >> beam.Create(crops)
      # Attach the harvest time as the event timestamp of each item.
      | 'With timestamps' >> beam.Map(
          lambda crop: beam.window.TimestampedValue(
              crop['item'], to_unix_time(crop['harvest'])))
      | 'Get latest element' >> beam.combiners.Latest.Globally()
      | beam.Map(print))

🍆


Max:

* Gets the element with the maximum value within each aggregation.
* we create a pipeline with a PCollection. Then, we get the element with the maximum value in different ways.

In [None]:
# Values whose global maximum is computed.
max_inputs = [3, 4, 1, 2]

with beam.Pipeline() as pipeline:
  max_element = (
      pipeline
      | 'Create numbers' >> beam.Create(max_inputs)
      # "or [None]" makes an empty input yield None instead of letting max()
      # raise on an empty sequence.
      | 'Get max value' >> beam.CombineGlobally(
          lambda elements: max(elements or [None]))
      | beam.Map(print))

4


Use `CombinePerKey()` to get the maximum element for each unique key in a PCollection of key-values.



In [None]:
# (produce, value) pairs; the maximum is taken separately for each produce.
produce_values = [
    ('🥕', 3),
    ('🥕', 2),
    ('🍆', 1),
    ('🍅', 4),
    ('🍅', 5),
    ('🍅', 3),
]

with beam.Pipeline() as pipeline:
  elements_with_max_value_per_key = (
      pipeline
      | 'Create produce' >> beam.Create(produce_values)
      | 'Get max value per key' >> beam.CombinePerKey(max)
      | beam.Map(print))

('🥕', 3)
('🍆', 1)
('🍅', 5)


Min:
* Gets the element with the minimum value within each aggregation.
* we create a pipeline with a PCollection. Then, we get the element with the minimum value in different ways.

In [None]:

# Values whose global minimum is computed.
min_inputs = [3, 4, 1, 2]

with beam.Pipeline() as pipeline:
  min_element = (
      pipeline
      | 'Create numbers' >> beam.Create(min_inputs)
      # "or [-1]" makes an empty input yield -1 instead of letting min()
      # raise on an empty sequence.
      | 'Get min value' >> beam.CombineGlobally(
          lambda elements: min(elements or [-1]))
      | beam.Map(print))
     

1


Use `CombinePerKey()` to get the minimum element for each unique key in a PCollection of key-values.



In [None]:
# (produce, value) pairs; the minimum is taken separately for each produce.
produce_values = [
    ('🥕', 3),
    ('🥕', 2),
    ('🍆', 1),
    ('🍅', 4),
    ('🍅', 5),
    ('🍅', 3),
]

with beam.Pipeline() as pipeline:
  elements_with_min_value_per_key = (
      pipeline
      | 'Create produce' >> beam.Create(produce_values)
      | 'Get min value per key' >> beam.CombinePerKey(min)
      | beam.Map(print))

('🥕', 2)
('🍆', 1)
('🍅', 3)


Sample:

* Transforms for taking samples of the elements in a collection, or samples of the values associated with each key in a collection of key-value pairs.
* we create a pipeline with a PCollection. Then, we get a random sample of elements in different ways.

Sample.FixedSizeGlobally() to get a fixed-size random sample of elements from the entire PCollection.

In [None]:

# Population from which a random sample is drawn.
produce_names = [
    '🍓 Strawberry',
    '🥕 Carrot',
    '🍆 Eggplant',
    '🍅 Tomato',
    '🥔 Potato',
]

with beam.Pipeline() as pipeline:
  sample = (
      pipeline
      | 'Create produce' >> beam.Create(produce_names)
      # Random sample of 3 elements; the result varies between runs.
      | 'Sample N elements' >> beam.combiners.Sample.FixedSizeGlobally(3)
      | beam.Map(print))

['🥕 Carrot', '🍆 Eggplant', '🍓 Strawberry']


Sum:

* Sums all the elements within each aggregation.
* we create a pipeline with a PCollection. Then, we get the sum of all the element values in different ways.

In [None]:
# Values to be added up across the whole collection.
numbers = [3, 4, 1, 2]

with beam.Pipeline() as pipeline:
  total = (
      pipeline
      | 'Create numbers' >> beam.Create(numbers)
      | 'Sum values' >> beam.CombineGlobally(sum)
      | beam.Map(print))
     

10


Use `CombinePerKey()` to get the sum of all the element values for each unique key in a PCollection of key-values.



In [None]:

# (produce, value) pairs; values are summed separately for each produce.
produce_values = [
    ('🥕', 3),
    ('🥕', 2),
    ('🍆', 1),
    ('🍅', 4),
    ('🍅', 5),
    ('🍅', 3),
]

with beam.Pipeline() as pipeline:
  totals_per_key = (
      pipeline
      | 'Create produce' >> beam.Create(produce_values)
      | 'Sum values per key' >> beam.CombinePerKey(sum)
      | beam.Map(print))

('🥕', 5)
('🍆', 1)
('🍅', 12)


Top:

* Transforms for finding the largest (or smallest) set of elements in a collection, or the largest (or smallest) set of values associated with each key in a collection of key-value pairs.
* we create a pipeline with a PCollection. Then, we get the largest or smallest elements in different ways.

In [None]:

# Values from which the 3 largest are selected.
candidates = [3, 4, 1, 2]

with beam.Pipeline() as pipeline:
  largest_elements = (
      pipeline
      | 'Create numbers' >> beam.Create(candidates)
      | 'Largest N values' >> beam.combiners.Top.Largest(3)
      | beam.Map(print))

[4, 3, 2]


### Combine Core Transform in Apache Beam

CombineGlobally:

* Combines all elements in a collection.


The more general way to combine elements, and the most flexible, is with a class that inherits from CombineFn.

* CombineFn.create_accumulator(): This creates an empty accumulator. For example, an empty accumulator for a sum would be 0, while an empty accumulator for a product (multiplication) would be 1.

* CombineFn.add_input(): Called once per element. Takes an accumulator and an input element, combines them and returns the updated accumulator.

* CombineFn.merge_accumulators(): Multiple accumulators could be processed in parallel, so this function helps merging them into a single accumulator.

* CombineFn.extract_output(): Allows additional calculations to be performed on the final accumulator before the result is returned.

In [None]:

class AverageFn(beam.CombineFn):
  """CombineFn that computes the arithmetic mean of its inputs.

  The accumulator is a (running_sum, element_count) tuple.
  """

  def create_accumulator(self):
    # Empty accumulator: a float zero sum and zero elements seen.
    empty_sum, empty_count = 0.0, 0
    return (empty_sum, empty_count)

  def add_input(self, sum_count, input):
    # Fold a single element into the accumulator.
    running_total, seen = sum_count
    return (running_total + input, seen + 1)

  def merge_accumulators(self, accumulators):
    # Transpose [(sum, count), ...] into (sums...) and (counts...), then add
    # each group up to get one combined accumulator.
    partial_sums, partial_counts = zip(*accumulators)
    return (sum(partial_sums), sum(partial_counts))

  def extract_output(self, sum_count):
    running_total, seen = sum_count
    if not seen:
      # Nothing was combined: report NaN instead of dividing by zero.
      return float('NaN')
    return running_total / seen

In [None]:
with beam.Pipeline() as p:
  # Numbers to average.
  values = p | "Create data" >> beam.Create([21,45,78,99,1,22,5])

  # Reduce the whole collection to its mean and write it under 'data/result'.
  input_data = (
      values
      | "Combine Globally" >> beam.CombineGlobally(AverageFn())
      | "Write to Local" >> beam.io.WriteToText('data/result'))

In [None]:

!{'head -n 10 data/result-00000-of-00001'}

38.714285714285715


CombinePerKey:


Combines all elements for each key in a collection.

In [None]:
# (plant, count) pairs; counts are summed separately for each plant.
plant_counts = [
    ('🥕', 3),
    ('🥕', 2),
    ('🍆', 1),
    ('🍅', 4),
    ('🍅', 5),
    ('🍅', 3),
]

with beam.Pipeline() as pipeline:
  total = (
      pipeline
      | 'Create plant counts' >> beam.Create(plant_counts)
      | 'Sum' >> beam.CombinePerKey(sum)
      | beam.Map(print))

('🥕', 5)
('🍆', 1)
('🍅', 12)


CombineValues:

* Combines an iterable of values in a keyed collection of elements.

* CombineValues accepts a function that takes an iterable of elements as an input, and combines them to return a single element.

* CombineValues expects a keyed PCollection of elements, where the value is an iterable of elements to be combined.

In [None]:

# Already-grouped input: each key carries the list of values to combine.
grouped_counts = [
    ('🥕', [3, 2]),
    ('🍆', [1]),
    ('🍅', [4, 5, 3]),
]

with beam.Pipeline() as pipeline:
  total = (
      pipeline
      | 'Create produce counts' >> beam.Create(grouped_counts)
      # sum() is applied to the value list of every key.
      | 'Sum' >> beam.CombineValues(sum)
      | beam.Map(print))
     

('🥕', 5)
('🍆', 1)
('🍅', 12)


### Side Inputs and Outputs

Side Inputs:

• A side input is an additional input that your DoFn can access each time it processes an element in the input PCollection.

• In addition to the main input PCollection, you can provide additional inputs to a ParDo transform in the form of side inputs.



In [None]:
p1 = beam.Pipeline()

# Load the student IDs to exclude: one per line, trailing whitespace removed.
input_list = []
with open ('students_exclude.txt','r') as exclude_file:
  input_list.extend(stud_id.rstrip() for stud_id in exclude_file)

print(input_list)
     

In [None]:

class SplitRow(beam.DoFn):
  """DoFn that splits a CSV line and drops records whose ID is excluded.

  NOTE: this redefinition replaces the earlier one-argument SplitRow defined
  in this notebook; process() here needs the extra input_list argument.
  """

  def process(self,element,input_list):
    customer = element.split(',')
    # The first field is compared against the exclusion list; excluded
    # records produce no output.
    if customer[0] in input_list:
      return None
    return [customer]

# input_list is passed to SplitRow.process as an extra argument; any number of
# side inputs can be passed to ParDo this way.
exclude_listed_ids = beam.ParDo(SplitRow(),input_list)

customers = (
    p1
    | beam.io.ReadFromText('Students_age.txt')
    | exclude_listed_ids
    | beam.io.WriteToText('data/output')
)
p1.run()

In [None]:
!{('head -n 10 data/output-00000-of-00001')}


Side Outputs/Additional Outputs:

* Additional outputs in parDo transformation

* While ParDo always produces a main output PCollection (as the return value from apply), you can also have your ParDo produce any number of additional output PCollections.

In [None]:
p1 = beam.Pipeline()

# Load the IDs to exclude: one per line, trailing whitespace removed.
side_list = []
with open ('students_exclude.txt','r') as exclude_file:
  side_list.extend(cust_id.rstrip() for cust_id in exclude_file)

print(side_list)

class SplitRow(beam.DoFn):
  """DoFn that splits a CSV line and drops records whose ID is in side_list.

  NOTE: this redefinition replaces any SplitRow defined earlier in this
  notebook; process() here needs the extra side_list argument.
  """

  def process(self,element,side_list):
    customer = element.split(',')
    # Records whose first field appears in side_list produce no output.
    if customer[0] in side_list:
      return None
    return [customer]

class ProcessCustomers(beam.DoFn):
  """Routes each split record to the main output and/or tagged outputs.

  Outputs:
    main output      records whose element[2] equals `country`
    'Other_student'  records whose element[2] differs from `country`
    'Names_r'        records whose element[1] starts with `start_char`
                     (emitted in addition to one of the two outputs above)

  NOTE(review): element[1] / element[2] are presumably Student_name / City,
  as in the Students.txt sample; confirm against Students_age.txt.
  """

  def process(self,element,country,start_char):
    """Yield element to the matching output(s).

    Args:
      element: Sequence of record fields (indexable; element[1] is a string).
      country: Value that element[2] must equal to reach the main output.
      start_char: Prefix that element[1] must start with to be copied to the
        'Names_r' output.
    """
    if element[2] == country:
      yield element
    else:
      yield beam.pvalue.TaggedOutput('Other_student', element)
    # Honour the start_char argument. It used to be ignored in favour of a
    # hard-coded 'r', so passing any other prefix silently had no effect.
    if element[1].startswith(start_char):
      yield beam.pvalue.TaggedOutput('Names_r', element)
  


# Split and filter the input, then fan records out to one main output
# ('Chennai_Cust') and two tagged outputs.
route_customers = beam.ParDo(ProcessCustomers(),'chn','r').with_outputs(
    'Names_r','Other_student',main='Chennai_Cust')

customers = (
    p1
    | beam.io.ReadFromText('Students_age.txt')
    | beam.ParDo(SplitRow(),side_list)
    | route_customers
)

# Each output is exposed as an attribute named after its tag.
chennai_customers = customers.Chennai_Cust
other_cities_customers = customers.Other_student
customer_withname_r = customers.Names_r

# Write every output to its own file prefix.
chennai_customers | 'Write Chennai Students PCollection' >> beam.io.WriteToText("chennai")
other_cities_customers | 'Write Students PCollection that lives in other cities' >> beam.io.WriteToText("students_other_cities")
customer_withname_r | 'Write Students names with r PCollection' >> beam.io.WriteToText("customers_names_r")

p1.run()

In [None]:
# Show the shard written by WriteToText("chennai") (the main output).
! cat chennai-00000-of-00001


In [None]:
# Show the shard written by WriteToText("students_other_cities") (the 'Other_student' output).
!cat students_other_cities-00000-of-00001


In [None]:
# Show the shard written by WriteToText("customers_names_r") (the 'Names_r' output).
!cat customers_names_r-00000-of-00001


### Composite Transformation in Apache Beam

# DATA

https://github.com/vigneshSs-07/Cloud-AI-Analytics/tree/main/Apache%20Beam%20-Python

### grocery.txt

In [None]:
"""
Item_Identifier,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Identifier,Outlet_Establishment_Yea,Outlet_Size,Outlet_Location_Type,Outlet_Type,Item_Outlet_Sales
FDA15,9.3,Low Fat,0.016047301,Dairy,249.8092,OUT049,1999,Medium,Tier 1,Supermarket Type1,3735.138
DRC01,5.92,Regular,0.019278216,Soft Drinks,48.2692,OUT018,2009,Medium,Tier 3,Supermarket Type2,443.4228
FDN15,17.5,Low Fat,0.016760075,Meat,141.618,OUT049,1999,Medium,Tier 1,Supermarket Type1,2097.27
FDX07,19.2,Regular,0,Fruits and Vegetables,182.095,OUT010,1998,,Tier 3,Grocery Store,732.38
NCD19,8.93,Low Fat,0,Household,53.8614,OUT013,1987,High,Tier 3,Supermarket Type1,994.7052
FDP36,10.395,Regular,0,Baking Goods,51.4008,OUT018,2009,Medium,Tier 3,Supermarket Type2,556.6088
FDO10,13.65,Regular,0.012741089,Snack Foods,57.6588,OUT013,1987,High,Tier 3,Supermarket Type1,343.5528
FDP10,,Low Fat,0.127469857,Snack Foods,107.7622,OUT027,1985,Medium,Tier 3,Supermarket Type3,4022.7636
FDH17,16.2,Regular,0.016687114,Frozen Foods,96.9726,OUT045,2002,,Tier 2,Supermarket Type1,1076.5986
FDU28,19.2,Regular,0.09444959,Frozen Foods,187.8214,OUT017,2007,,Tier 2,Supermarket Type1,4710.535
FDY07,11.8,Low Fat,0,Fruits and Vegetables,45.5402,OUT049,1999,Medium,Tier 1,Supermarket Type1,1516.0266
FDA03,18.5,Regular,0.045463773,Dairy,144.1102,OUT046,1997,Small,Tier 1,Supermarket Type1,2187.153
FDX32,15.1,Regular,0.1000135,Fruits and Vegetables,145.4786,OUT049,1999,Medium,Tier 1,Supermarket Type1,1589.2646
FDS46,17.6,Regular,0.047257328,Snack Foods,119.6782,OUT046,1997,Small,Tier 1,Supermarket Type1,2145.2076
FDF32,16.35,Low Fat,0.0680243,Fruits and Vegetables,196.4426,OUT013,1987,High,Tier 3,Supermarket Type1,1977.426
FDP49,9,Regular,0.069088961,Breakfast,56.3614,OUT046,1997,Small,Tier 1,Supermarket Type1,1547.3192
NCB42,11.8,Low Fat,0.008596051,Health and Hygiene,115.3492,OUT018,2009,Medium,Tier 3,Supermarket Type2,1621.8888
FDP49,9,Regular,0.069196376,Breakfast,54.3614,OUT049,1999,Medium,Tier 1,Supermarket Type1,718.3982
DRI11,,Low Fat,0.034237682,Hard Drinks,113.2834,OUT027,1985,Medium,Tier 3,Supermarket Type3,2303.668
FDU02,13.35,Low Fat,0.10249212,Dairy,230.5352,OUT035,2004,Small,Tier 2,Supermarket Type1,2748.4224
FDN22,18.85,Regular,0.138190277,Snack Foods,250.8724,OUT013,1987,High,Tier 3,Supermarket Type1,3775.086
FDW12,,Regular,0.035399923,Baking Goods,144.5444,OUT027,1985,Medium,Tier 3,Supermarket Type3,4064.0432
NCB30,14.6,Low Fat,0.025698134,Household,196.5084,OUT035,2004,Small,Tier 2,Supermarket Type1,1587.2672
FDC37,,Low Fat,0.057556998,Baking Goods,107.6938,OUT019,1985,Small,Tier 1,Grocery Store,214.3876
FDR28,13.85,Regular,0.025896485,Frozen Foods,165.021,OUT046,1997,Small,Tier 1,Supermarket Type1,4078.025
NCD06,13,Low Fat,0.099887103,Household,45.906,OUT017,2007,,Tier 2,Supermarket Type1,838.908
FDV10,7.645,Regular,0.066693437,Snack Foods,42.3112,OUT035,2004,Small,Tier 2,Supermarket Type1,1065.28
DRJ59,11.65,low fat,0.019356132,Hard Drinks,39.1164,OUT013,1987,High,Tier 3,Supermarket Type1,308.9312
FDE51,5.925,Regular,0.161466534,Dairy,45.5086,OUT010,1998,,Tier 3,Grocery Store,178.4344
FDC14,,Regular,0.072221801,Canned,43.6454,OUT019,1985,Small,Tier 1,Grocery Store,125.8362
FDV38,19.25,Low Fat,0.170348551,Dairy,55.7956,OUT010,1998,,Tier 3,Grocery Store,163.7868
NCS17,18.6,Low Fat,0.080829372,Health and Hygiene,96.4436,OUT018,2009,Medium,Tier 3,Supermarket Type2,2741.7644
FDP33,18.7,Low Fat,0,Snack Foods,256.6672,OUT018,2009,Medium,Tier 3,Supermarket Type2,3068.0064
FDO23,17.85,Low Fat,0,Breads,93.1436,OUT045,2002,,Tier 2,Supermarket Type1,2174.5028
DRH01,17.5,Low Fat,0.097904029,Soft Drinks,174.8738,OUT046,1997,Small,Tier 1,Supermarket Type1,2085.2856
NCX29,10,Low Fat,0.089291137,Health and Hygiene,146.7102,OUT049,1999,Medium,Tier 1,Supermarket Type1,3791.0652
FDV20,,Regular,0.059511812,Fruits and Vegetables,128.0678,OUT027,1985,Medium,Tier 3,Supermarket Type3,2797.6916
DRZ11,8.85,Regular,0.113123893,Soft Drinks,122.5388,OUT018,2009,Medium,Tier 3,Supermarket Type2,1609.9044
FDX10,,Regular,0.123111453,Snack Foods,36.9874,OUT027,1985,Medium,Tier 3,Supermarket Type3,388.1614
FDB34,,Low Fat,0.026480954,Snack Foods,87.6198,OUT027,1985,Medium,Tier 3,Supermarket Type3,2180.495
FDU02,13.35,Low Fat,0.102511504,Dairy,230.6352,OUT046,1997,Small,Tier 1,Supermarket Type1,3435.528
FDK43,9.8,Low Fat,0.02681843,Meat,126.002,OUT013,1987,High,Tier 3,Supermarket Type1,2150.534
FDA46,13.6,Low Fat,0.117818348,Snack Foods,192.9136,OUT049,1999,Medium,Tier 1,Supermarket Type1,2527.3768
FDC02,21.35,Low Fat,0.069102831,Canned,259.9278,OUT018,2009,Medium,Tier 3,Supermarket Type2,6768.5228
FDL50,12.15,Regular,0.042277867,Canned,126.5046,OUT013,1987,High,Tier 3,Supermarket Type1,373.5138
FDM39,6.42,LF,0.089498926,Dairy,178.1002,OUT010,1998,,Tier 3,Grocery Store,358.2004
NCP05,19.6,Low Fat,0,Health and Hygiene,153.3024,OUT045,2002,,Tier 2,Supermarket Type1,2428.8384
FDV49,10,Low Fat,0.025879577,Canned,265.2226,OUT045,2002,,Tier 2,Supermarket Type1,5815.0972
FDL12,15.85,Regular,0.121632721,Baking Goods,60.622,OUT046,1997,Small,Tier 1,Supermarket Type1,2576.646
"""

### Students.txt

In [None]:
"""
Id,Student_name,City,Age,Marks,Result
1,vignesh,chn,27,15,FAIL
2,joey,us,51,20,FAIL
3,chandler,us,53,68,PASS
4,khaula,hyd,26,99,PASS
5,neethu,uae,27,100,PASS
6,sree,koc,25,27,FAIL
7,sai,mad,21,71,PASS
8,sabari,vel,25,75,PASS
9,tinkle,ker,27,9,FAIL
10,swati,ind,24,91,PASS
"""

### Students_exclude.txt

In [None]:
"""
1
3
7
9
"""