# Big Data ETL with pagination

In [4]:
import sys
import os

# Data loader with memory pagination

input_file = 'iris.csv'
page_size = 10

if not os.path.isfile(input_file):
	print("Archivo no existe.")
	sys.exit(1)

with open(input_file, 'r') as file:
	contents = file.readlines()

num_lines = len(contents)
num_pages = num_lines // page_size + 1

for page in range(num_pages):
	start_index = page * page_size
	end_index = min((page + 1) * page_size, num_lines)
	page_contents = contents[start_index:end_index]
	
	# Process the page contents
	for line in page_contents:
		print(line.strip())
	
	# Prompt for user input to continue or exit
	if page < num_pages - 1:
		user_input = input("Press Enter to load the next page or 'q' to quit: ")
		if user_input.lower() == 'q':
			break


sepal_length,sepal_width,petal_length,petal_width,species
5.1,3.5,1.4,0.2,setosa
4.9,3.0,1.4,0.2,setosa
4.7,3.2,1.3,0.2,setosa
4.6,3.1,1.5,0.2,setosa
5.0,3.6,1.4,0.2,setosa
5.4,3.9,1.7,0.4,setosa
4.6,3.4,1.4,0.3,setosa
5.0,3.4,1.5,0.2,setosa
4.4,2.9,1.4,0.2,setosa
4.9,3.1,1.5,0.1,setosa
5.4,3.7,1.5,0.2,setosa
4.8,3.4,1.6,0.2,setosa
4.8,3.0,1.4,0.1,setosa
4.3,3.0,1.1,0.1,setosa
5.8,4.0,1.2,0.2,setosa
5.7,4.4,1.5,0.4,setosa
5.4,3.9,1.3,0.4,setosa
5.1,3.5,1.4,0.3,setosa
5.7,3.8,1.7,0.3,setosa
5.1,3.8,1.5,0.3,setosa
5.4,3.4,1.7,0.2,setosa
5.1,3.7,1.5,0.4,setosa
4.6,3.6,1.0,0.2,setosa
5.1,3.3,1.7,0.5,setosa
4.8,3.4,1.9,0.2,setosa
5.0,3.0,1.6,0.2,setosa
5.0,3.4,1.6,0.4,setosa
5.2,3.5,1.5,0.2,setosa
5.2,3.4,1.4,0.2,setosa
4.7,3.2,1.6,0.2,setosa
4.8,3.1,1.6,0.2,setosa
5.4,3.4,1.5,0.4,setosa
5.2,4.1,1.5,0.1,setosa
5.5,4.2,1.4,0.2,setosa
4.9,3.1,1.5,0.1,setosa
5.0,3.2,1.2,0.2,setosa
5.5,3.5,1.3,0.2,setosa
4.9,3.1,1.5,0.1,setosa
4.4,3.0,1.3,0.2,setosa
5.1,3.4,1.5,0.2,setosa
5.0,3.5,1.3,0.3,setosa

In [45]:
import csv

class FileOperations:
	def __init__(self, file_path):
		self.file_path = file_path
	
	def read_csv(self):
		data = []
		with open(self.file_path, 'r') as file:
			csv_reader = csv.reader(file)
			# Crear columna de indices
			for i, row in enumerate(csv_reader):
				if i == 0:
					row.insert(0, 'Index')
				else:
					row.insert(0, str(i))
				data.append(row)
		return data if len(data) > 1 else None
	
	def write_csv(self, data):
		with open(self.file_path, 'w', newline='') as file:
			csv_writer = csv.writer(file)
			for row in data:
				csv_writer.writerow(row)

file_ops = FileOperations('iris.csv')
data = file_ops.read_csv()
data[:5]


[['Index',
  'sepal_length',
  'sepal_width',
  'petal_length',
  'petal_width',
  'species'],
 ['1', '5.1', '3.5', '1.4', '0.2', 'setosa'],
 ['2', '4.9', '3.0', '1.4', '0.2', 'setosa'],
 ['3', '4.7', '3.2', '1.3', '0.2', 'setosa'],
 ['4', '4.6', '3.1', '1.5', '0.2', 'setosa']]

In [50]:
class Searcher: 
	def __init__(self, data):
		self.data = data
	
	def linear_search(self, target):
		for i, item in enumerate(self.data):
			if item == target:
				return i
		return None
	
	def binary_search(self, target):
		low = 0
		high = len(self.data) - 1
		while low <= high:
			mid = (low + high) // 2
			if self.data[mid] == target:
				return mid
			elif self.data[mid] < target:
				low = mid + 1
			else:
				high = mid - 1
		return None



In [51]:
searcher = Searcher(data)
# Linear search
sorted_data = searcher.linear_search(['2', '4.9', '3.0', '1.4', '0.2', 'setosa'])
print(sorted_data)

# Binary search
sorted_data = searcher.binary_search(['2', '4.9', '3.0', '1.4', '0.2', 'setosa'])
print(sorted_data) #To fix!

2
None
