# Manejo de archivos

## import de librerias

In [1]:
import io
import os
import struct
import time

### Clase para el tratamiento de datos

In [2]:
file_read: str = "iris_df.csv"
file_bk: str = "iris_bk.csv"
file_created: str = "iris_created.csv"
iris_binary: str = "iris_binary.bin"
iris_bk_test: str = "iris_bk_test.csv"

os.remove(file_created)

class IrisClass:
    def __init__(
        self,
        sepal_length: str,
        sepal_width: str,
        petal_length: str,
        petal_width: str,
        class_type: str
    ) -> None:
        self.sepal_length = float(sepal_length)
        self.sepal_width = float(sepal_width)
        self.petal_length = float(petal_length)
        self.petal_width = float(petal_width)
        self.class_type = class_type.replace("\n", "")

    def transform_class_type(self) -> str:
        mapper = {
            "Iris-setosa": 1,
            "Iris-versicolor": 2,
            "Iris-virginica": 3,
        }
        n_class_type = mapper[self.class_type]

        return (
            f'{self.sepal_length},'
            f'{self.sepal_width},'
            f'{self.petal_length},'
            f'{self.petal_width},'
            f'{n_class_type}'
        )

    def return_list_numbers(self) -> list:
        mapper = {
            "Iris-setosa": 1,
            "Iris-versicolor": 2,
            "Iris-virginica": 3,
        }
        n_class_type = mapper[self.class_type]

        return [
            self.sepal_length,
            self.sepal_width,
            self.petal_length,
            self.petal_width,
            n_class_type,
        ]

    def __str__(self) -> str:
        return (
            f'Class Flower: {self.class_type}\n'
            f'Sepal Size: [{self.sepal_length}, {self.sepal_width}]\n'
            f'Petal Size: [{self.petal_length}, {self.petal_width}]\n'
        )

    def __repr__(self) -> str:
        return (
            f'Class Flower: {self.class_type}\n'
            f'Sepal Length: {self.sepal_length}\n'
            f'Sepal Width: {self.sepal_width}\n'
            f'Petal Length: {self.petal_length}\n'
            f'Petal Width: {self.petal_width}\n'
        )

## Funciones para abrir archivos
<p>
    Existen dos maneras de abrir los archivos:
    <ol>
        <li> open y close de forma tradicional </li>
        <li> open usando contexto, en este no se usa close el mismo contexto lo aplica </li>
    </ol>
</p>

### Forma tradicional

In [3]:
f = open(file_read, "r", encoding="utf-8")
lines = 5
count = 0

for line in f:
    count += 1
    print(line.replace("\n", ""))

    if count == lines:
        break

f.close()

5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa


### Usando open como contexto

In [4]:
with open(file_read, "r", encoding="utf-8") as f:
    lines = 5
    count = 0

    for line in f:
        count += 1
        print(line.replace("\n", ""))

        if count == lines:
            break

5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa


## Leyendo un archivo
<p>
    Se pueden leer los archivos de dos formas:
    <ol>
        <li> usando `readlines` para leer el archivo completo </li>
        <li> usando el `objeto File` como iterador para leer linea por linea </li>
    </ol>
</p>

#### Todas las lineas

In [5]:
with open(file_read, "r", encoding="utf-8") as f:
    all_lines = f.readlines()
    print("Muestra de lineas: ", all_lines[:5])
    print("Numero de lineas en el archivo", len(all_lines))

Muestra de lineas:  ['5.1,3.5,1.4,0.2,Iris-setosa\n', '4.9,3.0,1.4,0.2,Iris-setosa\n', '4.7,3.2,1.3,0.2,Iris-setosa\n', '4.6,3.1,1.5,0.2,Iris-setosa\n', '5.0,3.6,1.4,0.2,Iris-setosa\n']
Numero de lineas en el archivo 151


### Linea por linea

In [6]:
num_read_lines = 5

with open(file_read, "r", encoding="utf-8") as f:
    # Leyendo todo el archivo usando el iterador
    other_lines = []
    all_lines = [line for line in f]
    print(all_lines[:5])
    print(len(all_lines))
    print("*" * 80)
    # este metodo sirve para regresar el apuntador del archivo al principio
    f.seek(0)

    # leyendo solo 5 lineas del archivo
    for i, line in enumerate(f):
        other_lines.append(line)

        if i == num_read_lines:
            break

    print(other_lines)

['5.1,3.5,1.4,0.2,Iris-setosa\n', '4.9,3.0,1.4,0.2,Iris-setosa\n', '4.7,3.2,1.3,0.2,Iris-setosa\n', '4.6,3.1,1.5,0.2,Iris-setosa\n', '5.0,3.6,1.4,0.2,Iris-setosa\n']
151
********************************************************************************
['5.1,3.5,1.4,0.2,Iris-setosa\n', '4.9,3.0,1.4,0.2,Iris-setosa\n', '4.7,3.2,1.3,0.2,Iris-setosa\n', '4.6,3.1,1.5,0.2,Iris-setosa\n', '5.0,3.6,1.4,0.2,Iris-setosa\n', '5.4,3.9,1.7,0.4,Iris-setosa\n']


## Escribiendo un archivo
<p>
    Para escribir un archivo podemos hacer uso de estos 3 modos:
    <ol>
        <li> `w` - sirve para crear el archivo, pero si existe lo borra </li>
        <li> `a` - sirve para agregar nuevos elementos al archivo </li>
        <li> `x` - sirve solo para crear el archivo, falla si el archivo existe </li>
    </ol>
</p>

<p>
    De igual manera para agregar texto al archivo se usa:
    <ol>
        <li> `write` - se usa para escribir solo una linea en el archivo </li>
        <li> `writelines` - se usa para escribir multiples lineas (list) en el archivo </li>
    </ol>
</p>

### Leyendo el archivo para reescrirlo

In [7]:
with open(file_read, "r", encoding="utf-8") as f:
    all_lines = f.readlines()
    print(all_lines[:5])

print("*" * 80)
clean_lines = [line.split(",") for line in all_lines]
print(clean_lines[:5])
print(len(clean_lines[0]))
# Creando clase para parseo
iris_class = [
    IrisClass(line[0], line[1], line[2], line[3], line[4])
    for line in clean_lines
    if len(line) == 5
]
print(iris_class[0])

['5.1,3.5,1.4,0.2,Iris-setosa\n', '4.9,3.0,1.4,0.2,Iris-setosa\n', '4.7,3.2,1.3,0.2,Iris-setosa\n', '4.6,3.1,1.5,0.2,Iris-setosa\n', '5.0,3.6,1.4,0.2,Iris-setosa\n']
********************************************************************************
[['5.1', '3.5', '1.4', '0.2', 'Iris-setosa\n'], ['4.9', '3.0', '1.4', '0.2', 'Iris-setosa\n'], ['4.7', '3.2', '1.3', '0.2', 'Iris-setosa\n'], ['4.6', '3.1', '1.5', '0.2', 'Iris-setosa\n'], ['5.0', '3.6', '1.4', '0.2', 'Iris-setosa\n']]
5
Class Flower: Iris-setosa
Sepal Size: [5.1, 3.5]
Petal Size: [1.4, 0.2]



### Reescribiendo el archivo (cambiando el valor del tipo de Iris a numerico)

#### Usando `write`

In [8]:
with open(file_bk, "w", encoding="utf-8") as f:
    try:
        f.readlines()
    except io.UnsupportedOperation as uo:
        print("Is not possible read file `%s`" % str(uo))

    for iris in iris_class:
        f.write(iris.transform_class_type() + "\n")

Is not possible read file `not readable`


#### Usando `writelines`

In [9]:
with open(file_bk, "w", encoding="utf-8") as f:
    all_lines_w = [iris.transform_class_type() + "\n" for iris in iris_class]
    f.writelines(all_lines_w)

#### Usando `a` para agregar nuevas lineas

##### Usando `write`

In [10]:
with open(file_bk, "a", encoding="utf-8") as f:
    try:
        f.readlines()
    except io.UnsupportedOperation as uo:
        print("Is not possible read file `%s`" % str(uo))

    new_lines = [
        iris.transform_class_type() + "\n" for iris in iris_class
    ][:5]

    for line in new_lines:
        f.write(line)

Is not possible read file `not readable`


#### Usando `writelines`

In [11]:
with open(file_bk, "a", encoding="utf-8") as f:
    new_lines = [
        iris.transform_class_type() + "\n" for iris in iris_class
    ][:5]

    f.writelines(new_lines)

#### Usando `x`

In [12]:
with open(file_created, "x", encoding="utf-8") as f:
    new_lines = [
        iris.transform_class_type() + "\n" for iris in iris_class
    ][:5]

    f.writelines(new_lines)

    for line in new_lines:
        f.write(line)

In [13]:
try:
    with open(file_created, "x", encoding="utf-8") as f:
        new_lines = [
            iris.transform_class_type() + "\n" for iris in iris_class
        ][:5]

        f.writelines(new_lines)
except FileExistsError as fee:
    print(fee)

[Errno 17] File exists: 'iris_created.csv'


### Creando y leyendo un archivo binario

<p>
    Para este ejemplo se usa el mismo dataset, debido a que se usan numeros de punto flotante
    se debe de hacer un empaquetado con la libreria `struct` nativa de python.
</p>
<p>
    Para la parte de leer se tiene que hacer el proceso inverso, en este caso por ser de
    tipo flotante se debe declarar un buffer de 4 bytes:
    https://docs.python.org/3/library/struct.html#format-characters
</p>

In [14]:
with open(iris_binary, "wb") as f:
    new_lines = [
        struct.pack(
            '%sf' % len(iris.return_list_numbers()),
            *iris.return_list_numbers(),
        )
        for iris in iris_class
    ]

    f.writelines(new_lines)

In [15]:
with open(iris_binary, "rb") as f:
    array_complete = []
    simple_array = []

    while (buff := f.read(4)):
        simple_array.append(struct.unpack("f", buff)[0])

        if len(simple_array) == 5:
            array_complete.append(simple_array)
            simple_array = []

print(array_complete[:5])

[[5.099999904632568, 3.5, 1.399999976158142, 0.20000000298023224, 1.0], [4.900000095367432, 3.0, 1.399999976158142, 0.20000000298023224, 1.0], [4.699999809265137, 3.200000047683716, 1.2999999523162842, 0.20000000298023224, 1.0], [4.599999904632568, 3.0999999046325684, 1.5, 0.20000000298023224, 1.0], [5.0, 3.5999999046325684, 1.399999976158142, 0.20000000298023224, 1.0]]


#### usando `+`

#### Escribiendo el archivo

In [16]:
with open(iris_bk_test, "w+") as f:
    lines = f.readlines()
    print("*" * 80)
    print(lines)
    print("*" * 80)
    new_lines = [
        iris.transform_class_type() + "\n" for iris in iris_class
    ]
    time.sleep(20)
    f.writelines(new_lines)
    f.seek(0)
    lines = f.readlines()
    print("*" * 80)
    print(lines[:5])
    print("*" * 80)

********************************************************************************
[]
********************************************************************************
********************************************************************************
['5.1,3.5,1.4,0.2,1\n', '4.9,3.0,1.4,0.2,1\n', '4.7,3.2,1.3,0.2,1\n', '4.6,3.1,1.5,0.2,1\n', '5.0,3.6,1.4,0.2,1\n']
********************************************************************************


#### Leyendo el archivo

In [17]:
with open(iris_bk_test, "r+") as f:
    lines = f.readlines()
    print(lines[:5])
    f.writelines(lines[:5])

['5.1,3.5,1.4,0.2,1\n', '4.9,3.0,1.4,0.2,1\n', '4.7,3.2,1.3,0.2,1\n', '4.6,3.1,1.5,0.2,1\n', '5.0,3.6,1.4,0.2,1\n']
