# Learning Functions creation

This Jupyuter notebook is part of a Master assingments. In this case I'll "invent" functions to practice how to work with python by functional programming 

In [3]:
# I made this function years ago to simplify calculations in a personal projects

# Required libraries
import pandas as pd


# To simplify the process of grouping the Passenger and Dispatch variables, the following variable will be created:
def grupo_por_columna(dt, columna_agrupada):
    #   Group data in sum agregation
    agrupacion = dt.groupby(columna_agrupada).agg({'PASAJEROS': 'sum', 'DESPACHOS': 'sum'}).reset_index()
    return agrupacion


### Using grupo_por_columna

In [4]:
data = pd.read_csv("Transporte Pasajeros 2019-2023.csv")

# Eg 1 groubing by just 'MES-AÑO'
data['FECHA_DESPACHO'] = pd.to_datetime(data['FECHA_DESPACHO'])
data['MES-AÑO'] = data['FECHA_DESPACHO'].dt.to_period('M')
data_agrupada_mes = grupo_por_columna(data, 'MES-AÑO')

print(data_agrupada_mes.head())


   MES-AÑO  PASAJEROS  DESPACHOS
0  2019-01      55096       6062
1  2019-02      19156       1466
2  2019-03        110          3
3  2019-04        895         55
4  2019-05      32625       2923


In [6]:
# Grouping by "DEPARTEMENTO_ORIGEN"
tendencia_anual_por_departamento = grupo_por_columna(data, 'DEPARTAMENTO_ORIGEN')

print(tendencia_anual_por_departamento.head(10))

  DEPARTAMENTO_ORIGEN  PASAJEROS  DESPACHOS
0           ANTIOQUIA   65995610    7011412
1              ARAUCA      78547       9288
2           ATLÁNTICO    9297180    1271075
3        BOGOTÁ, D.C.   85434411   11923539
4             BOLÍVAR   15215346    1751046
5              BOYACÁ   37410265    4984071
6              CALDAS   14036385    1705261
7             CAQUETÁ    5687825     896045
8            CASANARE    2986045     437678
9               CAUCA   12296827    1092384


In [8]:
# Grouping by "CLASE_VEHICULO" and "NIVEL_SERVICIO"
data_por_vehiculo = grupo_por_columna(data, ['CLASE_VEHICULO', 'NIVEL_SERVICIO'])

print(data_por_vehiculo.head(10))

  CLASE_VEHICULO        NIVEL_SERVICIO  PASAJEROS  DESPACHOS
0      AUTOMOVIL                BASICO    3310648    1063537
1      AUTOMOVIL                  LUJO    4350753    1435932
2      AUTOMOVIL  PREFERENCIAL DE LUJO        213         47
3            BUS                BASICO   73567689    5893970
4            BUS                  LUJO  174681848   17410473
5            BUS  PREFERENCIAL DE LUJO     147240       5169
6         BUSETA                BASICO   21286977    2725407
7         BUSETA                  LUJO   26979582    4598104
8         BUSETA  PREFERENCIAL DE LUJO      16426       1609
9         CAMION                BASICO         12          3


## Function 2
Title Case words in spanish

In [9]:
def title_case_spanish(s: str) -> str:
    """
    Apply 'Title Case' ignoring common prepositions in spanish
    (ej. 'de', 'la', 'y', 'en') except if these are at beging of a phrase.
    """
    small_words = {
        "y","a","de","del","la","el","las","los","en","con","por","para","sin",
        "sobre","entre","un","una","unos","unas","al","lo"
    }
    words = s.split()
    out = []
    for i, w in enumerate(words):
        lw = w.lower()
        if i != 0 and lw in small_words:
            out.append(lw)
        else:
            out.append(w.capitalize())
    return " ".join(out)


### Using title_case_spanish

In [10]:
print('Ej1: ')
print(title_case_spanish("la casa de los dibujos"))

print('Ej2: ')
print(title_case_spanish("harry potter y la orden del fénix"))

print('Ej1: ')
print(title_case_spanish("desde mi cielo - mago de oz"))

Ej1: 
La Casa de los Dibujos
Ej2: 
Harry Potter y la Orden del Fénix
Ej1: 
Desde Mi Cielo - Mago de Oz


# Function 3

Show ISO codes and phone code for a given country

In [16]:
# This function sill required pandas because requires a .csv file

def info_country(country):
    """
    search a given country (by Spanish or English name) in the CSV file
    and return its ISO2, ISO3, and PHONE_CODE.
    """
    # Load file
    info_file = pd.read_csv('paises.csv')

    # Normalize content
    country_lower = country.strip().lower()

    # Look up by ESPAÑOL or ENGLISH
    match = info_file[
        (info_file["ESPAÑOL"].str.lower() == country_lower) |
        (info_file["ENGLISH"].str.lower() == country_lower)
    ]

    if match.empty:
        return {"error": f"Country '{country}' not found."}

    row = match.iloc[0]
    return {
        "ESPAÑOL": row["ESPAÑOL"],
        "ENGLISH": row["ENGLISH"],
        "ISO2": row["ISO2"],
        "ISO3": row["ISO3"],
        "PHONE_CODE": row["PHONE_CODE"]
    }
    

### Using info_country

In [17]:
print("Ej1:")
print(info_country('Spain'))

print("Ej2:")
print(info_country('Colombia'))

print("Ej3:")
print(info_country('México'))

Ej1:
{'ESPAÑOL': 'España', 'ENGLISH': 'Spain', 'ISO2': 'ES', 'ISO3': 'ESP', 'PHONE_CODE': '34'}
Ej2:
{'ESPAÑOL': 'Colombia', 'ENGLISH': 'Colombia', 'ISO2': 'CO', 'ISO3': 'COL', 'PHONE_CODE': '57'}
Ej3:
{'ESPAÑOL': 'México', 'ENGLISH': 'Mexico', 'ISO2': 'MX', 'ISO3': 'MEX', 'PHONE_CODE': '52'}


# Function 4

Add placeholder for long lines, ideal for coding formating logs

In [12]:
def stringify_with_limit(obj, limit: int = 80, placeholder: str = '...') -> str:
    """
    Conver objects to strings and trunc
    `limit` characteres, adding a placeholder.
    """
    s = str(obj)
    if len(s) <= limit:
        return s
    cut = max(0, limit - len(placeholder))
    return s[:cut] + placeholder

In [13]:
long = "a"*100
print("Ej1: ")
print(stringify_with_limit(long, limit=20))

text = "About this point, there are missing values in number of hospital beds column based on the total count rows in the table"
print("Ej2:")
print(stringify_with_limit(text, limit=40))

text2 = "Creating some dependent and independent variables "
print("Ej2:")
print(stringify_with_limit(text2, limit=15))

Ej1: 
aaaaaaaaaaaaaaaaa...
Ej2:
About this point, there are missing v...
Ej2:
Creating som...


# Function 5

In [18]:
from typing import Optional, Union

def safe_divide(a: Union[int,float], b: Union[int,float],
                default: Optional[Union[int,float]] = None,
                rounding: Optional[int] = None) -> Optional[Union[int,float]]:
    """
    Divides a / b, returning `default` if division by zero or invalid values.
    If rounding is int, rounds the result to that number of decimal places.
    """
    try:
        res = a / b
    except Exception:
        return default
    if rounding is not None:
        return round(res, rounding)
    return res

### Using safe_divide

In [19]:
print("Ej1:")
print(safe_divide(5, 2))

print("Ej2:")
print(safe_divide(1, 0, default=float('inf')))

print(f'Ej3: {safe_divide(10, 3, rounding=2)}')

Ej1:
2.5
Ej2:
inf
Ej3: 3.33


# Function 6

In [20]:
from typing import List, Any

def chunk_list(lst: List[Any], n: int) -> List[List[Any]]:
    """
    Splits a list into chunks of size n. The last chunk may be smaller.
    Throws ValueError if n <= 0.
    """
    if n <= 0:
        raise ValueError("n debe ser > 0")
    return [lst[i:i+n] for i in range(0, len(lst), n)]

### Using chunk_list

In [23]:
print(f'Ej1: {chunk_list(list(range(7)), 3)}')

print(f'Ej2: {chunk_list(["a","b","c","d"], 2)}')

print(f'Ej3: {chunk_list([1,2,3], 5)}')

Ej1: [[0, 1, 2], [3, 4, 5], [6]]
Ej2: [['a', 'b'], ['c', 'd']]
Ej3: [[1, 2, 3]]


# Function 7

In [24]:
from typing import Dict, Any

def flatten_dict(d: Dict[str, Any], parent_key: str = '', sep: str = '.') -> Dict[str, Any]:
    """
    Flattens a dictionary nested within a dictionary with concatenated keys.
    If it finds lists, it includes the index in the key: e.g. ‘items.0.name’.
    """
    items = {}
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.update(flatten_dict(v, new_key, sep=sep))
        elif isinstance(v, list):
            for i, elem in enumerate(v):
                if isinstance(elem, dict):
                    items.update(flatten_dict(elem, f"{new_key}{sep}{i}", sep=sep))
                else:
                    items[f"{new_key}{sep}{i}"] = elem
        else:
            items[new_key] = v
    return items

### Using flaten_dict

In [25]:
d = {"a": 1, "b": {"c": 2, "d": {"e": 3}}}
print(f'Ej1: {flatten_dict(d)}')

fruits = {"Apple": [{"Pineapple": 1}, {"Banana": 2}], "Grape": 3}
print(flatten_dict(fruits))

data_user = {"user": {"name": "Ana", "roles": ["admin","user"]}}
print(flatten_dict(data_user))

Ej1: {'a': 1, 'b.c': 2, 'b.d.e': 3}
{'Apple.0.Pineapple': 1, 'Apple.1.Banana': 2, 'Grape': 3}
{'user.name': 'Ana', 'user.roles.0': 'admin', 'user.roles.1': 'user'}


# Function 8

In [26]:
from typing import List, Any

def dedupe_list_preserve_order(lst: List[Any]) -> List[Any]:
    """
    Removes duplicates while preserving the order of first appearance.
    Works with hashable elements; for non-hashable elements, adaptation would be required.
    """
    seen = set()
    out = []
    for x in lst:
        if x not in seen:
            seen.add(x)
            out.append(x)
    return out

### Using dedupe_list_preserve_order

In [27]:
print(f'Ej1: {dedupe_list_preserve_order([1,2,2,3,1,4])}')
print(f'Ej2: {dedupe_list_preserve_order(["Pear","Apple","Watermelon","Pineapple","Pear","Banana", "Apple", "Pear"])}')
print(f'Ej3: {dedupe_list_preserve_order([1,2,2,"Apple","Watermelon",4])}')

Ej1: [1, 2, 3, 4]
Ej2: ['Pear', 'Apple', 'Watermelon', 'Pineapple', 'Banana']
Ej3: [1, 2, 'Apple', 'Watermelon', 4]


# Function 9

In [29]:
import re
from typing import List, Union

def extract_numbers(s: str, floats: bool = True) -> List[Union[int, float]]:
    """
    Extract numbers from a string. If floats=True, return decimals where appropriate;
    otherwise, return all as integers (treating decimal points as separators).
    """
    pattern = r'[-+]?\d*\.\d+|[-+]?\d+'
    matches = re.findall(pattern, s)
    result = []
    for m in matches:
        if floats and ('.' in m or 'e' in m.lower()):
            result.append(float(m))
        else:
            # si floats==False forzamos int (posible pérdida de información)
            if '.' in m:
                result.append(float(m) if floats else int(float(m)))
            else:
                result.append(int(m))
    return result

### Using extract_numbers

In [30]:
print(f'Ej1: {extract_numbers("En 2024 hubo 3.5% y -2 casos")}')

print(f'Ej2: {extract_numbers("Cantidad de viajes con 0 parasajeros 9478651 Tamaño total del conjunto 63177087 razón pasajeros 0 sobre total 0.15003304916543556")}')

print(f'Ej3: {extract_numbers("Mes con el mínimo general: 2019-03, Despachos: 3, Pasajeros: 110")}')

Ej1: [2024, 3.5, -2]
Ej2: [0, 9478651, 63177087, 0, 0.15003304916543556]
Ej3: [2019, -3, 3, 110]


# Function 10

In [38]:
import re

def normalize_whitespace(s: str, case: str = 'preserve') -> str:
    """
    Remove extra spaces (including line breaks) leaving a single space between words
    and no leading/trailing spaces.

    `case` parameter:
      - ‘preserve’ : leaves the text case as is (default)
      - ‘lower’    : returns everything in lowercase
      - ‘title’: applies Title Case (capitalize each word)
      - ‘sentence’: sentence case (first letter of each sentence capitalized, rest lowercase)

    Note: ‘sentence’ understands sentences separated by ‘.’, ‘!’, or ‘?’ followed by a space.
    """
    cleaned = re.sub(r'\s+', ' ', s.strip())

    if case == 'preserve':
        return cleaned
    if case == 'lower':
        return cleaned.lower()
    if case == 'title':
        return cleaned.title()
    if case == 'sentence':
        # convert everything to lowercase and then capitalize the first letter
        s2 = cleaned.lower()
        pattern = re.compile(r'(^|(?<=[\.\!\?]\s))([a-záéíóúüñ])', flags=re.IGNORECASE)
        return pattern.sub(lambda m: m.group(1) + m.group(2).upper(), s2)

    raise ValueError("case must be one of: 'preserve','lower','title','sentence'")


### Using normalize_whitespace

In [39]:
s = "  Creando PRUEBAS \n esto   es   prueba  "
txt = "  Clima   caluroso    llamen a  EmErGeNcias"
sea = "welcome to redistribute livig  N"

print("Ej1 preserve:", normalize_whitespace(s))
print("Ej1 lower:   ", normalize_whitespace(s, case='lower'))
print("Ej1 title:   ", normalize_whitespace(s, case='title'))
print("Ej1 sentence:", normalize_whitespace(s, case='sentence'))

print("Ej2 preserve:", normalize_whitespace(txt))
print("Ej2 lower:   ", normalize_whitespace(txt, case='lower'))
print("Ej2 sentence:", normalize_whitespace(txt, case='sentence'))

print("Ej3 preserve:", normalize_whitespace(sea))
print("Ej3 lower:   ", normalize_whitespace(sea, case='lower'))
print("Ej3 title:   ", normalize_whitespace(sea, case='title'))

Ej1 preserve: Creando PRUEBAS esto es prueba
Ej1 lower:    creando pruebas esto es prueba
Ej1 title:    Creando Pruebas Esto Es Prueba
Ej1 sentence: Creando pruebas esto es prueba
Ej2 preserve: Clima caluroso llamen a EmErGeNcias
Ej2 lower:    clima caluroso llamen a emergencias
Ej2 sentence: Clima caluroso llamen a emergencias
Ej3 preserve: welcome to redistribute livig N
Ej3 lower:    welcome to redistribute livig n
Ej3 title:    Welcome To Redistribute Livig N


# Extra

In [40]:
def merge_unique(*lists):
    """
    Receives an unspecified number of lists and returns
    a list with all unique elements.
    """
    unique = set()
    for lst in lists:
        unique.update(lst)
    return list(unique)

### Using merge_unique

In [None]:
a = [1, 2, 3, 4]
b = [3, 4, 5, 6]
c = [6, 7, 8]

print(merge_unique(a, b, c))


a = ["Pear","Apple","Watermelon"]
b = ["Pineapple","Pear","Banana", "Apple", "Pear"]
c = ["Watermelon","Pineapple","Pear","Banana"]

