# Demonstration of List Applications

## 1. Standardize Input's Length Using Padding Technique

In [None]:
def pad_sequences(sequences):
    """
    Right-pad every sequence with zeros up to the length of the longest one.

    Args:
        sequences (List[List[int]]): Sequences to align; their lengths
                may differ.

    Returns:
        List[List[int]]: Newly built lists that all share the same length.
                The input lists are not modified. An empty input
                yields an empty list.
    """
    if not sequences:
        return []

    # The longest sequence dictates the common target length
    target_len = max(map(len, sequences))

    padded = []
    for seq in sequences:
        missing = target_len - len(seq)
        # Concatenation builds a new list, so the caller's data stays intact
        padded.append(seq + [0] * missing)
    return padded



In [None]:
# Test 1: ragged input, every sequence is padded up to length 3
assert (
    pad_sequences([[1, 2], [3, 4, 5], [6]])
    == [[1, 2, 0], [3, 4, 5], [6, 0, 0]]
), "Test 1 Failed"

# Test 2: equal-length sequences come back unchanged
assert (
    pad_sequences([[1, 1], [2, 2], [3, 3]])
    == [[1, 1], [2, 2], [3, 3]]
), "Test 2 Failed"

# Test 3: an empty sequence next to a long one becomes all zeros
assert (
    pad_sequences([[1, 2, 3], []])
    == [[1, 2, 3], [0, 0, 0]]
), "Test 3 Failed"

# Test 4: a single sequence needs no padding
assert pad_sequences([[7, 8]]) == [[7, 8]], "Test 4 Failed"

# Test 5: empty input list yields an empty list
assert pad_sequences([]) == [], "Test 5 Failed"

print("✅ All tests passed!")


## 2. Train - Validation - Test Split

In [None]:
from typing import List, Tuple, Any
import random

def split_dataset(data: List[Any],
                  train_ratio: float,
                  val_ratio: float,
                  shuffle: bool = False
                  ) -> Tuple[List[Any], List[Any], List[Any]]:
    """
    Split a dataset into train, validation, and test sets
    according to given ratios.

    The train and validation sizes are each obtained with the built-in
    ``round`` applied to ``len(data) * ratio``; whatever remains after
    those two slices becomes the test set.

    Args:
        data (List[Any]): The dataset to split.
        train_ratio (float): Proportion of data
                    for the training set (0 <= train_ratio <= 1).
        val_ratio (float): Proportion of data
                    for the validation set (0 <= val_ratio <= 1).
        shuffle (bool): Whether to shuffle the dataset before splitting.
                    Default is False.

    Returns:
        Tuple[List[Any], List[Any], List[Any]]: Train, validation,
            and test datasets. Three empty lists if ``data`` is empty.

    Raises:
        ValueError: If either ratio is outside [0, 1], or if
            train_ratio + val_ratio exceeds 1.
    """
    if not data:
        return [], [], []

    if not (0 <= train_ratio <= 1 and 0 <= val_ratio <= 1):
        raise ValueError("train_ratio and val_ratio must be between 0 and 1")

    # Ratios that add up to more than the whole dataset cannot be honoured;
    # the small tolerance absorbs floating-point noise in sums meant to be 1.
    if train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError("train_ratio + val_ratio must not exceed 1")

    if shuffle:
        data = data.copy()  # avoid modifying original list
        random.shuffle(data)

    data_len = len(data)
    train_end = round(data_len * train_ratio)
    # Two independent roundings can overshoot by one element on small
    # datasets, so the validation boundary is clamped to the data length.
    val_end = min(train_end + round(data_len * val_ratio), data_len)

    train_set = data[:train_end]
    val_set = data[train_end:val_end]
    test_set = data[val_end:]

    return train_set, val_set, test_set


In [None]:
# Test 1: a 70% / 15% split of 100 items leaves 15 items for the test set
dataset = list(range(100))
train, val, test = split_dataset(dataset, train_ratio=0.7, val_ratio=0.15)
assert [len(train), len(val), len(test)] == [70, 15, 15], "Test 1 Failed"

# Test 2: tiny dataset split in half; rounding decides the sizes
dataset = ['a', 'b', 'c']
train, val, test = split_dataset(dataset, train_ratio=0.5, val_ratio=0.5)
assert [len(train), len(val), len(test)] == [2, 1, 0], "Test 2 Failed"

# Test 3: a zero validation ratio gives an empty validation set
dataset = list(range(5))
train, val, test = split_dataset(dataset, train_ratio=0.8, val_ratio=0.0)
assert (train, val, test) == (list(range(4)), [], [4]), "Test 3 Failed"

# Test 4: an empty dataset yields three empty lists
dataset = []
train, val, test = split_dataset(dataset, train_ratio=0.6, val_ratio=0.2)
assert (train, val, test) == ([], [], []), "Test 4 Failed"

# Test 5: ratios summing to less than 1, the remainder lands in the test set
dataset = list(range(10))
train, val, test = split_dataset(dataset, train_ratio=0.2, val_ratio=0.3)  # 2, 3, 5
assert (train, val, test) == (
    list(range(2)), list(range(2, 5)), list(range(5, 10))
), "Test 5 Failed"

print("✅ All tests passed!")


## 3. Flattening Tokens to Build a Vocabulary (NLP)

In [35]:
def flatten_tokens(corpus):
    """
    Concatenate the tokens of every sentence into one flat list.

    Args:
        corpus (List[List[Any]]): Sentences, each given as a list
        of tokens.

    Returns:
        List[Any]: All tokens in their original order, sentence
                   after sentence. An empty corpus gives an empty list.
    """
    tokens = []
    if corpus:
        for sentence in corpus:
            # extend appends the sentence's tokens one by one, in order
            tokens.extend(sentence)
    return tokens


In [None]:
# Test 1: two multi-word sentences are merged in order
assert (
    flatten_tokens([["hello", "world"], ["this", "is", "a", "test"]])
    == ["hello", "world", "this", "is", "a", "test"]
), "Test 1 Failed"

# Test 2: sentences of uneven length (two words, then one word)
assert flatten_tokens([["a", "b"], ["c"]]) == ["a", "b", "c"], "Test 2 Failed"

# Test 3: an empty corpus produces an empty list
assert flatten_tokens([]) == [], "Test 3 Failed"

# Test 4: a corpus holding a single sentence
assert (
    flatten_tokens([["single", "sentence"]])
    == ["single", "sentence"]
), "Test 4 Failed"

# Test 5: several sentences of different lengths keep their order
assert (
    flatten_tokens([["deep", "learning"], ["rocks"], ["NLP", "is", "fun"]])
    == ["deep", "learning", "rocks", "NLP", "is", "fun"]
), "Test 5 Failed"

print("✅ All tests passed!")


## 4. One-hot Encoding

In [39]:
from typing import List, Any

def one_hot_encode(labels: List[Any], classes: List[Any]) -> List[List[int]]:
    """
    Turn each label into a one-hot vector over the given classes.

    Args:
        labels (List[Any]): Labels to encode.
        classes (List[Any]): All possible classes; a class's position
                             in this list is the position of the 1
                             in its vector.

    Returns:
        List[List[int]]: One vector of length len(classes) per label.
                         An empty list if labels or classes are empty.

    Raises:
        ValueError: If a label does not appear in classes.
    """
    if not (labels and classes):
        return []

    width = len(classes)
    # Map each class to its column once, so each label is a single lookup
    position = dict(zip(classes, range(width)))

    encoded = []
    for lb in labels:
        hot = position.get(lb)
        if hot is None:
            raise ValueError(f"Label '{lb}' not found in classes")
        encoded.append([1 if col == hot else 0 for col in range(width)])

    return encoded


In [None]:
# Test 1: several labels, verify both ordering and class-to-column mapping
assert (
    one_hot_encode(["dog", "cat", "bird", "dog"], ["cat", "dog", "bird"])
    == [[0, 1, 0], [1, 0, 0], [0, 0, 1], [0, 1, 0]]
), "Test 1 Failed"

# Test 2: no labels in, no vectors out
assert (
    one_hot_encode([], ["cat", "dog", "bird"])
    == []
), "Test 2 Failed"

# Test 3: two classes with a repeated label
assert (
    one_hot_encode(["A", "A", "B"], ["A", "B"])
    == [[1, 0], [1, 0], [0, 1]]
), "Test 3 Failed"

# Test 4: a single label gives a single vector
assert (
    one_hot_encode(["cat"], ["cat", "dog", "bird"])
    == [[1, 0, 0]]
), "Test 4 Failed"

# Test 5: longer label strings; the mapping must not depend
# on the length of the label text
assert (
    one_hot_encode(["sad", "happy"], ["happy", "sad", "neutral"])
    == [[0, 1, 0], [1, 0, 0]]
), "Test 5 Failed"

print("✅ All tests passed!")


## 5. Filter Out Low-Confidence Bounding Boxes Using a Threshold

In [47]:
def filter_low_confidence_boxes(predictions, threshold):
    """
    Keep only the predictions whose score reaches the threshold.

    Args:
        predictions: List of boxes. The element at index 1 of each box
            is the value compared against the threshold.
        threshold: Minimum accepted score, between 0 and 1 inclusive.

    Returns:
        A new list with the boxes whose index-1 value is >= threshold,
        in their original order. An empty input gives an empty list.

    Raises:
        ValueError: If predictions is non-empty and threshold is
            outside [0, 1].
    """
    if not predictions:
        return []
    if not (0 <= threshold <= 1):
        raise ValueError("Threshold must be between 0 and 1")

    kept = []
    for bbox in predictions:
        # A score exactly equal to the threshold is kept
        if bbox[1] >= threshold:
            kept.append(bbox)
    return kept

In [None]:
# Test 1: two boxes reach the 0.8 threshold, the 0.4 box is dropped
predictions1 = [
    [0, 0.95, 10, 10, 50, 50],
    [1, 0.4, 20, 20, 30, 30],
    [0, 0.88, 15, 15, 40, 40],
]
assert filter_low_confidence_boxes(predictions1, 0.8) == [
    [0, 0.95, 10, 10, 50, 50],
    [0, 0.88, 15, 15, 40, 40],
], "Test 1 Failed"

# Test 2: a threshold above every score leaves nothing
assert filter_low_confidence_boxes(predictions1, 0.99) == [], "Test 2 Failed"

# Test 3: empty input gives empty output
assert filter_low_confidence_boxes([], 0.5) == [], "Test 3 Failed"

# Test 4: a score exactly equal to the threshold is still kept
predictions2 = [[0, 0.5, 5, 5, 10, 10]]
assert (
    filter_low_confidence_boxes(predictions2, 0.5)
    == [[0, 0.5, 5, 5, 10, 10]]
), "Test 4 Failed"

# Test 5: every box is above the threshold, so none is removed
predictions3 = [[0, 0.85, 1, 2, 3, 4], [1, 0.95, 4, 5, 6, 7]]
assert (
    filter_low_confidence_boxes(predictions3, 0.5)
    == predictions3
), "Test 5 Failed"

print("✅ All tests passed!")


## 6. Feature Normalization

In [None]:
def min_max_scale(data):
    """
    Scale a list of numerical values to the range [0, 1]
    using min-max normalization.

    Args:
        data (List[float]): List of numerical values.

    Returns:
        List[float]: Normalized list of values in [0, 1].
        Returns empty list if input is empty.
            If all values are identical, returns a list of 0.0 values
            (floats, like every other non-empty result).
    """
    if not data:
        return []

    min_val = min(data)
    max_val = max(data)

    if max_val == min_val:
        # A zero range would divide by zero; emit float zeros so the
        # element type matches the normal scaling path below.
        return [0.0 for _ in data]

    # The range is the same for every element, so compute it once
    value_range = max_val - min_val
    return [(x - min_val) / value_range for x in data]


In [None]:
# Test 1: positive integers are mapped onto [0, 1]
assert (
    min_max_scale([10, 20, 50, 30])
    == [0.0, 0.25, 1.0, 0.5]
), "Test 1 Failed"

# Test 2: input mixing negative values, zero and positive values
assert (
    min_max_scale([-10, 0, 10])
    == [0.0, 0.5, 1.0]
), "Test 2 Failed"

# Test 3: identical elements must not trigger a division by zero
assert (
    min_max_scale([5, 5, 5, 5])
    == [0.0, 0.0, 0.0, 0.0]
), "Test 3 Failed"

# Test 4: empty input list
assert min_max_scale([]) == [], "Test 4 Failed"

# Test 5: data that already lies inside [0, 1]
# Floating-point arithmetic is inexact, so compare within a small tolerance
scaled_data = min_max_scale([0.1, 0.5, 0.9])
expected_data = [0.0, 0.5, 1.0]
assert all(
    abs(a - b) < 1e-9 for a, b in zip(scaled_data, expected_data)
), "Test 5 Failed"

print("✅ All tests passed!")


## 7. Accuracy Calculation

In [None]:
from typing import Sequence

def calculate_accuracy(y_true: Sequence, y_pred: Sequence) -> float:
    """
    Compute the fraction of predictions that equal their true label.

    Args:
        y_true (Sequence): Ground-truth labels.
        y_pred (Sequence): Predicted labels, position-aligned with y_true.

    Returns:
        float: Number of matching positions divided by len(y_true).
        Returns 0.0 when either input is empty or when the two
        inputs differ in length.
    """
    if not y_true or not y_pred:
        return 0.0
    if len(y_true) != len(y_pred):
        return 0.0

    hits = 0
    for true, pred in zip(y_true, y_pred):
        if true == pred:
            hits += 1

    return hits / len(y_true)


In [None]:
# Test 1: string labels, 4 of 5 predictions match -> 80%
y_true1 = ["cat", "dog", "cat", "bird", "dog"]
y_pred1 = ["cat", "dog", "cat", "dog", "dog"]
assert (
    calculate_accuracy(y_true=y_true1, y_pred=y_pred1) == 0.8
), "Test 1 Failed"

# Test 2: numeric labels, every prediction matches -> 100%
y_true2 = [1, 0, 1, 1, 0]
y_pred2 = [1, 0, 1, 1, 0]
assert (
    calculate_accuracy(y_true=y_true2, y_pred=y_pred2) == 1.0
), "Test 2 Failed"

# Test 3: no prediction matches -> 0%
y_true3 = [0, 0, 0]
y_pred3 = [1, 1, 1]
assert (
    calculate_accuracy(y_true=y_true3, y_pred=y_pred3) == 0.0
), "Test 3 Failed"

# Test 4: empty inputs
assert calculate_accuracy(y_true=[], y_pred=[]) == 0.0, "Test 4 Failed"

# Test 5: one of two predictions matches -> 50%
y_true5 = ["A", "B"]
y_pred5 = ["A", "C"]
assert (
    calculate_accuracy(y_true=y_true5, y_pred=y_pred5) == 0.5
), "Test 5 Failed"

print("✅ All tests passed!")


## 8. Time-Series Augmentation

In [55]:
import random

def add_noise_augmentation(time_series, noise_level = 1.5):
    """
    Return a copy of the series with Gaussian noise added to each value.

    Args:
        time_series: Sequence of numeric values.
        noise_level: Standard deviation passed to random.gauss; the
            mean of the noise is 0. Defaults to 1.5.

    Returns:
        A new list with one noisy value per input value, in the same
        order. An empty input gives an empty list.
    """
    noisy = []
    if not time_series:
        return noisy

    for value in time_series:
        # One independent draw per element
        jitter = random.gauss(mu = 0, sigma = noise_level)
        noisy.append(value + jitter)
    return noisy

In [None]:
import random

# Fix the random seed so the generated noise is reproducible across runs
random.seed(0)

# Test 1: adding noise keeps the length but alters the values
ts1 = [10, 11, 12, 11, 10]
augmented_ts1 = add_noise_augmentation(ts1, noise_level=0.1)
assert len(augmented_ts1) == len(ts1), (
    "Test 1 Failed: Length mismatch"
)
assert augmented_ts1 != ts1, (
    "Test 1 Failed: Series should be different after adding noise"
)

# Test 2: with noise_level = 0 the series must stay the same
ts2 = [100, 200, 150]
augmented_ts2 = add_noise_augmentation(ts2, noise_level=0.0)
assert augmented_ts2 == ts2, (
    "Test 2 Failed: Series should be identical with zero noise"
)

# Test 3: empty series in, empty list out
assert add_noise_augmentation([], noise_level=0.5) == [], (
    "Test 3 Failed: Empty list should return empty list"
)

# Test 4: a single value is perturbed
# The exact result is random, so only check that it
# differs from the original
ts4 = [5]
augmented_ts4 = add_noise_augmentation(ts4, noise_level=1.0)
assert augmented_ts4 != ts4, (
    "Test 4 Failed: Single element should change"
)

print("✅ All tests passed!")


## 9. Experience Replay Buffer


In [60]:
import collections
import random
from typing import Any, List

class ExperienceReplayBuffer:
    """
    Bounded first-in, first-out store of experiences for
    reinforcement learning.

    Once `capacity` experiences are held, adding another one evicts
    the oldest.
    """

    def __init__(self, capacity: int):
        """
        Create an empty buffer.

        Args:
            capacity (int): Upper bound on the number of stored experiences.

        Raises:
            ValueError: If capacity is zero or negative.
        """
        if capacity <= 0:
            raise ValueError("Capacity must be a positive integer")
        # A deque with maxlen drops items from the opposite end on overflow
        self.buffer = collections.deque([], capacity)

    def add(self, experience: Any) -> None:
        """
        Append one experience, evicting the oldest if the buffer is full.

        Args:
            experience: Any object; typically a tuple such as
            (state, action, reward, next_state, done)
        """
        self.buffer.append(experience)

    def sample(self, batch_size: int) -> List[Any]:
        """
        Draw distinct stored experiences at random.

        Args:
            batch_size (int): Requested number of experiences.

        Returns:
            List of sampled experiences. When batch_size exceeds the
            number stored, every stored experience is returned (an
            empty list for an empty buffer).

        Raises:
            ValueError: If batch_size is zero or negative.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        available = len(self.buffer)
        # Never ask random.sample for more items than are stored
        draw_count = min(batch_size, available)
        return random.sample(self.buffer, draw_count)

    def __len__(self) -> int:
        """Number of experiences currently held."""
        return len(self.buffer)

In [None]:
# Test 1: store three experiences, then draw two of them
buffer = ExperienceReplayBuffer(capacity=100)
buffer.add(experience=("s1", "a1", 1, "s2"))
buffer.add(experience=("s2", "a2", 0, "s3"))
buffer.add(experience=("s3", "a3", -1, "s4"))
assert len(buffer) == 3, "Test 1 Failed: Incorrect buffer size"
sample = buffer.sample(batch_size=2)
assert len(sample) == 2, "Test 1 Failed: Incorrect sample size"

# Test 2: adding more items than the capacity allows
buffer = ExperienceReplayBuffer(capacity=3)
for i in range(5):
    buffer.add(experience=i)  # adds 0, 1, 2, 3, 4
assert len(buffer) == 3, (
    "Test 2 Failed: Capacity exceeded but size is wrong"
)
# The oldest items are evicted first, so 2, 3, 4 must remain
assert list(buffer.buffer) == [2, 3, 4], (
    "Test 2 Failed: Old experiences not discarded correctly"
)

# Test 3: requesting more items than are currently stored
buffer = ExperienceReplayBuffer(capacity=10)
buffer.add(experience=1)
buffer.add(experience=2)
sample = buffer.sample(batch_size=5)
assert len(sample) == 2, (
    "Test 3 Failed: Should return all elements if batch_size is larger"
)

# Test 4: sampling from a buffer that holds nothing
buffer = ExperienceReplayBuffer(capacity=10)
sample = buffer.sample(batch_size=5)
assert sample == [], (
    "Test 4 Failed: Sampling from empty buffer should return empty list"
)

print("✅ All tests passed!")


## 10. Bag-of-Words

In [64]:
from collections import Counter

def create_bow_vectors(corpus, vocabulary):
    """
    Build one Bag-of-Words (BoW) count vector per document.

    Args:
        corpus (list[list[str]]): Documents, each given as a list of words.
        vocabulary (list[str]): Words defining the vector positions;
            position i of every vector counts vocabulary[i].

    Returns:
        list[list[int]]: One vector per document holding, for each
                         vocabulary word, how often it occurs in that
                         document. Words outside the vocabulary are
                         ignored. An empty corpus or vocabulary gives
                         an empty list.
    """
    if not corpus or not vocabulary:
        return []

    def vectorize(doc):
        # Count every word once, then read the counts in vocabulary order;
        # a Counter reports 0 for words the document does not contain
        freq = Counter(doc)
        return [freq[term] for term in vocabulary]

    return [vectorize(doc) for doc in corpus]


In [None]:
vocab = ["apple", "banana", "fruit", "orange"]

# Test 1: two documents made only of vocabulary words
corpus1 = [["apple", "banana", "apple"], ["fruit", "orange"]]
expected1 = [[2, 1, 0, 0], [0, 0, 1, 1]]
assert (
    create_bow_vectors(corpus=corpus1, vocabulary=vocab) == expected1
), "Test 1 Failed"

# Test 2: a word missing from the vocabulary is not counted
corpus2 = [["apple", "grape", "banana"]]  # "grape" is not in vocab
expected2 = [[1, 1, 0, 0]]
assert (
    create_bow_vectors(corpus=corpus2, vocabulary=vocab) == expected2
), "Test 2 Failed"

# Test 3: a single empty document gives an all-zero vector
corpus3 = [[]]
expected3 = [[0, 0, 0, 0]]
assert (
    create_bow_vectors(corpus=corpus3, vocabulary=vocab) == expected3
), "Test 3 Failed"

# Test 4: an empty corpus gives an empty result
assert create_bow_vectors(corpus=[], vocabulary=vocab) == [], "Test 4 Failed"

# Test 5: larger vocabulary, the document uses only part of it
vocab5 = ["a", "b", "c", "d", "e"]
corpus5 = [["a", "c", "e", "a"]]
expected5 = [[2, 0, 1, 0, 1]]
assert (
    create_bow_vectors(corpus=corpus5, vocabulary=vocab5) == expected5
), "Test 5 Failed"

print("✅ All tests passed!")
