In [134]:
from typing import Tuple, List

from pyspark.sql import DataFrame, SparkSession
from pyspark.sql import functions as F
from pyspark.sql.types import (
    ArrayType,
    BooleanType,
    IntegerType,
    StructField,
    StructType,
)

In [135]:
# One session shared by every cell below; getOrCreate() hands back the
# already-running session when this cell is executed again.
spark = (
    SparkSession.builder
    .appName("advent-of-code-2024")
    .getOrCreate()
)

In [136]:
def read_input(input_file_name: str) -> Tuple[DataFrame, int]:
    """Load a space-separated file of integers as a DataFrame.

    The file is read twice: a first pass over the raw text finds the largest
    number of space-separated tokens on any line, and a second pass parses
    the file as CSV against a schema with that many integer columns named
    ``value_0``, ``value_1``, ...

    Args:
        input_file_name: Path of the file to load.

    Returns:
        A ``(dataframe, num_cols)`` tuple where ``num_cols`` is the token
        count of the widest line, i.e. the number of ``value_<n>`` columns.
    """
    # Pass 1: how many tokens does the widest line have?
    tokens_per_line = spark.read.text(input_file_name).select(
        F.size(F.split(F.col("value"), " ")).alias("num_cols")
    )
    num_cols = tokens_per_line.agg(F.max(F.col("num_cols"))).first()[0]

    # Pass 2: parse with one integer column per token position.
    fields = [StructField(f"value_{idx}", IntegerType()) for idx in range(num_cols)]
    reader = (
        spark.read.schema(StructType(fields))
        .option("delimiter", " ")
        .option("header", False)
    )
    return reader.csv(input_file_name), num_cols


def part_1(input_file_name: str) -> int:
    """Count the rows of ``input_file_name`` whose values change safely.

    A row is counted when its adjacent differences (next value minus the
    previous one) all have the same sign and every absolute difference is
    between 1 and 3 inclusive.

    Rows with fewer values than the widest row are expected to carry nulls in
    their trailing columns (NOTE(review): relies on Spark's permissive CSV
    parsing -- confirm). ``F.least`` / ``F.greatest`` skip nulls, so only the
    differences that actually exist are checked, and a row without any
    non-null difference is never counted.

    Args:
        input_file_name: Path of the space-separated input file.

    Returns:
        The number of rows that satisfy the rule above.
    """
    input_df, num_cols = read_input(input_file_name)
    num_diffs = num_cols - 1
    if num_diffs < 1:
        # No row has two values, so there is no difference to validate. This
        # matches how such rows are treated in wider files (all-null
        # differences never pass the filter).
        return 0

    def _extreme(spark_fn, columns):
        # F.least / F.greatest reject fewer than two columns; with a single
        # difference that difference is its own minimum and maximum.
        return columns[0] if len(columns) == 1 else spark_fn(*columns)

    diffs = [
        F.col(f"value_{idx + 1}") - F.col(f"value_{idx}")
        for idx in range(num_diffs)
    ]
    abs_diffs = [F.abs(diff) for diff in diffs]

    smallest = _extreme(F.least, diffs)
    largest = _extreme(F.greatest, diffs)
    smallest_abs = _extreme(F.least, abs_diffs)
    largest_abs = _extreme(F.greatest, abs_diffs)

    return input_df.filter(
        # A positive product means min and max share a sign: all differences
        # are strictly increasing or all strictly decreasing.
        (smallest * largest > 0)
        & (smallest_abs >= 1)
        & (largest_abs <= 3)
    ).count()


# Guard: part_1 must return 2 for test-input.txt before its result for
# input.txt is reported.
assert part_1("test-input.txt") == 2

# Report the part 1 result for input.txt.
print(f'Solution: {part_1("input.txt")}')

Solution: 314


In [144]:
def is_safe(values: List[int]) -> bool:
    """Tell whether ``values`` moves in one direction in steps of 1 to 3.

    Args:
        values: The sequence of levels to check.

    Returns:
        ``True`` when every adjacent step (next minus previous) is within
        ``[1, 3]``, or every step is within ``[-3, -1]``. A sequence with
        fewer than two values has no steps and is therefore ``True``.
    """
    steps = [following - current for current, following in zip(values, values[1:])]
    if all(1 <= step <= 3 for step in steps):
        return True
    return all(-3 <= step <= -1 for step in steps)


@F.udf(BooleanType())
def is_safe_using_dampener(values: List[int]) -> bool:
    """Spark UDF: is ``values`` safe as-is or after dropping one element?

    Args:
        values: The sequence of levels to check.

    Returns:
        ``True`` when ``is_safe`` accepts ``values`` unchanged, or accepts
        it with any single element removed; ``False`` otherwise.
    """
    if is_safe(values):
        return True
    # Try every way of leaving exactly one element out.
    return any(
        is_safe(values[:skipped] + values[skipped + 1 :])
        for skipped in range(len(values))
    )


def part_2(input_file_name: str) -> int:
    """Count the lines of ``input_file_name`` accepted by the dampener check.

    Each line is read as text, split on single spaces, cast to an array of
    integers and passed to ``is_safe_using_dampener``.

    Args:
        input_file_name: Path of the space-separated input file.

    Returns:
        The number of lines for which ``is_safe_using_dampener`` is true.
    """
    reports = spark.read.text(input_file_name).select(
        F.split(F.col("value"), " ").cast(ArrayType(IntegerType())).alias("values")
    )
    flagged = reports.withColumn("is_safe", is_safe_using_dampener(F.col("values")))
    # A boolean column is itself a valid predicate; null rows are dropped too.
    return flagged.where(F.col("is_safe")).count()


# Guard: part_2 must return 4 for test-input.txt before its result for
# input.txt is reported.
assert part_2("test-input.txt") == 4

# Report the part 2 result for input.txt.
print(f'Solution: {part_2("input.txt")}')

Solution: 373
