In [3]:
from pyspark.sql import Window
from pyspark.sql.functions import *
from pyspark.sql import SparkSession
from pyspark.sql.types import StructField, StructType, StringType, IntegerType, TimestampType

# Spark session for this notebook.
spark = SparkSession.builder.appName("GE_Test").getOrCreate()

# Toy input: one row per (group, day) carrying a 0/1 validity flag.
data = [
    # group A
    ("A", "2024-01-01", 1),
    ("A", "2024-01-02", 0),
    ("A", "2024-01-03", 1),
    ("A", "2024-01-04", 0),
    ("A", "2024-01-05", 0),
    # group B
    ("B", "2024-01-01", 1),
    ("B", "2024-01-02", 0),
    ("B", "2024-01-03", 1),
]
columns = ["group", "date", "valid_flag"]
df = spark.createDataFrame(data, columns)

# Rows of one group, ordered by the date column. The dates are ISO-formatted
# strings, so their lexical order is also their chronological order.
window_spec = Window.partitionBy("group").orderBy("date")

# Frame covering only the rows strictly after the current one, up to the end
# of the partition.
later_rows = window_spec.rowsBetween(1, Window.unboundedFollowing)

# True on rows whose flag is 1, null on every other row.
valid_marker = when(col("valid_flag") == 1, True)

# last(..., ignorenulls=True) picks the last non-null marker in the frame, so
# the result is True when any later row in the group is valid and null when
# none is (including the final row of each group, whose frame is empty).
df_with_future = df.withColumn(
    "has_future_valid",
    last(valid_marker, ignorenulls=True).over(later_rows),
)

df_with_future.show()

[Stage 0:>                                                          (0 + 8) / 8]

+-----+----------+----------+----------------+
|group|      date|valid_flag|has_future_valid|
+-----+----------+----------+----------------+
|    A|2024-01-01|         1|            true|
|    A|2024-01-02|         0|            true|
|    A|2024-01-03|         1|            null|
|    A|2024-01-04|         0|            null|
|    A|2024-01-05|         0|            null|
|    B|2024-01-01|         1|            true|
|    B|2024-01-02|         0|            true|
|    B|2024-01-03|         1|            null|
+-----+----------+----------+----------------+



                                                                                

In [4]:
# Alternative approach: expose the immediate successor's flag via lead() and
# derive has_future_valid from *all* later rows of the group.
#
# A previous version chained two lead() calls (lead of next_valid), which is
# the same as lead(valid_flag, 2): it inspected only the next two rows and
# silently missed a valid row three or more positions ahead.
df_optimized = df.withColumn(
    # Flag of the very next row in the group; null on the group's last row.
    "next_valid",
    lead(col("valid_flag"), 1).over(window_spec)
).withColumn(
    "has_future_valid",
    when(
        # count() ignores nulls, so this counts the later rows whose flag is 1.
        # The frame starts one row after the current row and runs to the end
        # of the partition, so the look-ahead distance is unbounded.
        count(when(col("valid_flag") == 1, 1)).over(
            window_spec.rowsBetween(1, Window.unboundedFollowing)
        ) > 0,
        True
    ).otherwise(None)  # keep null (not False) when no later row is valid
)

df_optimized.show()

+-----+----------+----------+----------+----------------+
|group|      date|valid_flag|next_valid|has_future_valid|
+-----+----------+----------+----------+----------------+
|    A|2024-01-01|         1|         0|            true|
|    A|2024-01-02|         0|         1|            true|
|    A|2024-01-03|         1|         0|            null|
|    A|2024-01-04|         0|         0|            null|
|    A|2024-01-05|         0|      null|            null|
|    B|2024-01-01|         1|         0|            true|
|    B|2024-01-02|         0|         1|            true|
|    B|2024-01-03|         1|      null|            null|
+-----+----------+----------+----------+----------------+

