In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.window import Window
from pyspark.sql.functions import col, sum as _sum

spark = SparkSession.builder.appName("CumulativeRuns").getOrCreate()

data = [
    (1, 1, 1, 1),
    (1, 1, 2, 0),
    (1, 1, 3, 4),
    (1, 1, 4, 2),
    (1, 2, 1, 6),
    (1, 2, 2, 1)
]

columns = ["match_id", "inning", "ball_no", "runs"]
df = spark.createDataFrame(data, columns)

windowSpec = Window.partitionBy("match_id", "inning").orderBy("ball_no").rowsBetween(Window.unboundedPreceding, Window.currentRow)

df_with_cumulative = df.withColumn("cumulative_runs", _sum("runs").over(windowSpec))

df_with_cumulative.show()


+--------+------+-------+----+---------------+
|match_id|inning|ball_no|runs|cumulative_runs|
+--------+------+-------+----+---------------+
|       1|     1|      1|   1|              1|
|       1|     1|      2|   0|              1|
|       1|     1|      3|   4|              5|
|       1|     1|      4|   2|              7|
|       1|     2|      1|   6|              6|
|       1|     2|      2|   1|              7|
+--------+------+-------+----+---------------+



In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import lit
from pyspark.sql.types import StructType, StructField, StringType

data = [
    ("Bumrah to Dhawan, FOUR! Cracking shot through covers", "Bumrah", "Dhawan", "FOUR"),
    ("Chahal to Raina, 1 run, nudged to midwicket", "Chahal", "Raina", "1 run"),
    ("Narine to Kohli, no run, defended solidly", "Narine", "Kohli", "no run"),
    ("Rabada to Rohit, SIX! Smashed over long-on", "Rabada", "Rohit", "SIX")
]

schema = StructType([
    StructField("commentary", StringType(), True),
    StructField("bowler", StringType(), True),
    StructField("batsman", StringType(), True),
    StructField("runs", StringType(), True)
])

commentary_df = spark.createDataFrame(data, schema)

from pyspark.sql.functions import monotonically_increasing_id

commentary_df = commentary_df.withColumn("match_id", lit(1)) \
                             .withColumn("inning", lit(1)) \
                             .withColumn("ball_no", monotonically_increasing_id())

commentary_df.show(truncate=False)



+----------------------------------------------------+------+-------+------+--------+------+-----------+
|commentary                                          |bowler|batsman|runs  |match_id|inning|ball_no    |
+----------------------------------------------------+------+-------+------+--------+------+-----------+
|Bumrah to Dhawan, FOUR! Cracking shot through covers|Bumrah|Dhawan |FOUR  |1       |1     |8589934592 |
|Chahal to Raina, 1 run, nudged to midwicket         |Chahal|Raina  |1 run |1       |1     |25769803776|
|Narine to Kohli, no run, defended solidly           |Narine|Kohli  |no run|1       |1     |42949672960|
|Rabada to Rohit, SIX! Smashed over long-on          |Rabada|Rohit  |SIX   |1       |1     |60129542144|
+----------------------------------------------------+------+-------+------+--------+------+-----------+



In [0]:
from pyspark.sql.functions import when, col

commentary_df = commentary_df.withColumn(
    "runs",
    when(col("runs").like("%FOUR%"), 4)
    .when(col("runs").like("%SIX%"), 6)
    .when(col("runs").like("%1 run%"), 1)
    .when(col("runs").like("%2 run%"), 2)
    .when(col("runs").like("%3 run%"), 3)
    .when(col("runs").like("%no run%"), 0)
    .otherwise(0)
)


In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import sum as _sum

windowSpec = Window.partitionBy("match_id", "inning").orderBy("ball_no").rowsBetween(Window.unboundedPreceding, Window.currentRow)

ball_df_with_cumulative = commentary_df.withColumn("cumulative_runs", _sum("runs").over(windowSpec))

ball_df_with_cumulative.select("match_id", "inning", "ball_no", "batsman", "runs", "cumulative_runs").show()


+--------+------+-----------+-------+----+---------------+
|match_id|inning|    ball_no|batsman|runs|cumulative_runs|
+--------+------+-----------+-------+----+---------------+
|       1|     1| 8589934592| Dhawan|   4|              4|
|       1|     1|25769803776|  Raina|   1|              5|
|       1|     1|42949672960|  Kohli|   0|              5|
|       1|     1|60129542144|  Rohit|   6|             11|
+--------+------+-----------+-------+----+---------------+

