In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import *

# Obtain a SparkSession for this notebook (getOrCreate reuses an active one if present)
spark = (
    SparkSession.builder
    .appName("Create DataFrame Example")
    .getOrCreate()
)

# Users schema: one row per account holder; every column is non-nullable
schema = (
    StructType()
    .add("user_id", IntegerType(), False)
    .add("username", StringType(), False)
    .add("opening_balance", IntegerType(), False)
)

# Sample user rows: (user_id, username, opening_balance)
data = [
    (100, "Ankit", 1000),
    (101, "Rahul", 9000),
    (102, "Amit", 5000),
    (103, "Agam", 7500),
]

# Materialize the users DataFrame
users_df = spark.createDataFrame(data, schema)


# Transactions schema: one row per transfer; every column is non-nullable
schema_trans = (
    StructType()
    .add("id", IntegerType(), False)
    .add("from_userid", IntegerType(), False)
    .add("to_userid", IntegerType(), False)
    .add("amount", IntegerType(), False)
)

# Sample transfer rows: (id, from_userid, to_userid, amount)
data_trans = [
    (1, 100, 102, 500),
    (2, 102, 101, 700),
    (3, 101, 102, 600),
    (4, 102, 100, 1500),
    (5, 102, 101, 800),
    (6, 102, 101, 300),
]

# Materialize the transactions DataFrame, then print both inputs
transactions_df = spark.createDataFrame(data_trans, schema_trans)
transactions_df.show()
users_df.show()


+---+-----------+---------+------+
| id|from_userid|to_userid|amount|
+---+-----------+---------+------+
|  1|        100|      102|   500|
|  2|        102|      101|   700|
|  3|        101|      102|   600|
|  4|        102|      100|  1500|
|  5|        102|      101|   800|
|  6|        102|      101|   300|
+---+-----------+---------+------+

+-------+--------+---------------+
|user_id|username|opening_balance|
+-------+--------+---------------+
|    100|   Ankit|           1000|
|    101|   Rahul|           9000|
|    102|    Amit|           5000|
|    103|    Agam|           7500|
+-------+--------+---------------+



In [0]:
# Closing balance per user = opening_balance + total received - total sent.
#
# NOTE: `sum`, `col`, `coalesce` and `lit` below are the Spark column functions
# brought in by `from pyspark.sql.functions import *`, not Python builtins.

# Total amount received by each user.
credit_df = transactions_df.groupBy('to_userid').agg(sum(col('amount')).alias('credit'))
# Total amount sent by each user.
debit_df = transactions_df.groupBy('from_userid').agg(sum(col('amount')).alias('debit'))

# Full outer join so that a user who only received (or only sent) money is kept;
# an inner join would drop them and their transactions would be ignored.
# The side with no transactions is filled with 0 rather than left NULL.
credit_debit_df = (
    credit_df
    .join(debit_df, on=credit_df.to_userid == debit_df.from_userid, how='full')
    .select(
        coalesce(col('to_userid'), col('from_userid')).alias('to_userid'),
        coalesce(col('credit'), lit(0)).alias('credit'),
        coalesce(col('debit'), lit(0)).alias('debit'),
    )
)

# Left join from users so accounts with no transactions at all still appear;
# for those rows credit/debit are NULL after the join and are treated as 0,
# which leaves closing_balance equal to opening_balance.
ans_df = (
    users_df
    .join(credit_debit_df, on=users_df.user_id == credit_debit_df.to_userid, how='left')
    .withColumn(
        'closing_balance',
        col('opening_balance')
        + coalesce(col('credit'), lit(0))
        - coalesce(col('debit'), lit(0)),
    )
    .select('user_id', 'username', 'closing_balance')
    .orderBy(col('closing_balance'))
)
ans_df.show()


+-------+--------+---------------+
|user_id|username|closing_balance|
+-------+--------+---------------+
|    100|   Ankit|           2000|
|    102|    Amit|           2800|
|    103|    Agam|           7500|
|    101|   Rahul|          10200|
+-------+--------+---------------+

