In [3]:
from pyspark.sql import SparkSession

# Obtain the notebook's Spark session (reuses an existing one if present,
# otherwise starts a new one) under the application name "PySparkExample"
spark = (
    SparkSession.builder
    .appName("PySparkExample")
    .getOrCreate()
)

In [5]:
# Build a three-row demo DataFrame: a name column plus an integer column
columns = ["Name", "Value"]
data = [("Alice", 1), ("Bob", 2), ("Charlie", 3)]
df = spark.createDataFrame(data, schema=columns)

# Display the input rows
print("Original DataFrame:")
df.show()

# Derive a new column holding twice each row's Value
df_transformed = df.withColumn("DoubledValue", df.Value * 2)

# Display the rows with the derived column appended
print("Transformed DataFrame:")
df_transformed.show()

# Sum the Value column; the aggregate comes back as a single row whose
# first field is the total
total_value = df_transformed.agg({"Value": "sum"}).first()[0]
print(f"Total Value: {total_value}")

Original DataFrame:
+-------+-----+
|   Name|Value|
+-------+-----+
|  Alice|    1|
|    Bob|    2|
|Charlie|    3|
+-------+-----+

Transformed DataFrame:
+-------+-----+------------+
|   Name|Value|DoubledValue|
+-------+-----+------------+
|  Alice|    1|           2|
|    Bob|    2|           4|
|Charlie|    3|           6|
+-------+-----+------------+

Total Value: 6
Spark version: 3.5.0


In [8]:
# Read the version string exposed by the active Spark session and print it,
# so the notebook records which Spark release produced the outputs
spark_version = spark.version
print("Spark version:", spark_version)

Spark version: 3.5.0


In [None]:
# Stop the Spark session now that the demo is finished.
# NOTE(review): any cell that uses `spark` after this one presumably needs the
# session to be created again first — confirm before re-running out of order.
spark.stop()