In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import DoubleType

# Initialize Spark session
spark = SparkSession.builder.appName("Calculator").getOrCreate()


In [0]:
# Define the calculator functions
def add(x, y):
    return x + y

def subtract(x, y):
    return x - y

def multiply(x, y):
    return x * y

def divide(x, y):
    if y == 0:
        return None  # Handle division by zero
    return x / y

# Register these functions as UDFs (User Defined Functions) in PySpark
add_udf = udf(add, DoubleType())
subtract_udf = udf(subtract, DoubleType())
multiply_udf = udf(multiply, DoubleType())
divide_udf = udf(divide, DoubleType())


In [0]:
# Create a sample DataFrame with two columns 'x' and 'y'
data = [(10, 5), (20, 0), (15, 3), (50, 25)]
columns = ["x", "y"]
df = spark.createDataFrame(data, columns)

# Show the original DataFrame
df.show()


+---+---+
|  x|  y|
+---+---+
| 10|  5|
| 20|  0|
| 15|  3|
| 50| 25|
+---+---+



In [0]:
# Apply the calculator functions to the DataFrame
# Add
df_with_addition = df.withColumn("addition", add_udf(df["x"], df["y"]))

# Subtract
df_with_subtraction = df_with_addition.withColumn("subtraction", subtract_udf(df["x"], df["y"]))

# Multiply
df_with_multiplication = df_with_subtraction.withColumn("multiplication", multiply_udf(df["x"], df["y"]))

# Divide
df_with_division = df_with_multiplication.withColumn("division", divide_udf(df["x"], df["y"]))

# Show the DataFrame with all operations
df_with_division.show()


+---+---+--------+-----------+--------------+--------+
|  x|  y|addition|subtraction|multiplication|division|
+---+---+--------+-----------+--------------+--------+
| 10|  5|    null|       null|          null|     2.0|
| 20|  0|    null|       null|          null|    null|
| 15|  3|    null|       null|          null|     5.0|
| 50| 25|    null|       null|          null|     2.0|
+---+---+--------+-----------+--------------+--------+



In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import expr

# Initialize Spark session
spark = SparkSession.builder.appName("Calculator").getOrCreate()


In [0]:
# Create a sample DataFrame with two columns 'x' and 'y'
data = [(10, 5), (20, 0), (15, 3), (50, 25)]
columns = ["x", "y"]
df = spark.createDataFrame(data, columns)


In [0]:
# Apply the calculator functions directly using SQL expressions
df_with_operations = df.selectExpr(
    "x",
    "y",
    "x + y as addition",
    "x - y as subtraction",
    "x * y as multiplication",
    "CASE WHEN y = 0 THEN NULL ELSE x / y END as division"
)

# Show the DataFrame with all operations
df_with_operations.show()


+---+---+--------+-----------+--------------+--------+
|  x|  y|addition|subtraction|multiplication|division|
+---+---+--------+-----------+--------------+--------+
| 10|  5|      15|          5|            50|     2.0|
| 20|  0|      20|         20|             0|    null|
| 15|  3|      18|         12|            45|     5.0|
| 50| 25|      75|         25|          1250|     2.0|
+---+---+--------+-----------+--------------+--------+



In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import pandas_udf
from pyspark.sql.types import DoubleType
import pandas as pd

# Initialize Spark session
spark = SparkSession.builder.appName("Calculator").getOrCreate()




In [0]:
# Define the calculator functions
def add(x: pd.Series, y: pd.Series) -> pd.Series:
    return x + y

def subtract(x: pd.Series, y: pd.Series) -> pd.Series:
    return x - y

def multiply(x: pd.Series, y: pd.Series) -> pd.Series:
    return x * y

def divide(x: pd.Series, y: pd.Series) -> pd.Series:
    return x / y.where(y != 0, None)  # Handle division by zero by returning None


In [0]:
# Register the functions as Pandas UDFs
add_udf = pandas_udf(add, DoubleType())
subtract_udf = pandas_udf(subtract, DoubleType())
multiply_udf = pandas_udf(multiply, DoubleType())
divide_udf = pandas_udf(divide, DoubleType())


In [0]:
# Create a sample DataFrame with two columns 'x' and 'y'
data = [(10, 5), (20, 0), (15, 3), (50, 25)]
columns = ["x", "y"]
df = spark.createDataFrame(data, columns)

# Show the original DataFrame
df.show()


+---+---+
|  x|  y|
+---+---+
| 10|  5|
| 20|  0|
| 15|  3|
| 50| 25|
+---+---+



In [0]:
# Apply the calculator functions to the DataFrame
df_with_addition = df.withColumn("addition", add_udf(df["x"], df["y"]))
df_with_subtraction = df_with_addition.withColumn("subtraction", subtract_udf(df["x"], df["y"]))
df_with_multiplication = df_with_subtraction.withColumn("multiplication", multiply_udf(df["x"], df["y"]))
df_with_division = df_with_multiplication.withColumn("division", divide_udf(df["x"], df["y"]))

# Show the DataFrame with all operations
df_with_division.show()


+---+---+--------+-----------+--------------+--------+
|  x|  y|addition|subtraction|multiplication|division|
+---+---+--------+-----------+--------------+--------+
| 10|  5|    15.0|        5.0|          50.0|     2.0|
| 20|  0|    20.0|       20.0|           0.0|    null|
| 15|  3|    18.0|       12.0|          45.0|     5.0|
| 50| 25|    75.0|       25.0|        1250.0|     2.0|
+---+---+--------+-----------+--------------+--------+

