In [0]:
from pyspark.sql import SparkSession
from pyspark.ml.regression import LinearRegression
from pyspark.ml.feature import VectorAssembler


In [0]:
# Obtain the notebook's SparkSession: getOrCreate() hands back the session
# that is already running if there is one, and starts a new one otherwise
spark = (
    SparkSession.builder
    .getOrCreate()
)


In [0]:
# Toy dataset: the targets are exactly twice the inputs
X = list(range(1, 6))  # Input features: 1 through 5
y = [2 * value for value in X]  # Target values: 2, 4, 6, 8, 10

In [0]:
# Pair each input with its target and load the resulting rows into a
# two-column PySpark DataFrame ("features", "label"), then display it
data = spark.createDataFrame(
    list(zip(X, y)),
    schema=["features", "label"],
)
data.show()

+--------+-----+
|features|label|
+--------+-----+
|       1|    2|
|       2|    4|
|       3|    6|
|       4|    8|
|       5|   10|
+--------+-----+



In [0]:
# Create a vector assembler that packs the scalar "features" column into a
# vector column named "features_vector"
vectorAssembler = VectorAssembler(inputCols=["features"], outputCol="features_vector")
# Transform the data using the vector assembler.
# `data` is reassigned here, so when this cell is run a second time it already
# contains "features_vector" and transform() would fail because the output
# column exists. Dropping the column first keeps the cell re-runnable; drop()
# is a no-op when the column is absent, so the first run is unaffected.
data = vectorAssembler.transform(data.drop("features_vector"))
data.show()

+--------+-----+---------------+
|features|label|features_vector|
+--------+-----+---------------+
|       1|    2|          [1.0]|
|       2|    4|          [2.0]|
|       3|    6|          [3.0]|
|       4|    8|          [4.0]|
|       5|   10|          [5.0]|
+--------+-----+---------------+



In [0]:
# Configure the estimator: inputs come from the assembled vector column,
# targets from the label column
lr = LinearRegression(labelCol="label", featuresCol="features_vector")

# Train on the assembled DataFrame
model = lr.fit(data)

# Extract the fitted parameters; a single input column was assembled, so the
# slope is the first (index 0) entry of the coefficient vector
intercept, slope = model.intercept, model.coefficients[0]

# Report the fitted parameters
print("Intercept:", intercept)
print("Slope:", slope)


Intercept: -6.908406818208612e-15
Slope: 2.000000000000002
