In [None]:
pyspark.ml.functions.array_to_vector
pyspark.ml.functions.array_to_vector(col)
Converts a column of arrays of numeric type into a column of MLlib dense vectors.

New in version 3.1.0.

Parameters
col : pyspark.sql.Column or str
Input column

Returns
pyspark.sql.Column
The converted column of MLlib dense vectors.

from pyspark.ml.functions import array_to_vector

# NOTE(review): assumes a SparkSession is already bound to `spark`; it is not
# created anywhere in this cell — confirm before running.
#
# A notebook cell renders only its final expression, so each result is printed
# explicitly. Otherwise the first two conversions would run but never be shown.

# Input column declared as array<double>.
df1 = spark.createDataFrame([([1.5, 2.5],),], schema='v1 array<double>')
print(df1.select(array_to_vector('v1').alias('vec1')).collect())

# Input column declared as array<float>.
df2 = spark.createDataFrame([([1.5, 3.5],),], schema='v1 array<float>')
print(df2.select(array_to_vector('v1').alias('vec1')).collect())

# Input column declared as array<int>.
df3 = spark.createDataFrame([([1, 3],),], schema='v1 array<int>')
print(df3.select(array_to_vector('v1').alias('vec1')).collect())

```python
pyspark.ml.functions.vector_to_array(col, dtype='float64')
```

In [None]:
Converts a column of MLlib sparse/dense vectors into a column of dense arrays.

New in version 3.0.0.

Parameters
col : pyspark.sql.Column or str
Input column

dtype : str, optional
The data type of the output array. Valid values: "float64" (the default) or "float32".

Returns
pyspark.sql.Column
The converted column of dense arrays.

In [1]:
from pyspark.ml.linalg import Vectors
from pyspark.ml.functions import vector_to_array
from pyspark.mllib.linalg import Vectors as OldVectors

In [2]:
# Two-row demo frame: the first row holds dense vectors, the second sparse ones.
# "vec" is built with Vectors and "oldVec" with OldVectors, so both vector
# flavours imported by this notebook are represented side by side.
df = spark.createDataFrame(
    [
        (
            Vectors.dense(1.0, 2.0, 3.0),
            OldVectors.dense(10.0, 20.0, 30.0),
        ),
        (
            Vectors.sparse(3, [(0, 2.0), (2, 3.0)]),
            OldVectors.sparse(3, [(0, 20.0), (2, 30.0)]),
        ),
    ],
    schema=["vec", "oldVec"],
)
df.show()

+-------------------+--------------------+
|                vec|              oldVec|
+-------------------+--------------------+
|      [1.0,2.0,3.0]|    [10.0,20.0,30.0]|
|(3,[0,2],[2.0,3.0])|(3,[0,2],[20.0,30...|
+-------------------+--------------------+



In [4]:
# Convert both vector columns to plain arrays, keeping each column's name.
# No dtype is passed here, so vector_to_array falls back to its default.
df_array = df.select(
    *[vector_to_array(name).alias(name) for name in ("vec", "oldVec")]
)
df_array.show()

+---------------+------------------+
|            vec|            oldVec|
+---------------+------------------+
|[1.0, 2.0, 3.0]|[10.0, 20.0, 30.0]|
|[2.0, 0.0, 3.0]| [20.0, 0.0, 30.0]|
+---------------+------------------+



In [6]:
# Convert both vector columns to arrays, explicitly requesting "float32"
# elements, and keep each column's original name.
df_array = df.select(
    *(
        vector_to_array(name, dtype="float32").alias(name)
        for name in ("vec", "oldVec")
    )
)
df_array.show()

+---------------+------------------+
|            vec|            oldVec|
+---------------+------------------+
|[1.0, 2.0, 3.0]|[10.0, 20.0, 30.0]|
|[2.0, 0.0, 3.0]| [20.0, 0.0, 30.0]|
+---------------+------------------+



---