# 1. Przygotowanie data frame

In [2]:
from pyspark.shell import spark
from pyspark.sql import functions as F

cols = ["timestamp", "unix", "Date"]
data = [("2015-03-22T14:13:34", 1646641525847, "May, 2021"),
        ("2015-03-22T15:03:18", 1646641557555, "Mar, 2021"),
        ("2015-03-22T14:38:39", 1646641578622, "Jan, 2021")]

dataFrame = spark.createDataFrame(data, cols) \
    .withColumn("current_date", F.current_date()) \
    .withColumn("current_timestamp", F.current_timestamp())

dataFrame.show()

+-------------------+-------------+---------+------------+--------------------+
|          timestamp|         unix|     Date|current_date|   current_timestamp|
+-------------------+-------------+---------+------------+--------------------+
|2015-03-22T14:13:34|1646641525847|May, 2021|  2025-03-18|2025-03-18 20:13:...|
|2015-03-22T15:03:18|1646641557555|Mar, 2021|  2025-03-18|2025-03-18 20:13:...|
|2015-03-22T14:38:39|1646641578622|Jan, 2021|  2025-03-18|2025-03-18 20:13:...|
+-------------------+-------------+---------+------------+--------------------+



# 2. Konwersja

In [3]:
dataFrame.select(F.unix_timestamp("timestamp", "yyyy-MM-dd'T'HH:mm:ss").alias("unix_converted"), "unix").show()

+--------------+-------------+
|unix_converted|         unix|
+--------------+-------------+
|    1427030014|1646641525847|
|    1427032998|1646641557555|
|    1427031519|1646641578622|
+--------------+-------------+



In [4]:
updatedSchema = (dataFrame.withColumn("timestamp", F.date_format("timestamp", "yyyy-MM-dd HH:mm:ss"))
                 .withColumn("current_timestamp", F.date_format("current_timestamp", "yyyy-MM-dd HH:mm:ss")))

updatedSchema.printSchema()

root
 |-- timestamp: string (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- current_date: date (nullable = false)
 |-- current_timestamp: string (nullable = false)



In [5]:
updatedSchema.show()

+-------------------+-------------+---------+------------+-------------------+
|          timestamp|         unix|     Date|current_date|  current_timestamp|
+-------------------+-------------+---------+------------+-------------------+
|2015-03-22 14:13:34|1646641525847|May, 2021|  2025-03-18|2025-03-18 20:13:10|
|2015-03-22 15:03:18|1646641557555|Mar, 2021|  2025-03-18|2025-03-18 20:13:10|
|2015-03-22 14:38:39|1646641578622|Jan, 2021|  2025-03-18|2025-03-18 20:13:10|
+-------------------+-------------+---------+------------+-------------------+



In [7]:
tempE = (dataFrame.withColumn(
    "unix_converted",
    F.to_unix_timestamp(
        "timestamp",
        F.lit("yyyy-MM-dd'T'HH:mm:ss")
    )
))

display(tempE)

DataFrame[timestamp: string, unix: bigint, Date: string, current_date: date, current_timestamp: timestamp, unix_converted: bigint]