Użyj każdej z tych funkcji:
* `unix_timestamp()` 
* `date_format()`
* `to_unix_timestamp()`
* `from_unixtime()`
* `to_date()` 
* `to_timestamp()` 
* `from_utc_timestamp()` 
* `to_utc_timestamp()`

In [6]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import current_date, current_timestamp

# Start (or reuse) the SparkSession; every DataFrame below is created from it.
spark = SparkSession.builder.appName("TestApp").getOrCreate()

# Sample input: column names plus three rows holding a timestamp string,
# a 13-digit integer (looks like epoch milliseconds - TODO confirm) and a
# "MMM, yyyy" style label.
kolumny = ["timestamp", "unix", "Date"]
dane = [
    ("2015-03-22T14:13:34", 1646641525847, "May, 2021"),
    ("2015-03-22T15:03:18", 1646641557555, "Mar, 2021"),
    ("2015-03-22T14:38:39", 1646641578622, "Jan, 2021"),
]

# Build the DataFrame and append two columns filled in by Spark itself:
# the current date and the current timestamp.
dataFrame = (
    spark.createDataFrame(dane, schema=kolumny)
    .withColumn("current_date", current_date())
    .withColumn("current_timestamp", current_timestamp())
)

# Print the rows as a text table.
dataFrame.show()


+-------------------+-------------+---------+------------+--------------------+
|          timestamp|         unix|     Date|current_date|   current_timestamp|
+-------------------+-------------+---------+------------+--------------------+
|2015-03-22T14:13:34|1646641525847|May, 2021|  2025-03-12|2025-03-12 16:28:...|
|2015-03-22T15:03:18|1646641557555|Mar, 2021|  2025-03-12|2025-03-12 16:28:...|
|2015-03-22T14:38:39|1646641578622|Jan, 2021|  2025-03-12|2025-03-12 16:28:...|
+-------------------+-------------+---------+------------+--------------------+



In [7]:

# Print the column names, inferred types and nullability of the sample DataFrame.
dataFrame.printSchema()

root
 |-- timestamp: string (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- current_date: date (nullable = false)
 |-- current_timestamp: timestamp (nullable = false)



## unix_timestamp(..) & cast(..)

Konwersja wartości typu **string** na **timestamp**.

Lokalizacja funkcji 
* `pyspark.sql.functions` in the case of Python
* `org.apache.spark.sql.functions` in the case of Scala & Java

## 1. Zmiana formatu wartości timestamp yyyy-MM-dd'T'HH:mm:ss 
`unix_timestamp(..)`

Dokumentacja API `unix_timestamp(..)`:
> Convert time string with given pattern (see <a href="http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html" target="_blank">SimpleDateFormat</a>) to Unix time stamp (in seconds), return null if fail.

`SimpleDateFormat` jest częścią API Javy i zapewnia obsługę parsowania oraz formatowania wartości daty i czasu.

In [None]:
from pyspark.sql.functions import unix_timestamp

# Add a long column with the epoch seconds parsed from the "timestamp"
# strings; per the API docs a value that fails to parse yields null.
zmianaFormatu = dataFrame.withColumn(
    "unix_timestamp",
    unix_timestamp("timestamp", format="yyyy-MM-dd'T'HH:mm:ss"),
)

# Show the resulting schema first, then the rows.
zmianaFormatu.printSchema()
zmianaFormatu.show()

root
 |-- timestamp: string (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- current_date: date (nullable = false)
 |-- current_timestamp: timestamp (nullable = false)
 |-- unix_timestamp: long (nullable = true)

+-------------------+-------------+---------+------------+--------------------+--------------+
|          timestamp|         unix|     Date|current_date|   current_timestamp|unix_timestamp|
+-------------------+-------------+---------+------------+--------------------+--------------+
|2015-03-22T14:13:34|1646641525847|May, 2021|  2025-03-12|2025-03-12 16:29:...|    1427030014|
|2015-03-22T15:03:18|1646641557555|Mar, 2021|  2025-03-12|2025-03-12 16:29:...|    1427032998|
|2015-03-22T14:38:39|1646641578622|Jan, 2021|  2025-03-12|2025-03-12 16:29:...|    1427031519|
+-------------------+-------------+---------+------------+--------------------+--------------+



2. Zmień format zgodnie z klasą `SimpleDateFormat`: **yyyy-MM-dd HH:mm:ss**
  * a. Wyświetl schemat i dane, żeby sprawdzić, czy wartości się zmieniły

In [14]:

# Parse the "timestamp" strings to epoch seconds and cast them to a real
# timestamp column.
# DataFrames are immutable: withColumn returns a NEW DataFrame, so the result
# has to be bound back to the name, otherwise "formatted_timestamp" is lost
# and never shows up in the schema or in the cells that follow.
zmianaFormatu = zmianaFormatu.withColumn(
    "formatted_timestamp",
    unix_timestamp("timestamp", "yyyy-MM-dd'T'HH:mm:ss").cast("timestamp"),
)

# Verify that the new timestamp-typed column is present.
zmianaFormatu.printSchema()

root
 |-- timestamp: string (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- current_date: date (nullable = false)
 |-- current_timestamp: timestamp (nullable = false)
 |-- unix_timestamp: long (nullable = true)
 |-- formatted_timestamp: timestamp (nullable = true)



In [15]:
# unix_timestamp(): recompute the "unix_timestamp" column from the string
# column. A column with that name already exists, so withColumn replaces it
# rather than appending a new one.
tempE = zmianaFormatu.withColumn(
    "unix_timestamp",
    unix_timestamp("timestamp", format="yyyy-MM-dd'T'HH:mm:ss"),
)
display(tempE)

DataFrame[timestamp: string, unix: bigint, Date: string, current_date: date, current_timestamp: timestamp, unix_timestamp: bigint, formatted_timestamp: timestamp]

## Stwórz nowe kolumny do DataFrame z wartościami year(..), month(..), dayofyear(..)

In [29]:
from pyspark.sql.functions import date_format
# date_format(): derive string columns from "timestamp", one DataFrame per
# pattern - "yyyy" (year), "MM" (two-digit month) and "D" (day of year).
yearDate = zmianaFormatu.withColumn(
    "year", date_format("timestamp", format="yyyy")
)
monthDate = zmianaFormatu.withColumn(
    "month", date_format("timestamp", format="MM")
)
dayofyearDate = zmianaFormatu.withColumn(
    "dayofyear", date_format("timestamp", format="D")
)

# Display the three results in the same order they were defined.
display(yearDate)
display(monthDate)
display(dayofyearDate)

DataFrame[timestamp: string, unix: bigint, Date: string, current_date: date, current_timestamp: timestamp, unix_timestamp: bigint, formatted_timestamp: timestamp, year: string]

DataFrame[timestamp: string, unix: bigint, Date: string, current_date: date, current_timestamp: timestamp, unix_timestamp: bigint, formatted_timestamp: timestamp, month: string]

DataFrame[timestamp: string, unix: bigint, Date: string, current_date: date, current_timestamp: timestamp, unix_timestamp: bigint, formatted_timestamp: timestamp, dayofyear: string]

In [30]:
from pyspark.sql.functions import to_date
# to_date(): parse the "Date" labels (e.g. "May, 2021") into a date-typed
# column using the "MMM, yyyy" pattern.
toDate = zmianaFormatu.withColumn(
    "date_column",
    to_date("Date", format="MMM, yyyy"),
)
display(toDate)

DataFrame[timestamp: string, unix: bigint, Date: string, current_date: date, current_timestamp: timestamp, unix_timestamp: bigint, formatted_timestamp: timestamp, date_column: date]

In [31]:
from pyspark.sql.functions import from_unixtime
# from_unixtime(): render the epoch seconds in "unix_timestamp" back into a
# string column (no format is passed, so the function's default is used).
fromUnix = zmianaFormatu.withColumn(
    "from_unixtime",
    from_unixtime("unix_timestamp"),
)

# Show the DataFrame summary first, then the actual rows.
display(fromUnix)
fromUnix.show()

DataFrame[timestamp: string, unix: bigint, Date: string, current_date: date, current_timestamp: timestamp, unix_timestamp: bigint, formatted_timestamp: timestamp, from_unixtime: string]

+-------------------+-------------+---------+------------+--------------------+--------------+-------------------+-------------------+
|          timestamp|         unix|     Date|current_date|   current_timestamp|unix_timestamp|formatted_timestamp|      from_unixtime|
+-------------------+-------------+---------+------------+--------------------+--------------+-------------------+-------------------+
|2015-03-22T14:13:34|1646641525847|May, 2021|  2025-03-12|2025-03-12 16:50:...|    1427030014|2015-03-22 14:13:34|2015-03-22 14:13:34|
|2015-03-22T15:03:18|1646641557555|Mar, 2021|  2025-03-12|2025-03-12 16:50:...|    1427032998|2015-03-22 15:03:18|2015-03-22 15:03:18|
|2015-03-22T14:38:39|1646641578622|Jan, 2021|  2025-03-12|2025-03-12 16:50:...|    1427031519|2015-03-22 14:38:39|2015-03-22 14:38:39|
+-------------------+-------------+---------+------------+--------------------+--------------+-------------------+-------------------+



In [33]:
from pyspark.sql.functions import to_timestamp
# to_timestamp(): parse the "timestamp" strings straight into a
# timestamp-typed column using the explicit pattern.
toTimestamp = zmianaFormatu.withColumn(
    "to_timestamp",
    to_timestamp("timestamp", format="yyyy-MM-dd'T'HH:mm:ss"),
)
display(toTimestamp)

DataFrame[timestamp: string, unix: bigint, Date: string, current_date: date, current_timestamp: timestamp, unix_timestamp: bigint, formatted_timestamp: timestamp, to_timestamp: timestamp]

In [34]:
from pyspark.sql.functions import to_utc_timestamp
# to_utc_timestamp(): add a timestamp column computed from "timestamp" with
# "UTC" given as the source time zone.
# NOTE(review): with "UTC" as the zone the shift is presumably zero - confirm.
toUtcTimestamp = zmianaFormatu.withColumn(
    "to_utc_timestamp",
    to_utc_timestamp("timestamp", tz="UTC"),
)
display(toUtcTimestamp)

DataFrame[timestamp: string, unix: bigint, Date: string, current_date: date, current_timestamp: timestamp, unix_timestamp: bigint, formatted_timestamp: timestamp, to_utc_timestamp: timestamp]

In [35]:
from pyspark.sql.functions import from_utc_timestamp
# from_utc_timestamp(): add a timestamp column computed from "timestamp" with
# "UTC" given as the target time zone.
# NOTE(review): with "UTC" as the zone the shift is presumably zero - confirm.
fromUtcTimestamp = zmianaFormatu.withColumn(
    "from_utc_timestamp",
    from_utc_timestamp("timestamp", tz="UTC"),
)
display(fromUtcTimestamp)

DataFrame[timestamp: string, unix: bigint, Date: string, current_date: date, current_timestamp: timestamp, unix_timestamp: bigint, formatted_timestamp: timestamp, from_utc_timestamp: timestamp]