Użyj każdą z tych funkcji 
* `unix_timestamp()` 
* `date_format()`
* `to_unix_timestamp()`
* `from_unixtime()`
* `to_date()` 
* `to_timestamp()` 
* `from_utc_timestamp()` 
* `to_utc_timestamp()`

In [0]:
from pyspark.sql.functions import *

data = [("2015-03-22T14:13:34", 1646641525847, "May, 2021"),
        ("2015-03-22T15:03:18", 1646641557555, "Mar, 2021"),
        ("2015-03-22T14:38:39", 1646641578622, "Jan, 2021")]
columns = ["timestamp", "unix", "Date"]

df = spark.createDataFrame(data, columns)
df = df. \
    withColumn("unix_timestamp", unix_timestamp()). \
    withColumn("date_format", date_format("timestamp", "dd/MM/yyyy")). \
    withColumn("to_unix_timestamp", to_unix_timestamp("current_date", lit("yyyy-MM-dd"))). \
    withColumn("from_unixtime", from_unixtime("unix_timestamp")). \
    withColumn("to_date", to_date("timestamp")). \
    withColumn("to_timestamp", to_timestamp("from_unixtime")). \
    withColumn("from_utc_timestamp", from_utc_timestamp("timestamp", tz="UTC")). \
    withColumn("to_utc_timestamp", to_utc_timestamp("from_unixtime", tz="UTC"))

display(df)


timestamp,unix,Date,unix_timestamp,date_format,to_unix_timestamp,from_unixtime,to_date,to_timestamp,from_utc_timestamp,to_utc_timestamp
2015-03-22T14:13:34,1646641525847,"May, 2021",1741794073,22/03/2015,1741737600,2025-03-12 15:41:13,2015-03-22,2025-03-12T15:41:13Z,2015-03-22T14:13:34Z,2025-03-12T15:41:13Z
2015-03-22T15:03:18,1646641557555,"Mar, 2021",1741794073,22/03/2015,1741737600,2025-03-12 15:41:13,2015-03-22,2025-03-12T15:41:13Z,2015-03-22T15:03:18Z,2025-03-12T15:41:13Z
2015-03-22T14:38:39,1646641578622,"Jan, 2021",1741794073,22/03/2015,1741737600,2025-03-12 15:41:13,2015-03-22,2025-03-12T15:41:13Z,2015-03-22T14:38:39Z,2025-03-12T15:41:13Z


In [0]:
df.printSchema()

root
 |-- timestamp: string (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- unix_timestamp: long (nullable = true)
 |-- date_format: string (nullable = true)
 |-- to_unix_timestamp: long (nullable = true)
 |-- from_unixtime: string (nullable = true)
 |-- to_date: date (nullable = true)
 |-- to_timestamp: timestamp (nullable = true)
 |-- from_utc_timestamp: timestamp (nullable = true)
 |-- to_utc_timestamp: timestamp (nullable = true)



## unix_timestamp(..) & cast(..)

Konwersja **string** to a **timestamp**.

Lokalizacja funkcji 
* `pyspark.sql.functions` in the case of Python
* `org.apache.spark.sql.functions` in the case of Scala & Java

## 1. Zmiana formatu wartości timestamp yyyy-MM-dd'T'HH:mm:ss 
`unix_timestamp(..)`

Dokumentacja API `unix_timestamp(..)`:
> Convert time string with given pattern (see <a href="http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html" target="_blank">SimpleDateFormat</a>) to Unix time stamp (in seconds), return null if fail.

`SimpleDataFormat` is part of the Java API and provides support for parsing and formatting date and time values.

In [0]:
df = df.withColumn("timestamp", to_unix_timestamp("current_date", lit("yyyy-MM-dd'T'HH:mm:ss")))
display(df)

timestamp,unix,Date,unix_timestamp,date_format,to_unix_timestamp,from_unixtime,to_date,to_timestamp,from_utc_timestamp,to_utc_timestamp
1741737600,1646641525847,"May, 2021",1741794092,22/03/2015,1741737600,2025-03-12 15:41:32,2015-03-22,2025-03-12T15:41:32Z,2015-03-22T14:13:34Z,2025-03-12T15:41:32Z
1741737600,1646641557555,"Mar, 2021",1741794092,22/03/2015,1741737600,2025-03-12 15:41:32,2015-03-22,2025-03-12T15:41:32Z,2015-03-22T15:03:18Z,2025-03-12T15:41:32Z
1741737600,1646641578622,"Jan, 2021",1741794092,22/03/2015,1741737600,2025-03-12 15:41:32,2015-03-22,2025-03-12T15:41:32Z,2015-03-22T14:38:39Z,2025-03-12T15:41:32Z


2. Zmień format zgodnie z klasą `SimpleDateFormat`**yyyy-MM-dd HH:mm:ss**
  * a. Wyświetl schemat i dane żeby sprawdzicz czy wartości się zmieniły

In [0]:
# timestamp ma teraz typ long
df.printSchema()

root
 |-- timestamp: long (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- unix_timestamp: long (nullable = true)
 |-- date_format: string (nullable = true)
 |-- to_unix_timestamp: long (nullable = true)
 |-- from_unixtime: string (nullable = true)
 |-- to_date: date (nullable = true)
 |-- to_timestamp: timestamp (nullable = true)
 |-- from_utc_timestamp: timestamp (nullable = true)
 |-- to_utc_timestamp: timestamp (nullable = true)



In [0]:
from pyspark.sql.types import StringType
df = df.withColumn("timestamp", df["timestamp"].cast(StringType()))
# zmiana typu na string
display(df)

timestamp,unix,Date,unix_timestamp,date_format,to_unix_timestamp,from_unixtime,to_date,to_timestamp,from_utc_timestamp,to_utc_timestamp
1741737600,1646641525847,"May, 2021",1741794281,22/03/2015,1741737600,2025-03-12 15:44:41,2015-03-22,2025-03-12T15:44:41Z,2015-03-22T14:13:34Z,2025-03-12T15:44:41Z
1741737600,1646641557555,"Mar, 2021",1741794281,22/03/2015,1741737600,2025-03-12 15:44:41,2015-03-22,2025-03-12T15:44:41Z,2015-03-22T15:03:18Z,2025-03-12T15:44:41Z
1741737600,1646641578622,"Jan, 2021",1741794281,22/03/2015,1741737600,2025-03-12 15:44:41,2015-03-22,2025-03-12T15:44:41Z,2015-03-22T14:38:39Z,2025-03-12T15:44:41Z


In [0]:
df.printSchema()

root
 |-- timestamp: string (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- unix_timestamp: long (nullable = true)
 |-- date_format: string (nullable = true)
 |-- to_unix_timestamp: long (nullable = true)
 |-- from_unixtime: string (nullable = true)
 |-- to_date: date (nullable = true)
 |-- to_timestamp: timestamp (nullable = true)
 |-- from_utc_timestamp: timestamp (nullable = true)
 |-- to_utc_timestamp: timestamp (nullable = true)



## Stwórz nowe kolumny do DataFrame z wartościami year(..), month(..), dayofyear(..)

In [0]:
df = df.withColumn("year", year("to_date"))

display(df)

timestamp,unix,Date,unix_timestamp,date_format,to_unix_timestamp,from_unixtime,to_date,to_timestamp,from_utc_timestamp,to_utc_timestamp,year
1741737600,1646641525847,"May, 2021",1741794530,22/03/2015,1741737600,2025-03-12 15:48:50,2015-03-22,2025-03-12T15:48:50Z,2015-03-22T14:13:34Z,2025-03-12T15:48:50Z,2015
1741737600,1646641557555,"Mar, 2021",1741794530,22/03/2015,1741737600,2025-03-12 15:48:50,2015-03-22,2025-03-12T15:48:50Z,2015-03-22T15:03:18Z,2025-03-12T15:48:50Z,2015
1741737600,1646641578622,"Jan, 2021",1741794530,22/03/2015,1741737600,2025-03-12 15:48:50,2015-03-22,2025-03-12T15:48:50Z,2015-03-22T14:38:39Z,2025-03-12T15:48:50Z,2015


In [0]:
df = df.withColumn("month", month("to_date"))

display(df)

timestamp,unix,Date,unix_timestamp,date_format,to_unix_timestamp,from_unixtime,to_date,to_timestamp,from_utc_timestamp,to_utc_timestamp,year,month
1741737600,1646641525847,"May, 2021",1741794555,22/03/2015,1741737600,2025-03-12 15:49:15,2015-03-22,2025-03-12T15:49:15Z,2015-03-22T14:13:34Z,2025-03-12T15:49:15Z,2015,3
1741737600,1646641557555,"Mar, 2021",1741794555,22/03/2015,1741737600,2025-03-12 15:49:15,2015-03-22,2025-03-12T15:49:15Z,2015-03-22T15:03:18Z,2025-03-12T15:49:15Z,2015,3
1741737600,1646641578622,"Jan, 2021",1741794555,22/03/2015,1741737600,2025-03-12 15:49:15,2015-03-22,2025-03-12T15:49:15Z,2015-03-22T14:38:39Z,2025-03-12T15:49:15Z,2015,3


In [0]:
df = df.withColumn("day", day("to_date"))

display(df)

timestamp,unix,Date,unix_timestamp,date_format,to_unix_timestamp,from_unixtime,to_date,to_timestamp,from_utc_timestamp,to_utc_timestamp,year,month,day
1741737600,1646641525847,"May, 2021",1741794556,22/03/2015,1741737600,2025-03-12 15:49:16,2015-03-22,2025-03-12T15:49:16Z,2015-03-22T14:13:34Z,2025-03-12T15:49:16Z,2015,3,22
1741737600,1646641557555,"Mar, 2021",1741794556,22/03/2015,1741737600,2025-03-12 15:49:16,2015-03-22,2025-03-12T15:49:16Z,2015-03-22T15:03:18Z,2025-03-12T15:49:16Z,2015,3,22
1741737600,1646641578622,"Jan, 2021",1741794556,22/03/2015,1741737600,2025-03-12 15:49:16,2015-03-22,2025-03-12T15:49:16Z,2015-03-22T14:38:39Z,2025-03-12T15:49:16Z,2015,3,22


In [0]:
df = df.withColumn("dayofyear", dayofyear("to_date"))

display(df)

timestamp,unix,Date,unix_timestamp,date_format,to_unix_timestamp,from_unixtime,to_date,to_timestamp,from_utc_timestamp,to_utc_timestamp,year,month,day,dayofyear
1741737600,1646641525847,"May, 2021",1741794705,22/03/2015,1741737600,2025-03-12 15:51:45,2015-03-22,2025-03-12T15:51:45Z,2015-03-22T14:13:34Z,2025-03-12T15:51:45Z,2015,3,22,81
1741737600,1646641557555,"Mar, 2021",1741794705,22/03/2015,1741737600,2025-03-12 15:51:45,2015-03-22,2025-03-12T15:51:45Z,2015-03-22T15:03:18Z,2025-03-12T15:51:45Z,2015,3,22,81
1741737600,1646641578622,"Jan, 2021",1741794705,22/03/2015,1741737600,2025-03-12 15:51:45,2015-03-22,2025-03-12T15:51:45Z,2015-03-22T14:38:39Z,2025-03-12T15:51:45Z,2015,3,22,81


In [0]:
df = df.withColumn("second", second("to_timestamp"))

display(df)

timestamp,unix,Date,unix_timestamp,date_format,to_unix_timestamp,from_unixtime,to_date,to_timestamp,from_utc_timestamp,to_utc_timestamp,year,month,day,dayofyear,second
1741737600,1646641525847,"May, 2021",1741794729,22/03/2015,1741737600,2025-03-12 15:52:09,2015-03-22,2025-03-12T15:52:09Z,2015-03-22T14:13:34Z,2025-03-12T15:52:09Z,2015,3,22,81,9
1741737600,1646641557555,"Mar, 2021",1741794729,22/03/2015,1741737600,2025-03-12 15:52:09,2015-03-22,2025-03-12T15:52:09Z,2015-03-22T15:03:18Z,2025-03-12T15:52:09Z,2015,3,22,81,9
1741737600,1646641578622,"Jan, 2021",1741794729,22/03/2015,1741737600,2025-03-12 15:52:09,2015-03-22,2025-03-12T15:52:09Z,2015-03-22T14:38:39Z,2025-03-12T15:52:09Z,2015,3,22,81,9


In [0]:
df = df.withColumn("minute", minute("to_timestamp"))

display(df)

timestamp,unix,Date,unix_timestamp,date_format,to_unix_timestamp,from_unixtime,to_date,to_timestamp,from_utc_timestamp,to_utc_timestamp,year,month,day,dayofyear,second,minute
1741737600,1646641525847,"May, 2021",1741794743,22/03/2015,1741737600,2025-03-12 15:52:23,2015-03-22,2025-03-12T15:52:23Z,2015-03-22T14:13:34Z,2025-03-12T15:52:23Z,2015,3,22,81,23,52
1741737600,1646641557555,"Mar, 2021",1741794743,22/03/2015,1741737600,2025-03-12 15:52:23,2015-03-22,2025-03-12T15:52:23Z,2015-03-22T15:03:18Z,2025-03-12T15:52:23Z,2015,3,22,81,23,52
1741737600,1646641578622,"Jan, 2021",1741794743,22/03/2015,1741737600,2025-03-12 15:52:23,2015-03-22,2025-03-12T15:52:23Z,2015-03-22T14:38:39Z,2025-03-12T15:52:23Z,2015,3,22,81,23,52
