Użyj każdej z tych funkcji:
* `unix_timestamp()` 
* `date_format()`
* `to_unix_timestamp()`
* `from_unixtime()`
* `to_date()` 
* `to_timestamp()` 
* `from_utc_timestamp()` 
* `to_utc_timestamp()`

In [0]:
%scala
import org.apache.spark.sql.functions._

// Column names for the sample frame, in the same order as the tuple fields below.
val kolumny = Seq("timestamp","unix", "Date")

// Sample rows: a "yyyy-MM-dd'T'HH:mm:ss" string, a 13-digit epoch value
// (divided by 1000 further down, i.e. milliseconds), and a "MMM, yyyy" string.
val dane = Seq(
  ("2015-03-22T14:13:34", 1646641525847L, "May, 2021"),
  ("2015-03-22T15:03:18", 1646641557555L, "Mar, 2021"),
  ("2015-03-22T14:38:39", 1646641578622L, "Jan, 2021")
)

// Build the frame from the tuples, then append the current date and the
// current timestamp. withColumn already names the new column, so no extra
// alias is needed on the expressions.
var dataFrame = spark
  .createDataFrame(dane)
  .toDF(kolumny: _*)
  .withColumn("current_date", current_date())
  .withColumn("current_timestamp", current_timestamp())

display(dataFrame)

timestamp,unix,Date,current_date,current_timestamp
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-18,2025-03-18T07:59:04.789+0000
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T07:59:04.789+0000
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T07:59:04.789+0000


In [0]:
%scala
// Print the schema (column names, types, nullability) of the sample frame.
dataFrame.printSchema()

In [0]:
%scala
// Register the frame as the temporary view "temp_table" so it can be queried
// by name through Spark SQL (the Python cells read it back that way).
dataFrame.createOrReplaceTempView("temp_table")

In [0]:
# Read the "temp_table" temporary view (registered from the Scala frame) into a
# Python DataFrame; all Python cells below start from `df`.
df = spark.sql("SELECT * FROM temp_table")

## unix_timestamp(..) & cast(..)

Konwersja wartości typu **string** na **timestamp**.

Lokalizacja funkcji:
* `pyspark.sql.functions` w przypadku Pythona
* `org.apache.spark.sql.functions` w przypadku Scali i Javy

## 1. Zmiana formatu wartości timestamp yyyy-MM-dd'T'HH:mm:ss 
`unix_timestamp(..)`

Dokumentacja API `unix_timestamp(..)`:
> Convert time string with given pattern (see <a href="http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html" target="_blank">SimpleDateFormat</a>) to Unix time stamp (in seconds), return null if fail.

`SimpleDateFormat` is part of the Java API and provides support for parsing and formatting date and time values.

2. Zmień format zgodnie z klasą `SimpleDateFormat`: **yyyy-MM-dd HH:mm:ss**
  * a. Wyświetl schemat i dane, żeby sprawdzić, czy wartości się zmieniły

In [0]:
# `col` is imported here and reused by the cells below without re-importing.
from pyspark.sql.functions import col, to_timestamp

# Replace the string column "timestamp" with its parsed timestamp value.
# No pattern is passed, so to_timestamp falls back to its default parsing.
zmianaFormatu = df.withColumn(
    "timestamp",
    to_timestamp(col("timestamp")),
)

# Show the schema to confirm "timestamp" is now of type timestamp.
zmianaFormatu.printSchema()

root
 |-- timestamp: timestamp (nullable = true)
 |-- unix: long (nullable = false)
 |-- Date: string (nullable = true)
 |-- current_date: date (nullable = false)
 |-- current_timestamp: timestamp (nullable = false)



In [0]:
# unix_timestamp(): convert a time value to seconds since the Unix epoch.
from pyspark.sql.functions import unix_timestamp

# NOTE(review): "timestamp" in zmianaFormatu is already timestamp-typed (it was
# parsed with to_timestamp), and Spark documents the pattern argument as being
# ignored for non-string input — confirm for the Spark version in use.
tempE = zmianaFormatu.select(
    unix_timestamp(
        col("timestamp"),
        "yyyy-MM-dd HH:mm:ss",
    ).alias("unix")
)

display(tempE)

unix
1427033614
1427036598
1427035119


## Stwórz nowe kolumny do DataFrame z wartościami year(..), month(..), dayofyear(..)

In [0]:
# date_format(): render parts of "current_timestamp" as formatted strings.
from pyspark.sql.functions import date_format

# "year" and "month" are zero-padded strings ("yyyy", "MM").
# Day-of-year uses the single-letter pattern "D" (no padding, 1-366). The former
# "DD" fixed the width at two digits, which cannot represent days 100-366 and
# makes the cell fail for most of the year under Spark 3 datetime patterns.
yearDate = (
    zmianaFormatu
    .withColumn("year", date_format(col("current_timestamp"), "yyyy"))
    .withColumn("month", date_format(col("current_timestamp"), "MM"))
    .withColumn("DayOfYear", date_format(col("current_timestamp"), "D"))
)

display(yearDate)

timestamp,unix,Date,current_date,current_timestamp,year,month,DayOfYear
2015-03-22T14:13:34.000+0000,1646641525847,"May, 2021",2025-03-18,2025-03-18T09:37:57.614+0000,2025,3,77
2015-03-22T15:03:18.000+0000,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T09:37:57.614+0000,2025,3,77
2015-03-22T14:38:39.000+0000,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T09:37:57.614+0000,2025,3,77


In [0]:
# to_date(): truncate "current_timestamp" to a date, then extract calendar parts.
from pyspark.sql.functions import to_date, year, month, dayofyear

# Each new column is derived from the date part of "current_timestamp" using
# the dedicated extractor functions year / month / dayofyear.
toDate = (
    zmianaFormatu
    .withColumn("year", year(to_date(col("current_timestamp"))))
    .withColumn("month", month(to_date(col("current_timestamp"))))
    .withColumn("DayOfYear", dayofyear(to_date(col("current_timestamp"))))
)

display(toDate)

timestamp,unix,Date,current_date,current_timestamp,year,month,DayOfYear
2015-03-22T14:13:34.000+0000,1646641525847,"May, 2021",2025-03-18,2025-03-18T09:39:46.833+0000,2025,3,77
2015-03-22T15:03:18.000+0000,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T09:39:46.833+0000,2025,3,77
2015-03-22T14:38:39.000+0000,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T09:39:46.833+0000,2025,3,77


In [0]:
# from_unixtime(): format epoch seconds as a "yyyy-MM-dd HH:mm:ss" string.
# Imported here so the cell does not depend on names left over from earlier
# kernel state; no other cell in this notebook imports from_unixtime.
from pyspark.sql.functions import col, from_unixtime

# "unix" holds epoch milliseconds, while from_unixtime expects whole seconds:
# divide by 1000 and cast to long explicitly instead of relying on an implicit
# double -> bigint conversion.
# NOTE(review): the recorded output shows the new "date" column replacing the
# existing "Date" column (names appear to be matched case-insensitively) —
# confirm that overwriting it is intended.
fromUnix = zmianaFormatu.withColumn(
    "date",
    from_unixtime((col("unix") / 1000).cast("long"), "yyyy-MM-dd HH:mm:ss"),
)

display(fromUnix)

timestamp,unix,date,current_date,current_timestamp
2015-03-22T14:13:34.000+0000,1646641525847,2022-03-07 08:25:25,2025-03-18,2025-03-18T09:56:57.722+0000
2015-03-22T15:03:18.000+0000,1646641557555,2022-03-07 08:25:57,2025-03-18,2025-03-18T09:56:57.722+0000
2015-03-22T14:38:39.000+0000,1646641578622,2022-03-07 08:26:18,2025-03-18,2025-03-18T09:56:57.722+0000


In [0]:
# to_timestamp(): parse the "MMM, yyyy" strings in "Date" (e.g. "May, 2021")
# into timestamps; per the recorded output, the missing day and time default to
# the first day of the month at midnight.
toTimestamp = zmianaFormatu.withColumn(
    "Date",
    to_timestamp(col("Date"), "MMM, yyyy"),
)

display(toTimestamp)

timestamp,unix,Date,current_date,current_timestamp
2015-03-22T14:13:34.000+0000,1646641525847,2021-05-01T00:00:00.000+0000,2025-03-18,2025-03-18T09:52:55.124+0000
2015-03-22T15:03:18.000+0000,1646641557555,2021-03-01T00:00:00.000+0000,2025-03-18,2025-03-18T09:52:55.124+0000
2015-03-22T14:38:39.000+0000,1646641578622,2021-01-01T00:00:00.000+0000,2025-03-18,2025-03-18T09:52:55.124+0000


In [0]:
# to_utc_timestamp(): treat "timestamp" as wall-clock time in the given zone
# and convert it to UTC.
from pyspark.sql.functions import to_utc_timestamp

# With the zone offset "-02:00" the resulting values are two hours later than
# the input (14:13:34 -> 16:13:34 in the recorded output).
toUtcTimestamp = zmianaFormatu.withColumn(
    "timestampUTC",
    to_utc_timestamp(col("timestamp"), "-02:00"),
)

display(toUtcTimestamp)

timestamp,unix,Date,current_date,current_timestamp,timestampUTC
2015-03-22T14:13:34.000+0000,1646641525847,"May, 2021",2025-03-18,2025-03-18T09:54:01.739+0000,2015-03-22T16:13:34.000+0000
2015-03-22T15:03:18.000+0000,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T09:54:01.739+0000,2015-03-22T17:03:18.000+0000
2015-03-22T14:38:39.000+0000,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T09:54:01.739+0000,2015-03-22T16:38:39.000+0000


In [0]:
# from_utc_timestamp(): treat "timestampUTC" as UTC and render it as wall-clock
# time in the given zone.
from pyspark.sql.functions import from_utc_timestamp

# Using the same "-02:00" offset undoes the to_utc_timestamp shift, so
# "fromUTC" matches the original "timestamp" column in the recorded output.
fromUtcTimestamp = toUtcTimestamp.withColumn(
    "fromUTC",
    from_utc_timestamp(col("timestampUTC"), "-02:00"),
)

display(fromUtcTimestamp)

timestamp,unix,Date,current_date,current_timestamp,timestampUTC,fromUTC
2015-03-22T14:13:34.000+0000,1646641525847,"May, 2021",2025-03-18,2025-03-18T09:55:39.389+0000,2015-03-22T16:13:34.000+0000,2015-03-22T14:13:34.000+0000
2015-03-22T15:03:18.000+0000,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T09:55:39.389+0000,2015-03-22T17:03:18.000+0000,2015-03-22T15:03:18.000+0000
2015-03-22T14:38:39.000+0000,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T09:55:39.389+0000,2015-03-22T16:38:39.000+0000,2015-03-22T14:38:39.000+0000
