Użyj każdej z tych funkcji:
* `unix_timestamp()` 
* `date_format()`
* `to_unix_timestamp()`
* `from_unixtime()`
* `to_date()` 
* `to_timestamp()` 
* `from_utc_timestamp()` 
* `to_utc_timestamp()`

In [0]:
%scala
import org.apache.spark.sql.functions._

// Sample rows: a timestamp string, an epoch value (treated as milliseconds by later cells)
// and a "MMM, yyyy" date string.
val dane = Seq(
  ("2015-03-22T14:13:34", 1646641525847L, "May, 2021"),
  ("2015-03-22T15:03:18", 1646641557555L, "Mar, 2021"),
  ("2015-03-22T14:38:39", 1646641578622L, "Jan, 2021")
)
val kolumny = Seq("timestamp", "unix", "Date")

// Name the tuple columns, then append the current date and the current timestamp.
// withColumn already assigns the column name, so no extra alias is needed.
var dataFrame = spark
  .createDataFrame(dane)
  .toDF(kolumny: _*)
  .withColumn("current_date", current_date())
  .withColumn("current_timestamp", current_timestamp())

display(dataFrame)

timestamp,unix,Date,current_date,current_timestamp
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-14,2025-03-14T19:05:21.670+0000
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-14,2025-03-14T19:05:21.670+0000
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-14,2025-03-14T19:05:21.670+0000


In [0]:
%scala
// Print the schema (column names and types) of the sample DataFrame.
dataFrame.printSchema()

## unix_timestamp(..) & cast(..)

Konwersja typu **string** na **timestamp**.

Lokalizacja funkcji 
* `pyspark.sql.functions` in the case of Python
* `org.apache.spark.sql.functions` in the case of Scala & Java

## 1. Zmiana formatu wartości timestamp yyyy-MM-dd'T'HH:mm:ss 
`unix_timestamp(..)`

Dokumentacja API `unix_timestamp(..)`:
> Convert time string with given pattern (see <a href="http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html" target="_blank">SimpleDateFormat</a>) to Unix time stamp (in seconds), return null if fail.

`SimpleDateFormat` is part of the Java API and provides support for parsing and formatting date and time values.

In [0]:
from pyspark.sql.functions import * 
from pyspark.sql import SparkSession
from datetime import timedelta
# Sample rows: a timestamp string, an epoch value ("unix") and a "MMM, yyyy" date string.
kolumny = ("timestamp", "unix", "Date")
dane = [
    ("2015-03-22T14:13:34", 1646641525847, "May, 2021"),
    ("2015-03-22T15:03:18", 1646641557555, "Mar, 2021"),
    ("2015-03-22T14:38:39", 1646641578622, "Jan, 2021"),
]

# Build the base DataFrame and append the current date and current timestamp.
dataFrame = (
    spark.createDataFrame(dane, kolumny)
    .withColumn("current_date", current_date())
    .withColumn("current_timestamp", current_timestamp())
)

# unix_timestamp() turns the date column into seconds since the Unix epoch.
# NOTE(review): the section heading asks for parsing the string column "timestamp"
# with the pattern yyyy-MM-dd'T'HH:mm:ss; this cell converts "current_date" instead.
# Confirm which one the exercise expects.
df = dataFrame.withColumn("UnixTime", unix_timestamp(dataFrame["current_date"]))
df.display()

timestamp,unix,Date,current_date,current_timestamp,UnixTime
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-14,2025-03-14T20:16:26.055+0000,1741910400
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-14,2025-03-14T20:16:26.055+0000,1741910400
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-14,2025-03-14T20:16:26.055+0000,1741910400


2. Zmień format zgodnie z klasą `SimpleDateFormat`: **yyyy-MM-dd HH:mm:ss**
  * a. Wyświetl schemat i dane, żeby sprawdzić, czy wartości się zmieniły

In [0]:
# Reuses `dataFrame` built in the unix_timestamp cell above; re-creating the identical
# sample data here was a copy-paste duplicate.
# The "unix" column is stored in milliseconds while from_unixtime() expects seconds,
# hence the division by 1000.
zmianaFormatu = dataFrame.withColumn(
    "formatted_date",
    date_format(from_unixtime(dataFrame["unix"] / 1000), "yyyy-MM-dd HH:mm:ss"),
)

# Show the schema and the rows to check the newly added string column.
zmianaFormatu.printSchema()
zmianaFormatu.display()

root
 |-- timestamp: string (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- current_date: date (nullable = false)
 |-- current_timestamp: timestamp (nullable = false)
 |-- formatted_date: string (nullable = true)



timestamp,unix,Date,current_date,current_timestamp,formatted_date
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-14,2025-03-14T20:21:05.315+0000,2022-03-07 08:25:25
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-14,2025-03-14T20:21:05.315+0000,2022-03-07 08:25:57
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-14,2025-03-14T20:21:05.315+0000,2022-03-07 08:26:18


In [0]:
%scala
//unix_timestamp
// Parse the string column "timestamp" into epoch seconds using an explicit pattern,
// then cast those seconds to a timestamp column.
// Relies on `dataFrame` and the functions._ import from the first Scala cell.
val tempE = dataFrame
  .withColumn("unix_seconds", unix_timestamp(col("timestamp"), "yyyy-MM-dd'T'HH:mm:ss"))
  .withColumn("timestamp_parsed", col("unix_seconds").cast("timestamp"))
display(tempE)

## Stwórz nowe kolumny do DataFrame z wartościami year(..), month(..), dayofyear(..)

In [0]:

# year(), month(), dayofyear(): extract calendar parts from the "current_date" column.
yearDate = dataFrame.select(
    "*",
    year(dataFrame["current_date"]).alias("Year"),
    month(dataFrame["current_date"]).alias("Month"),
    dayofyear(dataFrame["current_date"]).alias("DayOfYear"),
)
yearDate.display()

timestamp,unix,Date,current_date,current_timestamp,Year,Month,DayOfYear
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-14,2025-03-14T20:24:49.877+0000,2025,3,73
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-14,2025-03-14T20:24:49.877+0000,2025,3,73
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-14,2025-03-14T20:24:49.877+0000,2025,3,73


In [0]:

# to_date(): derive a date column from each of the three source columns.
toDate = (
    dataFrame
    # Timestamp string, converted without an explicit pattern.
    .withColumn("TimestampToDate", to_date(dataFrame["timestamp"]))
    # Epoch value divided by 1000 (milliseconds -> seconds), rendered by from_unixtime(),
    # then reduced to a date.
    .withColumn("UnixToDate", to_date(from_unixtime(dataFrame["unix"] / 1000)))
    # "May, 2021"-style strings need an explicit pattern.
    .withColumn("DateToDate", to_date(dataFrame["Date"], "MMM, yyyy"))
)
toDate.display()

timestamp,unix,Date,current_date,current_timestamp,TimestampToDate,UnixToDate,DateToDate
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-14,2025-03-14T21:25:06.888+0000,2015-03-22,2022-03-07,2021-05-01
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-14,2025-03-14T21:25:06.888+0000,2015-03-22,2022-03-07,2021-03-01
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-14,2025-03-14T21:25:06.888+0000,2015-03-22,2022-03-07,2021-01-01


In [0]:
# from_unixtime(): render epoch seconds as a date-time string.
# "unix" is divided by 1000 first because it is stored in milliseconds.
fromUnix = dataFrame.select(
    "*",
    from_unixtime(dataFrame["unix"] / 1000).alias("FromUnix"),
)
display(fromUnix)

timestamp,unix,Date,current_date,current_timestamp,FromUnix
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-14,2025-03-14T21:26:09.936+0000,2022-03-07 08:25:25
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-14,2025-03-14T21:26:09.936+0000,2022-03-07 08:25:57
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-14,2025-03-14T21:26:09.936+0000,2022-03-07 08:26:18


In [0]:

#to_timestamp()
# Each new column is a timestamp built from a different source column.
toTimestamp = (
    dataFrame
    # from_unixtime() alone returns a string, so it is wrapped in to_timestamp() to
    # actually produce a timestamp; "unix" is in milliseconds, hence the / 1000.
    .withColumn("UnixTimestamp", to_timestamp(from_unixtime(dataFrame["unix"] / 1000)))
    # "May, 2021"-style strings need an explicit pattern.
    .withColumn("DateTimestamp", to_timestamp(dataFrame["Date"], "MMM, yyyy"))
    # A date column converts to a timestamp at midnight.
    .withColumn("DatetoTimestamp", to_timestamp(dataFrame["current_date"]))
)
display(toTimestamp)


timestamp,unix,Date,current_date,current_timestamp,UnixTimestamp,DateTimestamp,DatetoTimestamp
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-14,2025-03-14T21:29:16.660+0000,2022-03-07 08:25:25,2021-05-01T00:00:00.000+0000,2025-03-14T00:00:00.000+0000
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-14,2025-03-14T21:29:16.660+0000,2022-03-07 08:25:57,2021-03-01T00:00:00.000+0000,2025-03-14T00:00:00.000+0000
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-14,2025-03-14T21:29:16.660+0000,2022-03-07 08:26:18,2021-01-01T00:00:00.000+0000,2025-03-14T00:00:00.000+0000


In [0]:
#to_utc_timestamp()
# to_utc_timestamp(ts, tz) interprets `ts` as wall-clock time in `tz` and shifts it to UTC.
toUtcTimestamp = (
    dataFrame
    # Source zone is already UTC, so the value is not shifted.
    .withColumn("UTC", to_utc_timestamp(from_unixtime(dataFrame["unix"] / 1000), "UTC"))
    # Region ID used instead of the short name "CET"; Spark recommends 'area/city'
    # IDs because short names can be ambiguous. Same offset for the displayed date.
    .withColumn("CET", to_utc_timestamp(dataFrame["current_date"], "Europe/Warsaw"))
    .withColumn("toUTC", to_utc_timestamp(dataFrame["timestamp"], "UTC"))
)
display(toUtcTimestamp)



timestamp,unix,Date,current_date,current_timestamp,UTC,CET,toUTC
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-14,2025-03-14T21:55:15.318+0000,2022-03-07T08:25:25.000+0000,2025-03-13T23:00:00.000+0000,2015-03-22T14:13:34.000+0000
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-14,2025-03-14T21:55:15.318+0000,2022-03-07T08:25:57.000+0000,2025-03-13T23:00:00.000+0000,2015-03-22T15:03:18.000+0000
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-14,2025-03-14T21:55:15.318+0000,2022-03-07T08:26:18.000+0000,2025-03-13T23:00:00.000+0000,2015-03-22T14:38:39.000+0000


In [0]:
#from_utc_timestamp()
# from_utc_timestamp(ts, tz) interprets `ts` as UTC and shifts it to wall-clock time in `tz`.
# The region ID replaces the short name "PST", which Spark discourages as ambiguous;
# the recorded output (a -7h shift) matches America/Los_Angeles.
fromUtcTimestamp = dataFrame.withColumn(
    "FROM_UTC_TIMESTAMP",
    from_utc_timestamp(dataFrame["timestamp"], "America/Los_Angeles"),
)
display(fromUtcTimestamp)

timestamp,unix,Date,current_date,current_timestamp,FROM_UTC_TIMESTAMP
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-14,2025-03-14T21:57:09.538+0000,2015-03-22T07:13:34.000+0000
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-14,2025-03-14T21:57:09.538+0000,2015-03-22T08:03:18.000+0000
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-14,2025-03-14T21:57:09.538+0000,2015-03-22T07:38:39.000+0000
