In [0]:
/FileStore/tables/Sales_SalesOrderDetail-2.csv

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, TimestampType
from pyspark.sql.functions import to_utc_timestamp, from_utc_timestamp

# Initialize Spark session (if not already initialized)
spark = SparkSession.builder.appName("TimeZoneConversion").getOrCreate()

# Define the schema
schema = StructType([
    StructField("SalesOrderID", IntegerType(), True),
    StructField("ModifiedDate", TimestampType(), True)
])

# Load the CSV data into a DataFrame
file_path = '/FileStore/tables/Sales_SalesOrderDetail-2.csv'
df = spark.read.csv(file_path, header=True, schema=schema, timestampFormat="yyyy-MM-dd HH:mm:ss.SSS")

# Show the DataFrame to verify
print("Step 1: Original DataFrame")
df.show(truncate=False)

# Step 2a: Create a DataFrame with ModifiedDate in UTC
df_utc = df.withColumn("ModifiedDate_UTC", to_utc_timestamp("ModifiedDate", "UTC"))

# Show the DataFrame with UTC column
print("Step 2a: DataFrame with UTC Column")
df_utc.select("SalesOrderID", "ModifiedDate", "ModifiedDate_UTC").show(truncate=False)

# Step 2b: Create a DataFrame with ModifiedDate in IST
df_ist = df.withColumn("ModifiedDate_IST", from_utc_timestamp(to_utc_timestamp("ModifiedDate", "UTC"), "Asia/Kolkata"))

# Show the DataFrame with IST column
print("Step 2b: DataFrame with IST Column")
df_ist.select("SalesOrderID", "ModifiedDate", "ModifiedDate_IST").show(truncate=False)


In [0]:
%sql
--Step 1: Create a temporary view with sample data
CREATE OR REPLACE TEMP VIEW sample_data AS
SELECT
    1 AS SalesOrderID, '2024-09-01 15:30:00' AS ModifiedDate
UNION ALL
SELECT
    2 AS SalesOrderID, '2024-09-02 10:00:00' AS ModifiedDate
UNION ALL
SELECT
    3 AS SalesOrderID, '2024-09-03 20:15:00' AS ModifiedDate
UNION ALL
SELECT
    4 AS SalesOrderID, '2024-09-04 05:45:00' AS ModifiedDate
UNION ALL
SELECT
    5 AS SalesOrderID, '2024-09-05 22:00:00' AS ModifiedDate
UNION ALL
SELECT
    6 AS SalesOrderID, '2024-09-06 13:30:00' AS ModifiedDate
UNION ALL
SELECT
    7 AS SalesOrderID, '2024-09-07 01:00:00' AS ModifiedDate
UNION ALL
SELECT
    8 AS SalesOrderID, '2024-09-08 08:15:00' AS ModifiedDate
UNION ALL
SELECT
    9 AS SalesOrderID, '2024-09-09 18:30:00' AS ModifiedDate
UNION ALL
SELECT
    10 AS SalesOrderID, '2024-09-10 12:00:00' AS ModifiedDate;

-- Verify the data
SELECT * FROM sample_data;


SalesOrderID,ModifiedDate
1,2024-09-01 15:30:00
2,2024-09-02 10:00:00
3,2024-09-03 20:15:00
4,2024-09-04 05:45:00
5,2024-09-05 22:00:00
6,2024-09-06 13:30:00
7,2024-09-07 01:00:00
8,2024-09-08 08:15:00
9,2024-09-09 18:30:00
10,2024-09-10 12:00:00


In [0]:
%sql
-- Step 2a: Create a view with ModifiedDate in UTC
CREATE OR REPLACE TEMP VIEW sample_data_utc AS
SELECT 
    SalesOrderID,
    ModifiedDate,
    -- Convert ModifiedDate to UTC
    to_utc_timestamp(ModifiedDate, 'UTC') AS ModifiedDate_UTC
FROM sample_data;

-- Verify the UTC column
SELECT SalesOrderID, ModifiedDate, ModifiedDate_UTC
FROM sample_data_utc;


SalesOrderID,ModifiedDate,ModifiedDate_UTC
1,2024-09-01 15:30:00,2024-09-01T15:30:00.000+0000
2,2024-09-02 10:00:00,2024-09-02T10:00:00.000+0000
3,2024-09-03 20:15:00,2024-09-03T20:15:00.000+0000
4,2024-09-04 05:45:00,2024-09-04T05:45:00.000+0000
5,2024-09-05 22:00:00,2024-09-05T22:00:00.000+0000
6,2024-09-06 13:30:00,2024-09-06T13:30:00.000+0000
7,2024-09-07 01:00:00,2024-09-07T01:00:00.000+0000
8,2024-09-08 08:15:00,2024-09-08T08:15:00.000+0000
9,2024-09-09 18:30:00,2024-09-09T18:30:00.000+0000
10,2024-09-10 12:00:00,2024-09-10T12:00:00.000+0000


In [0]:
%sql
-- Step 2b: Create a view with ModifiedDate in IST
CREATE OR REPLACE TEMP VIEW sample_data_ist AS
SELECT 
    SalesOrderID,
    ModifiedDate,
    -- Convert ModifiedDate to IST
    from_utc_timestamp(to_utc_timestamp(ModifiedDate, 'UTC'), 'Asia/Kolkata') AS ModifiedDate_IST
FROM sample_data;

-- Verify the IST column
SELECT SalesOrderID, ModifiedDate, ModifiedDate_IST
FROM sample_data_ist;


SalesOrderID,ModifiedDate,ModifiedDate_IST
1,2024-09-01 15:30:00,2024-09-01T21:00:00.000+0000
2,2024-09-02 10:00:00,2024-09-02T15:30:00.000+0000
3,2024-09-03 20:15:00,2024-09-04T01:45:00.000+0000
4,2024-09-04 05:45:00,2024-09-04T11:15:00.000+0000
5,2024-09-05 22:00:00,2024-09-06T03:30:00.000+0000
6,2024-09-06 13:30:00,2024-09-06T19:00:00.000+0000
7,2024-09-07 01:00:00,2024-09-07T06:30:00.000+0000
8,2024-09-08 08:15:00,2024-09-08T13:45:00.000+0000
9,2024-09-09 18:30:00,2024-09-10T00:00:00.000+0000
10,2024-09-10 12:00:00,2024-09-10T17:30:00.000+0000


In [0]:
%sql
-- Select statement to view ModifiedDate, UTC, and IST together
SELECT
    SalesOrderID,
    ModifiedDate AS OriginalDate,
    to_utc_timestamp(ModifiedDate, 'UTC') AS ModifiedDate_UTC,
    from_utc_timestamp(to_utc_timestamp(ModifiedDate, 'UTC'), 'Asia/Kolkata') AS ModifiedDate_IST
FROM sample_data;


SalesOrderID,OriginalDate,ModifiedDate_UTC,ModifiedDate_IST
1,2024-09-01 15:30:00,2024-09-01T15:30:00.000+0000,2024-09-01T21:00:00.000+0000
2,2024-09-02 10:00:00,2024-09-02T10:00:00.000+0000,2024-09-02T15:30:00.000+0000
3,2024-09-03 20:15:00,2024-09-03T20:15:00.000+0000,2024-09-04T01:45:00.000+0000
4,2024-09-04 05:45:00,2024-09-04T05:45:00.000+0000,2024-09-04T11:15:00.000+0000
5,2024-09-05 22:00:00,2024-09-05T22:00:00.000+0000,2024-09-06T03:30:00.000+0000
6,2024-09-06 13:30:00,2024-09-06T13:30:00.000+0000,2024-09-06T19:00:00.000+0000
7,2024-09-07 01:00:00,2024-09-07T01:00:00.000+0000,2024-09-07T06:30:00.000+0000
8,2024-09-08 08:15:00,2024-09-08T08:15:00.000+0000,2024-09-08T13:45:00.000+0000
9,2024-09-09 18:30:00,2024-09-09T18:30:00.000+0000,2024-09-10T00:00:00.000+0000
10,2024-09-10 12:00:00,2024-09-10T12:00:00.000+0000,2024-09-10T17:30:00.000+0000


In [0]:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DecimalType, DateType

# Define the schema
schema = StructType([
    StructField("SalesOrderID", IntegerType(), True),
    StructField("SalesOrderDetailID", IntegerType(), True),
    StructField("CarrierTrackingNumber", StringType(), True),
    StructField("OrderQty", IntegerType(), True),
    StructField("ProductID", IntegerType(), True),
    StructField("SpecialOfferID", IntegerType(), True),
    StructField("UnitPrice", DecimalType(10, 2), True),
    StructField("UnitPriceDiscount", DecimalType(10, 2), True),
    StructField("LineTotal", DecimalType(20, 2), True),
    StructField("rowguid", StringType(), True),
    StructField("ModifiedDate", DateType(), True)
])

# Load the data with the schema
df = spark.read.csv("/FileStore/tables/Sales_SalesOrderDetail-2.csv", schema=schema, header=False)

# Show the DataFrame
df.show()

from pyspark.sql.functions import from_utc_timestamp, to_utc_timestamp

# Load the data with the schema
df = spark.read.csv("/FileStore/tables/Sales_SalesOrderDetail-2.csv", schema=schema, header=False)

# Add two extra columns for IST and UTC timestamps
df_with_timezones = df.withColumn(
    "IST", from_utc_timestamp("ModifiedDate", "Asia/Kolkata")
).withColumn(
    "UTC", to_utc_timestamp("ModifiedDate", "UTC")
)

# Show the DataFrame with the new columns for IST and UTC
df_with_timezones.select("ModifiedDate", "IST", "UTC").show(truncate=False)

+------------+------------------+---------------------+--------+---------+--------------+---------+-----------------+---------+--------------------+------------+
|SalesOrderID|SalesOrderDetailID|CarrierTrackingNumber|OrderQty|ProductID|SpecialOfferID|UnitPrice|UnitPriceDiscount|LineTotal|             rowguid|ModifiedDate|
+------------+------------------+---------------------+--------+---------+--------------+---------+-----------------+---------+--------------------+------------+
|       43659|                 1|         4911-403C-98|       1|      776|             1|  2024.99|             0.00|  2024.99|B207C96D-D9E6-402...|  2011-05-31|
|       43659|                 2|         4911-403C-98|       3|      777|             1|  2024.99|             0.00|  6074.98|7ABB600D-1E77-41B...|  2011-05-31|
|       43659|                 3|         4911-403C-98|       1|      778|             1|  2024.99|             0.00|  2024.99|475CF8C6-49F6-486...|  2011-05-31|
|       43659|              

In [0]:
%sql
--Step 1: Create a temporary view with sample data
CREATE OR REPLACE TEMP VIEW sample_data AS
SELECT
    1 AS SalesOrderID, '2024-09-01 15:30:00' AS ModifiedDate
UNION ALL
SELECT
    2 AS SalesOrderID, '2024-09-02 10:00:00' AS ModifiedDate
UNION ALL
SELECT
    3 AS SalesOrderID, '2024-09-03 20:15:00' AS ModifiedDate
UNION ALL
SELECT
    4 AS SalesOrderID, '2024-09-04 05:45:00' AS ModifiedDate
UNION ALL
SELECT
    5 AS SalesOrderID, '2024-09-05 22:00:00' AS ModifiedDate
UNION ALL
SELECT
    6 AS SalesOrderID, '2024-09-06 13:30:00' AS ModifiedDate
UNION ALL
SELECT
    7 AS SalesOrderID, '2024-09-07 01:00:00' AS ModifiedDate
UNION ALL
SELECT
    8 AS SalesOrderID, '2024-09-08 08:15:00' AS ModifiedDate
UNION ALL
SELECT
    9 AS SalesOrderID, '2024-09-09 18:30:00' AS ModifiedDate
UNION ALL
SELECT
    10 AS SalesOrderID, '2024-09-10 12:00:00' AS ModifiedDate;





CREATE OR REPLACE TEMP VIEW sample_data_utc AS
SELECT 
    SalesOrderID,
    ModifiedDate,
    -- Convert ModifiedDate to UTC
    to_utc_timestamp(ModifiedDate, 'UTC') AS ModifiedDate_UTC
FROM sample_data;

CREATE OR REPLACE TEMP VIEW sample_data_ist AS
SELECT 
    SalesOrderID,
    ModifiedDate,
    -- Convert ModifiedDate to IST
    from_utc_timestamp(to_utc_timestamp(ModifiedDate, 'UTC'), 'Asia/Kolkata') AS ModifiedDate_IST
FROM sample_data;

SELECT
    SalesOrderID,
    ModifiedDate AS OriginalDate,
    to_utc_timestamp(ModifiedDate, 'UTC') AS ModifiedDate_UTC,
    from_utc_timestamp(to_utc_timestamp(ModifiedDate, 'UTC'), 'Asia/Kolkata') AS ModifiedDate_IST
FROM sample_data;

SalesOrderID,OriginalDate,ModifiedDate_UTC,ModifiedDate_IST
1,2024-09-01 15:30:00,2024-09-01T15:30:00.000+0000,2024-09-01T21:00:00.000+0000
2,2024-09-02 10:00:00,2024-09-02T10:00:00.000+0000,2024-09-02T15:30:00.000+0000
3,2024-09-03 20:15:00,2024-09-03T20:15:00.000+0000,2024-09-04T01:45:00.000+0000
4,2024-09-04 05:45:00,2024-09-04T05:45:00.000+0000,2024-09-04T11:15:00.000+0000
5,2024-09-05 22:00:00,2024-09-05T22:00:00.000+0000,2024-09-06T03:30:00.000+0000
6,2024-09-06 13:30:00,2024-09-06T13:30:00.000+0000,2024-09-06T19:00:00.000+0000
7,2024-09-07 01:00:00,2024-09-07T01:00:00.000+0000,2024-09-07T06:30:00.000+0000
8,2024-09-08 08:15:00,2024-09-08T08:15:00.000+0000,2024-09-08T13:45:00.000+0000
9,2024-09-09 18:30:00,2024-09-09T18:30:00.000+0000,2024-09-10T00:00:00.000+0000
10,2024-09-10 12:00:00,2024-09-10T12:00:00.000+0000,2024-09-10T17:30:00.000+0000
