In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, LongType, StringType, TimestampType
from pyspark.sql.functions import col, to_timestamp, lit
from datetime import datetime
import pandas as pd

In [3]:
# Create the Spark session for this notebook, or reuse one that already exists
# in the kernel (getOrCreate), running against the local master.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("WorkerLogins")
    .getOrCreate()
)

In [4]:
# Column layout of the login table, declared as (column name, Spark type) pairs.
# Every column is nullable (third StructField argument is True).
schema = StructType([
    StructField(column_name, spark_type(), True)
    for column_name, spark_type in [
        ("id", LongType),
        ("worker_id", LongType),
        ("login_timestamp", TimestampType),
        ("ip_address", StringType),
        ("country", StringType),
        ("region", StringType),
        ("city", StringType),
        ("device_type", StringType),
    ]
])

# Sample Data: one tuple per login, in the same column order as the schema.
data = [
    (1, 101, datetime(2024, 2, 15, 8, 30, 0), "192.168.1.1", "USA", "California", "Los Angeles", "Laptop"),
    (2, 102, datetime(2024, 2, 15, 9, 15, 0), "192.168.1.2", "Canada", "Ontario", "Toronto", "Mobile"),
    (3, 103, datetime(2024, 2, 16, 12, 45, 0), "192.168.1.3", "UK", "England", "London", "Tablet"),
    (4, 104, datetime(2024, 2, 17, 14, 10, 0), "192.168.1.4", "India", "Maharashtra", "Mumbai", "Laptop"),
    (5, 105, datetime(2024, 2, 18, 16, 55, 0), "192.168.1.5", "Germany", "Bavaria", "Munich", "Mobile"),
]

# Materialise the sample rows as a Spark DataFrame with the explicit schema.
worker_logins = spark.createDataFrame(data, schema=schema)

# Start writing code
# Worker ids of every login that happened from 2024-02-10 through 2024-02-15
# (both calendar days included).
#
# The window is half-open, [2024-02-10 00:00:00, 2024-02-16 00:00:00):
# login_timestamp is a TimestampType column, which keeps sub-second precision,
# so an upper bound of "<= 2024-02-15 23:59:59" would silently drop logins made
# during the final second of Feb 15.
#
# The filter runs before the projection so the predicate refers to a column
# that is actually present in the frame it is applied to, instead of relying
# on Spark to recover a column that select() already dropped.
df_worker_logins = (
    worker_logins
    .filter(
        (col("login_timestamp") >= to_timestamp(lit("2024-02-10 00:00:00")))
        & (col("login_timestamp") < to_timestamp(lit("2024-02-16 00:00:00")))
    )
    .select("worker_id")
)

# To validate your solution, convert your final pySpark df to a pandas df
# df_worker_logins.toPandas()

In [5]:
# Print the sample login table; the arguments spell out show()'s defaults
# (at most 20 rows, long cell values truncated).
worker_logins.show(n=20, truncate=True)

+---+---------+-------------------+-----------+-------+-----------+-----------+-----------+
| id|worker_id|    login_timestamp| ip_address|country|     region|       city|device_type|
+---+---------+-------------------+-----------+-------+-----------+-----------+-----------+
|  1|      101|2024-02-15 08:30:00|192.168.1.1|    USA| California|Los Angeles|     Laptop|
|  2|      102|2024-02-15 09:15:00|192.168.1.2| Canada|    Ontario|    Toronto|     Mobile|
|  3|      103|2024-02-16 12:45:00|192.168.1.3|     UK|    England|     London|     Tablet|
|  4|      104|2024-02-17 14:10:00|192.168.1.4|  India|Maharashtra|     Mumbai|     Laptop|
|  5|      105|2024-02-18 16:55:00|192.168.1.5|Germany|    Bavaria|     Munich|     Mobile|
+---+---------+-------------------+-----------+-------+-----------+-----------+-----------+



In [6]:
# Print the filtered worker ids; the arguments spell out show()'s defaults
# (at most 20 rows, long cell values truncated).
df_worker_logins.show(n=20, truncate=True)

+---------+
|worker_id|
+---------+
|      101|
|      102|
+---------+



In [7]:
# Shut down the Spark session once the notebook is finished with it.
# Called on the instance rather than through the class, which is the
# conventional way to invoke an instance method.
spark.stop()