In [0]:
from pyspark.sql import functions as F
from pyspark.sql import types as T

#---CONFIG PATHS----
adls_account = "insurancedatalake01"
container = "datalake"

bronze_path= f"abfss://{container}@{adls_account}.dfs.core.windows.net/bronze"
silver_path = f"abfss://{container}@{adls_account}.dfs.core.windows.net/silver"



In [0]:
adls_account = "insurancedatalake01"
storage_key = os.environ.get("AZURE_STORAGE_KEY")

spark.conf.set(
    f"fs.azure.account.key.{adls_account}.dfs.core.windows.net",
    storage_key
)

In [0]:
#helper functions: Standardize column names, add metadata
"""def standardize_column_names(df):
    new_df = df
    for c in df.columns:
        new_name = (
            c.strip()
            .lower()
            .replace(" ","_")
            .replace("-","_")
            .replace("/","_")
        )
        if new_name !=c:
            new_df = new_df.withColumnRenamed(c,new_name)
    return new_df
"""
def add_metadata_column(df):
    """
    Add metadata column used for audit/trace
    """
    return (
        df.withColumn("ingestion_date", F.current_date())
        .withColumn("update_timestamp",F.current_timestamp())
    )

def get_mode(df, col_name):
    row = (
        df.where(F.col(col_name).isNotNull())
        .groupBy(col_name)
        .count()
        .orderBy(F.desc("count"))
        .first()
    )
    return row[0] if row is not None else None

In [0]:

#-----CUSTOMER SILVER------
df_customer_bronze = spark.read.format("delta").load(f"{bronze_path}/customer")
display(df_customer_bronze.limit(5))

##Standardize column names
#df_customer = standardize_column_names(df_customer_bronze)
df_customer = df_customer_bronze
##Standardize data types
df_customer = (
    df_customer
    .withColumn("DOB", F.to_date("DOB"))
    .withColumn("AnnualMileage", F.col("AnnualMileage").cast(T.IntegerType()))
    .withColumn("CreditScore", F.col("CreditScore").cast(T.IntegerType()))
    .withColumn("NoOfVehicles",F.col("NoOfVehicles").cast(T.IntegerType()))
)

df_customer = df_customer.filter(F.col("CustomerID").isNotNull())
df_customer = df_customer.dropDuplicates(["CustomerID"])

##Missing values Imputation
###Numeric: AnnualMileage, CreditScore, NoOfVehicles
annual_mileage_mean = df_customer.select(F.mean("AnnualMileage")).first()[0]
credit_score_mean = df_customer.select(F.mean("CreditScore")).collect()[0][0]
no_of_vehicles_mean = df_customer.select(F.mean("NoOfVehicles")).collect()[0][0]
df_customer = df_customer.withColumn(
    "WasAnnualMileageImputed",
    F.when(F.col("AnnualMileage").isNull(), F.lit(1)).otherwise(F.lit(0))
).withColumn(
    "WasCreditScoreImputed",
    F.when(F.col("CreditScore").isNull(),F.lit(1)).otherwise(F.lit(0))
).withColumn(
    "WasNoOfVehiclesImputed",
    F.when(F.col("NoOfVehicles").isNull(),F.lit(1)). otherwise(F.lit(0))
)

df_customer = df_customer.fillna({
    "AnnualMileage": annual_mileage_mean,
    "CreditScore": credit_score_mean,
    "NoOfVehicles": no_of_vehicles_mean
})

# Categorical: MaritalStatus, EmploymentStatus, VehicleType
df_customer = df_customer.withColumnRenamed("MarialStatus", "MaritalStatus")
marital_mode = get_mode(df_customer, "MaritalStatus")
employment_mode = get_mode(df_customer, "EmploymentStatus")
vehicle_type_mode = get_mode(df_customer, "VehicleType")

df_cust = df_customer.withColumn(
    "WasMaritalStatusImputed",
    F.when(F.col("MaritalStatus").isNull(), F.lit(1)).otherwise(F.lit(0))
).withColumn(
    "WasEmploymentStatusImputed",
    F.when(F.col("EmploymentStatus").isNull(), F.lit(1)).otherwise(F.lit(0))
).withColumn(
    "WasVehicleTypeImputed",
    F.when(F.col("VehicleType").isNull(), F.lit(1)).otherwise(F.lit(0))
)

df_customer = df_customer.fillna({
    "MaritalStatus": marital_mode,
    "EmploymentStatus": employment_mode,
    "VehicleType": vehicle_type_mode
})

df_customer = add_metadata_column(df_customer)
display(df_customer.limit(5))

CustomerID,DOB,MarialStatus,AnnualMileage,VehicleType,CreditScore,EmploymentStatus,NoOfVehicles
C000001,1993-10-23,Divorced,33305,Coupe,385,Retired,3
C000002,1974-12-10,Widowed,6873,Sedan,416,Retired,2
C000003,1998-07-04,Widowed,10410,Sedan,379,Employed,3
C000004,2007-10-29,Widowed,16763,Coupe,759,Employed,1
C000005,1987-09-25,Divorced,19592,Coupe,302,Self-employed,1


CustomerID,DOB,MaritalStatus,AnnualMileage,VehicleType,CreditScore,EmploymentStatus,NoOfVehicles,WasAnnualMileageImputed,WasCreditScoreImputed,WasNoOfVehiclesImputed,ingestion_date,update_timestamp
C000111,1965-01-04,Widowed,19927,Sedan,414,Self-employed,4,0,0,0,2025-12-06,2025-12-06T16:48:21.644+0000
C000142,1963-05-06,Divorced,27416,Hatchback,747,Self-employed,4,0,0,0,2025-12-06,2025-12-06T16:48:21.644+0000
C000299,1955-02-24,Widowed,12107,Truck,455,Retired,1,0,0,0,2025-12-06,2025-12-06T16:48:21.644+0000
C000433,1969-12-14,Divorced,23230,Hatchback,775,Employed,2,0,0,0,2025-12-06,2025-12-06T16:48:21.644+0000
C000805,1982-10-30,Widowed,17949,Sedan,659,Employed,3,0,0,0,2025-12-06,2025-12-06T16:48:21.644+0000


In [0]:

#----POLICY SILVER----
df_policy_bronze = spark.read.format("delta").load(f"{bronze_path}/policy")
display(df_policy_bronze.limit(5))

df_policy = df_policy_bronze #because column names was standardized

df_policy = (
    df_policy
    .withColumn("PolicyCreateDate", F.to_date("PolicyCreateDate"))
    .withColumn("PolicyExpirationDate", F.to_date("PolicyExpirationDate"))
    .withColumn("AnnualPremium", F.col("AnnualPremium").cast(T.DoubleType()))
    .withColumn("Excess", F.col("Excess").cast(T.DoubleType()))
)

df_policy = df_policy.filter(F.col("PolicyNumber").isNotNull()
                             & F.col("CustomerID").isNotNull())

df_policy = df_policy.dropDuplicates(["PolicyNumber"])

# Imputation cho số tiền nếu cần
annual_premium_mean = df_policy.select(F.avg("AnnualPremium")).first()[0]
excess_mean = df_policy.select(F.avg("Excess")).first()[0]

df_policy = df_policy.withColumn(
    "WasAnnualPremiumImputed",
    F.when(F.col("AnnualPremium").isNull(), F.lit(1)).otherwise(F.lit(0))
).withColumn(
    "WasExcessImputed",
    F.when(F.col("Excess").isNull(), F.lit(1)).otherwise(F.lit(0))
)

df_policy = df_policy.fillna({
    "AnnualPremium": annual_premium_mean,
    "Excess": excess_mean
})

#Time sanity flag: Expiration before Create?
df_policy = df_policy.withColumn(
    "IsPolicyDateInconsistent",
    F.when(
        (F.col("PolicyCreateDate").isNotNull())&
        (F.col("PolicyExpirationDate").isNotNull())&
        (F.col("PolicyExpirationDate") < F.col("PolicyCreateDate")),
        F.lit(1)
    ).otherwise(F.lit(0))
)

df_policy = df_policy.withColumn(
    "PolicyDuration",
    F.datediff("PolicyExpirationDate","PolicyCreateDate")
)

df_policy = add_metadata_column(df_policy)
display(df_policy.limit(5))


PolicyNumber,CustomerID,PolicyCreateDate,PolicyExpirationDate,AnnualPremium,Excess
P0000001,C000553,2025-06-16,2026-08-11,1408.19,250
P0000002,C000358,2019-11-11,2020-10-16,2891.48,500
P0000003,C000677,2022-10-04,2023-11-13,1290.57,750
P0000004,C000332,2023-02-22,2024-01-29,2968.59,250
P0000005,C000957,2023-06-18,2024-07-04,2463.29,250


PolicyNumber,CustomerID,PolicyCreateDate,PolicyExpirationDate,AnnualPremium,Excess,WasAnnualPremiumImputed,WasExcessImputed,IsPolicyDateInconsistent,PolicyDuration,ingestion_date,update_timestamp
P0000197,C000970,2019-03-04,2020-03-21,2964.9,500.0,0,0,0,383,2025-12-06,2025-12-06T16:48:28.135+0000
P0000302,C000770,2019-02-08,2020-02-03,2497.83,750.0,0,0,0,360,2025-12-06,2025-12-06T16:48:28.135+0000
P0000381,C000692,2017-08-28,2018-09-15,2038.73,750.0,0,0,0,383,2025-12-06,2025-12-06T16:48:28.135+0000
P0000384,C000890,2024-05-20,2025-05-12,2291.37,1500.0,0,0,0,357,2025-12-06,2025-12-06T16:48:28.135+0000
P0000397,C000945,2019-08-30,2020-08-19,2763.11,750.0,0,0,0,355,2025-12-06,2025-12-06T16:48:28.135+0000


In [0]:

#-----HANDLER SILVER-----
df_handler_bronze = spark.read.format("delta").load(f"{bronze_path}/handler")
display(df_handler_bronze.limit(5))

df_handler = df_handler_bronze #because column names was standardized

df_handler = (
    df_handler
    .withColumn("EmploymentDate", F.to_date("EmploymentDate"))
)

df_handler = df_handler.filter(F.col("HandlerID").isNotNull())
df_handler.dropDuplicates(["HandlerID"])

# Imputation cho Location, ExperienceLevel (mode)
loc_mode = get_mode(df_handler, "Location")
exp_mode = get_mode(df_handler, "ExperienceLevel")

df_handler = df_handler.withColumn(
    "WasLocationImputed",
    F.when(F.col("Location").isNull(), F.lit(1)).otherwise(F.lit(0))
).withColumn(
    "WasExperienceLevelImputed",
    F.when(F.col("ExperienceLevel").isNull(), F.lit(1)).otherwise(F.lit(0))
)

df_handler = df_handler.fillna({
    "Location": loc_mode,
    "ExperienceLevel": exp_mode
})


df_handler = df_handler.withColumn(
    "FullName",
    F.concat_ws(" ",F.col("FirstName"), F.col("LastName"))
)

df_handler = add_metadata_column(df_handler)
display(df_handler.limit(5))

HandlerID,FirstName,LastName,EmploymentDate,Location,ExperienceLevel
H0001,Jeremy,Miller,2023-10-15,Brisbane,Junior
H0002,Roger,Rasmussen,2025-07-31,Melbourne,Lead
H0003,Kayla,Bowen,2023-11-10,Adelaide,Senior
H0004,Lisa,Stone,2022-06-02,Adelaide,Mid
H0005,Sara,Salazar,2022-11-17,Melbourne,Mid


HandlerID,FirstName,LastName,EmploymentDate,Location,ExperienceLevel,WasLocationImputed,WasExperienceLevelImputed,FullName,ingestion_date,update_timestamp
H0001,Jeremy,Miller,2023-10-15,Brisbane,Junior,0,0,Jeremy Miller,2025-12-06,2025-12-06T16:48:34.980+0000
H0002,Roger,Rasmussen,2025-07-31,Melbourne,Lead,0,0,Roger Rasmussen,2025-12-06,2025-12-06T16:48:34.980+0000
H0003,Kayla,Bowen,2023-11-10,Adelaide,Senior,0,0,Kayla Bowen,2025-12-06,2025-12-06T16:48:34.980+0000
H0004,Lisa,Stone,2022-06-02,Adelaide,Mid,0,0,Lisa Stone,2025-12-06,2025-12-06T16:48:34.980+0000
H0005,Sara,Salazar,2022-11-17,Melbourne,Mid,0,0,Sara Salazar,2025-12-06,2025-12-06T16:48:34.980+0000


In [0]:
df_claims_bronze = spark.read.format("delta").load(f"{bronze_path}/claims")
display(df_claims_bronze.limit(5))

df_claims = df_claims_bronze #because column names was standardized

df_claims = (
    df_claims
    .withColumn("IncidentDate", F.to_date("IncidentDate"))
    .withColumn("ClaimCreateDate", F.to_date("ClaimCreateDate"))
    .withColumn("ClaimClosedDate", F.to_date("ClaimClosedDate"))
    .withColumn("HandlerAssignedDate", F.to_date("HandlerAssignedDate"))
    .withColumn("NoofVehiclesInvolved", F.col("NoofVehiclesInvolved").cast(T.IntegerType()))
    .withColumn("ClaimAmount", F.col("ClaimAmount").cast(T.DoubleType()))
)

df_claims = df_claims.filter(F.col("ClaimNumber").isNotNull())
df_claims.dropDuplicates(["ClaimNumber"])

#----DQ FLAGS (no drop, only flag)----

#---Date consistency: Closed < Created?---
df_claims = df_claims.withColumn(
    "IsClosedBeforeCreated",
    F.when(
        (F.col("ClaimClosedDate").isNotNull()) &
        (F.col("ClaimCreateDate").isNotNull()) &
        (F.col("ClaimClosedDate") < F.col("ClaimCreateDate")),
        F.lit(1)
    ).otherwise(F.lit(0))
)

#---Numeric sanity---
df_claims = df_claims.withColumn(
    "IsClaimAmountNegative",
    F.when(
        (F.col("ClaimAmount").isNotNull()) &
        (F.col("ClaimAmount") < 0),
        F.lit(1)
    ).otherwise(F.lit(0))
).withColumn(
    "IsVehicleCountInvalid",
    F.when(
        (F.col("NoOfVehiclesInvolved").isNotNull()) &
        (F.col("NoOfVehiclesInvolved") <= 0),
        F.lit(1)
    ).otherwise(F.lit(0))
)

#---IncidentSeverity domain check---
valid_severity = ["Minor","Moderate","Severe","Total Loss"]
df_claims = df_claims.withColumn(
    "IsSeverityInvalid",
    F.when(~F.col("IncidentSeverity").isin(valid_severity), F.lit(1)).otherwise(F.lit(0))
)

#---Join DimPolicy + DimHandler + DimCustomer 
df_claims = df_claims.join(
    df_policy.select("PolicyNumber","PolicyCreateDate","PolicyExpirationDate", "AnnualPremium"),
    on = "PolicyNumber",
    how = "left"
)

df_claims = df_claims.join(
    df_handler.select("HandlerID","EmploymentDate"),
    on = "HandlerID",
    how ="left"
)

df_claims = df_claims.join(
    df_customer.select("CustomerID","DOB"),
    on = "CustomerID",
    how = "left"
)

#---Incident outside policy period---
df_claims = df_claims.withColumn(
    "IsIncidentOutsidePolicy",
    F.when(
        (F.col("IncidentDate").isNotNull()) &
        (F.col("PolicyCreateDate").isNotNull()) &
        (F.col("PolicyExpirationDate").isNotNull()) &
        (
            (F.col("IncidentDate") < F.col("PolicyCreateDate")) |
            (F.col("IncidentDate") > F.col("PolicyExpirationDate"))
        ),
        F.lit(1)
    ).otherwise(F.lit(0))
)

#---Handler employment after claim created---
df_claims = df_claims.withColumn(
    "IsHandlerEmploymentInconsistent",
    F.when(
        (F.col("EmploymentDate").isNotNull()) &
        (F.col("ClaimCreateDate").isNotNull()) &
        (F.col("EmploymentDate") > F.col("ClaimCreateDate")),
        F.lit(1)
    ).otherwise(F.lit(0))
)

#---OutlierL ClaimAmount > 10 * AnnualPremium
df_claims = df_claims.withColumn(
    "IsClaimOutlier",
    F.when(
        (F.col("ClaimAmount").isNotNull()) &
        (F.col("AnnualPremium").isNotNull()) &
        (F.col("ClaimAmount") > F.col("AnnualPremium") * 10),
        F.lit(1)
    ).otherwise(F.lit(0))
)

#---FK missing flags
df_claims = df_claims.withColumn(
    "IsCustomerMissing",
    F.when(F.col("CustomerID").isNull(), F.lit(1)).otherwise(F.lit(0))
).withColumn(
    "IsPolicyMissing",
    F.when(F.col("PolicyNumber").isNull(), F.lit(1)).otherwise(F.lit(0))
).withColumn(
    "IsHandlerMissing",
    F.when(F.col("HandlerID").isNull(), F.lit(1)).otherwise(F.lit(0))
)

#---Immputation---
# NoOfVehiclesInvolved -> impute mean > 0
no_veh_mean = df_claims.select(F.avg("NoOfVehiclesInvolved")).first()[0]

df_claims = df_claims.withColumn(
    "WasNoOfVehiclesImputed",
    F.when(F.col("NoOfVehiclesInvolved").isNull(), F.lit(1)).otherwise(F.lit(0))
)

df_claims = df_claims.fillna({"NoOfVehiclesInvolved": no_veh_mean})

#---ClaimAmount missing -> flag only, no impute (sensitive)
df_claims = df_claims.withColumn(
    "IsClaimAmountMissing",
    F.when(F.col("ClaimAmount").isNull(), F.lit(1)).otherwise(F.lit(0))
)

#---Derive---

df_claims = df_claims.withColumn(
    "IsOpen",
    F.when(F.col("ClaimClosedDate").isNull(), F.lit(1)).otherwise(F.lit(0))
)

df_claims = df_claims.withColumn(
    "ClaimDuration",
    F.when(F.col("ClaimClosedDate").isNotNull(),
           F.datediff("ClaimClosedDate", "ClaimCreateDate"))
)

#---Customer Age at incident
df_claims = df_claims.withColumn(
    "CustomerAgeAtIncident",
    F.when(
        (F.col("IncidentDate").isNotNull()) &
        (F.col("DOB").isNotNull()),
        F.floor(F.months_between("IncidentDate", "DOB")/12)
    )
)

df_claims = add_metadata_column(df_claims)
display(df_claims.limit(5))


ClaimNumber,CustomerID,PolicyNumber,IncidentDate,ClaimCreateDate,ClaimClosedDate,HandlerID,HandlerAssignedDate,IncidentType,IncidentSeverity,NoofVehiclesInvolved,ClaimAmount
CL0000001,C000943,P0001808,2023-03-22,2023-03-24,2023-05-19,H0012,2023-03-26,Weather,Total Loss,2,5088.77
CL0000002,C000081,P0001088,2020-06-14,2020-06-26,2020-09-14,H0043,2020-06-29,Fire,Minor,1,605.77
CL0000003,C000628,P0001316,2020-05-24,2020-05-27,2020-09-19,H0041,2020-05-30,Fire,Moderate,4,4250.23
CL0000004,C000097,P0001878,2024-03-26,2024-03-31,2024-05-29,H0021,2024-04-03,Weather,Total Loss,3,8252.43
CL0000005,C000621,P0001972,2019-05-05,2019-05-07,2019-06-20,H0004,2019-05-07,Weather,Severe,3,1708.81


CustomerID,HandlerID,PolicyNumber,ClaimNumber,IncidentDate,ClaimCreateDate,ClaimClosedDate,HandlerAssignedDate,IncidentType,IncidentSeverity,NoofVehiclesInvolved,ClaimAmount,IsClosedBeforeCreated,IsClaimAmountNegative,IsVehicleCountInvalid,IsSeverityInvalid,PolicyCreateDate,PolicyExpirationDate,AnnualPremium,EmploymentDate,DOB,IsIncidentOutsidePolicy,IsHandlerEmploymentInconsistent,IsClaimOutlier,IsCustomerMissing,IsPolicyMissing,IsHandlerMissing,WasNoOfVehiclesImputed,IsClaimAmountMissing,IsOpen,ClaimDuration,CustomerAgeAtIncident,ingestion_date,update_timestamp
C000943,H0012,P0001808,CL0000001,2023-03-22,2023-03-24,2023-05-19,2023-03-26,Weather,Total Loss,2,5088.77,0,0,0,0,2022-06-25,2023-05-30,2610.94,2024-07-18,1974-05-31,0,1,0,0,0,0,0,0,0,56,48,2025-12-06,2025-12-06T16:49:47.677+0000
C000081,H0043,P0001088,CL0000002,2020-06-14,2020-06-26,2020-09-14,2020-06-29,Fire,Minor,1,605.77,0,0,0,0,2020-02-12,2021-02-21,2938.1,2023-03-30,1950-01-13,0,1,0,0,0,0,0,0,0,80,70,2025-12-06,2025-12-06T16:49:47.677+0000
C000628,H0041,P0001316,CL0000003,2020-05-24,2020-05-27,2020-09-19,2020-05-30,Fire,Moderate,4,4250.23,0,0,0,0,2019-08-13,2020-10-02,2781.41,2023-02-28,1992-12-08,0,1,0,0,0,0,0,0,0,115,27,2025-12-06,2025-12-06T16:49:47.677+0000
C000097,H0021,P0001878,CL0000004,2024-03-26,2024-03-31,2024-05-29,2024-04-03,Weather,Total Loss,3,8252.43,0,0,0,0,2024-01-29,2025-01-10,2294.64,2024-07-07,1970-07-25,0,1,0,0,0,0,0,0,0,59,53,2025-12-06,2025-12-06T16:49:47.677+0000
C000621,H0004,P0001972,CL0000005,2019-05-05,2019-05-07,2019-06-20,2019-05-07,Weather,Severe,3,1708.81,0,0,0,0,2018-06-24,2019-06-02,2454.24,2022-06-02,1966-05-03,0,1,0,0,0,0,0,0,0,44,53,2025-12-06,2025-12-06T16:49:47.677+0000


In [0]:
df_customer.write.format("delta").mode("overwrite").option("overwriteSchema","true").save(f"{silver_path}/dim_customer")
df_policy.write.format("delta").mode("overwrite").option("overwriteSchema","true").save(f"{silver_path}/dim_policy")
df_handler.write.format("delta").mode("overwrite").option("overwriteSchema","true").save(f"{silver_path}/dim_handler")
df_claims.write.format("delta").mode("overwrite").option("overwriteSchema","true").save(f"{silver_path}/fact_claims")


In [0]:
import datetime as dt

start_date = dt.date(2015, 1, 1)
end_date = dt.date(2030, 12, 31)

delta_days = (end_date - start_date).days + 1
date_list = [start_date + dt.timedelta(days=i) for i in range(delta_days)]

df_date = spark.createDataFrame(date_list, T.DateType()).toDF("Date")

df_date = (
    df_date
    .withColumn("Day", F.dayofmonth("Date"))
    .withColumn("Month", F.month("Date"))
    .withColumn("MonthName", F.date_format("Date", "MMMM"))
    .withColumn("Year", F.year("Date"))
    .withColumn("Quarter", F.quarter("Date"))
    .withColumn("WeekOfYear", F.weekofyear("Date"))
    .withColumn("DayOfWeek", F.date_format("Date", "E"))   # 1=Monday
    .withColumn("DayName", F.date_format("Date", "EEEE"))
    .withColumn(
        "IsWeekend",
        F.when(F.col("DayOfWeek").isin("6", "7"), F.lit(1)).otherwise(F.lit(0))
    )
)

df_date = add_metadata_column(df_date)

display(df_date.limit(10))

df_date.write.format("delta").mode("overwrite").option("overwriteSchema", "true") \
    .save(f"{silver_path}/dim_date")


Date,Day,Month,MonthName,Year,Quarter,WeekOfYear,DayOfWeek,DayName,IsWeekend,ingestion_date,update_timestamp
2015-01-01,1,1,January,2015,1,1,Thu,Thursday,0,2025-12-06,2025-12-06T16:50:04.922+0000
2015-01-02,2,1,January,2015,1,1,Fri,Friday,0,2025-12-06,2025-12-06T16:50:04.922+0000
2015-01-03,3,1,January,2015,1,1,Sat,Saturday,0,2025-12-06,2025-12-06T16:50:04.922+0000
2015-01-04,4,1,January,2015,1,1,Sun,Sunday,0,2025-12-06,2025-12-06T16:50:04.922+0000
2015-01-05,5,1,January,2015,1,2,Mon,Monday,0,2025-12-06,2025-12-06T16:50:04.922+0000
2015-01-06,6,1,January,2015,1,2,Tue,Tuesday,0,2025-12-06,2025-12-06T16:50:04.922+0000
2015-01-07,7,1,January,2015,1,2,Wed,Wednesday,0,2025-12-06,2025-12-06T16:50:04.922+0000
2015-01-08,8,1,January,2015,1,2,Thu,Thursday,0,2025-12-06,2025-12-06T16:50:04.922+0000
2015-01-09,9,1,January,2015,1,2,Fri,Friday,0,2025-12-06,2025-12-06T16:50:04.922+0000
2015-01-10,10,1,January,2015,1,2,Sat,Saturday,0,2025-12-06,2025-12-06T16:50:04.922+0000
