In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
spark = SparkSession.builder \
.appName('Logistics Delivery Company') \
.getOrCreate()

In [2]:
from pyspark.sql.types import *
from pyspark.sql.functions import *

In [3]:
from pyspark.sql import functions as F

#PHASE 1 — SCHEMA DESIGN & INGESTION

1. Define explicit schemas for all datasets

2. Load raw delivery data using schema enforcement

In [4]:
delivery_data = [
("DLV001","Delhi ","D001","Delivered","120","2024-01-05 10:30"),
("DLV002","Mumbai","D002","Delivered","90","05/01/2024 11:00"),
("DLV003","Bangalore","D003","In Transit","200","2024/01/06 09:45"),
("DLV004","Delhi","D004","Cancelled","","2024-01-07 14:00"),
("DLV005","Chennai","D002","Delivered","invalid","2024-01-08 16:20"),
("DLV006","Mumbai","D005","Delivered",None,"2024-01-08 18:10"),
("DLV007","Delhi","D001","Delivered","140","09-01-2024 12:30"),
("DLV008","Bangalore","D003","Delivered","160","2024-01-09 15:45"),
("DLV009","Mumbai","D004","Delivered","110","2024-01-10 13:20"),
("DLV009","Mumbai","D004","Delivered","110","2024-01-10 13:20")
]

column = ["delivery_id", "city", "driver_id", "status", "delivery_time_minutes", "delivery_timestamp"]


In [5]:
delivery_schema = StructType([
    StructField("delivery_id", StringType(), True),
    StructField("city", StringType(), True),
    StructField("driver_id", StringType(), True),
    StructField("status", StringType(), True),
    StructField("delivery_time_minutes", IntegerType(), True),
    StructField("delivery_timestamp", StringType(), True)
])
delivery_df = spark.createDataFrame(data=delivery_data, schema=column)

In [6]:
driver_data = [
("D001","Ravi","Senior"),
("D002","Amit","Junior"),
("D003","Sneha","Senior"),
("D004","Karan","Junior"),
("D005","Neha","Senior")
]

In [7]:
driver_schema = StructType([
    StructField("driver_id", StringType(), True),
    StructField("driver_name", StringType(), True),
    StructField("driver_type", StringType(), True)
])
driver_df = spark.createDataFrame(data=driver_data, schema=driver_schema)

In [8]:
city_zone_data = [
("Delhi","North"),
("Mumbai","West"),
("Bangalore","South"),
("Chennai","South")
]

In [9]:
city_zone_schema = StructType([
    StructField("city", StringType(), True),
    StructField("zone", StringType(), True)
])
city_zone_df = spark.createDataFrame(data=city_zone_data, schema=city_zone_schema)

3. Identify and flag corrupt records

In [10]:
delivery_df = delivery_df.withColumn(
    "is_corrupt",
    F.when(
    (F.col("delivery_time_minutes").isNull()) | (F.col("delivery_time_minutes").rlike("^[0-9]+$")),
    True).otherwise(False))

4. Validate schema correctness

In [11]:
delivery_df.printSchema()

root
 |-- delivery_id: string (nullable = true)
 |-- city: string (nullable = true)
 |-- driver_id: string (nullable = true)
 |-- status: string (nullable = true)
 |-- delivery_time_minutes: string (nullable = true)
 |-- delivery_timestamp: string (nullable = true)
 |-- is_corrupt: boolean (nullable = false)



#PHASE 2 — DATA CLEANING & STANDARDIZATION

5. Trim all string columns


In [12]:
for c in delivery_df.columns:
  delivery_df = delivery_df.withColumn(c, F.trim(col(c)))

6. Standardize status values


In [13]:
delivery_df = delivery_df.withColumn(
    "status",
    F.lower(F.col("status")
))

7. Convert delivery_time_minutes to IntegerType


In [14]:
delivery_df = delivery_df.withColumn(
    "delivery_time_minutes",
    F.when(
        F.col("delivery_time_minutes").rlike("^[0-9]+$"),
        F.col("delivery_time_minutes").cast(IntegerType())
    ).otherwise(F.lit(None).cast(IntegerType()))
)

8. Handle invalid and null delivery times


In [15]:
delivery_df = delivery_df.withColumn(
    "delivery_time_minutes",
    F.when(
        F.col("delivery_time_minutes").isNull() | (F.col("delivery_time_minutes") < 0),
        0
    ).otherwise(F.col("delivery_time_minutes"))
)

9. Parse multiple timestamp formats into TimestampType


In [16]:
delivery_df = delivery_df.withColumn(
    "delivery_timestamp",
    F.coalesce(
        F.try_to_timestamp(F.col("delivery_timestamp"), F.lit("yyyy-MM-dd HH:mm")),
        F.try_to_timestamp(F.col("delivery_timestamp"), F.lit("dd/MM/yyyy HH:mm")),
        F.try_to_timestamp(F.col("delivery_timestamp"), F.lit("yyyy/MM/dd HH:mm")),
        F.try_to_timestamp(F.col("delivery_timestamp"), F.lit("dd-MM-yyyy HH:mm"))
    )
)

10. Remove duplicate delivery IDs

In [17]:
delivery_df = delivery_df.drop_duplicates(subset=['delivery_id'])

#PHASE 3 — BUSINESS FILTERING

11. Keep only Delivered deliveries


In [18]:
delivery_df = delivery_df.filter(F.col("status") == "delivered")

12. Remove cancelled and in-transit deliveries


In [19]:
# This step is implicitly handled by step 11, which filtered to keep only 'delivered' statuses.

13. Validate record counts before and after filtering

In [20]:
original_count = spark.createDataFrame(data=delivery_data, schema=column).count()
filtered_count = delivery_df.count()
print(f"Original record count: {original_count}")
print(f"Filtered record count (delivered status only): {filtered_count}")

Original record count: 10
Filtered record count (delivered status only): 7


#PHASE 4 — DATA ENRICHMENT & JOINS

14. Join delivery data with driver master


In [21]:
delivery_df = delivery_df.join(driver_df, on='driver_id', how='inner')

15. Join enriched data with city zone lookup


In [22]:
delivery_df = delivery_df.join(city_zone_df, on='city', how='inner')

16. Use broadcast join where appropriate


In [23]:
from pyspark.sql.functions import broadcast
delivery_df = delivery_df.join(broadcast(city_zone_df), on='city', how='inner')

17. Explain join strategy using explain(True)

In [24]:
delivery_df.explain(True)

== Parsed Logical Plan ==
'Join UsingJoin(Inner, [city])
:- Project [city#13, driver_id#14, delivery_id#12, status#19, delivery_time_minutes#21, delivery_timestamp#22, is_corrupt#18, driver_name#7, driver_type#8, zone#10]
:  +- Join Inner, (city#13 = city#9)
:     :- Project [driver_id#14, delivery_id#12, city#13, status#19, delivery_time_minutes#21, delivery_timestamp#22, is_corrupt#18, driver_name#7, driver_type#8]
:     :  +- Join Inner, (driver_id#14 = driver_id#6)
:     :     :- Filter (status#19 = delivered)
:     :     :  +- Deduplicate [delivery_id#12]
:     :     :     +- Project [delivery_id#12, city#13, driver_id#14, status#19, delivery_time_minutes#21, coalesce(try_to_timestamp(delivery_timestamp#17, Some(yyyy-MM-dd HH:mm), TimestampType, Some(Etc/UTC), false), try_to_timestamp(delivery_timestamp#17, Some(dd/MM/yyyy HH:mm), TimestampType, Some(Etc/UTC), false), try_to_timestamp(delivery_timestamp#17, Some(yyyy/MM/dd HH:mm), TimestampType, Some(Etc/UTC), false), try_to_times

#PHASE 5 — ANALYTICS & WINDOW FUNCTIONS

18. Average delivery time per city

In [25]:
avg_delivery_time_per_city_df = delivery_df.groupBy("city").agg(
    F.avg("delivery_time_minutes").alias("avg_delivery_time_minutes"))

19. Average delivery time per driver


In [26]:
avg_delivery_time_per_driver_df = delivery_df.groupBy("driver_id").agg(
    F.avg("delivery_time_minutes").alias("avg_delivery_time_minutes"))

20. Rank drivers by performance within each city


In [27]:
from pyspark.sql.window import Window

window_spec_city = Window.partitionBy("city").orderBy(F.col("delivery_time_minutes").asc())

driver_rank_per_city_df = delivery_df.withColumn(
    "rank_per_city",
    F.rank().over(window_spec_city)
)

21. Identify fastest driver per zone


In [28]:
from pyspark.sql.window import Window

# The 'delivery_df' currently has two 'zone' columns due to previous joins,
# leading to an 'AMBIGUOUS_REFERENCE' error when trying to access 'zone'.
# To resolve this, we first drop all 'zone' columns from delivery_df.
df_no_ambiguous_zone = delivery_df.drop('zone')

# Then, we re-join with 'city_zone_df' to bring in a single, unambiguous 'zone' column.
df_for_zone_analysis = df_no_ambiguous_zone.join(city_zone_df, on='city', how='inner')

# Calculate average delivery time per driver per zone
# Now, 'zone' is unambiguous, so we can group by it.
avg_delivery_time_per_driver_zone = df_for_zone_analysis.groupBy("zone", "driver_id", "driver_name") \
    .agg(F.avg("delivery_time_minutes").alias("avg_delivery_time_minutes"))

# Define window specification for ranking within each zone
zone_window_spec = Window.partitionBy("zone").orderBy(F.col("avg_delivery_time_minutes").asc())

# Rank drivers by average delivery time within each zone and filter for rank 1 (fastest)
fastest_driver_per_zone_df = avg_delivery_time_per_driver_zone.withColumn(
    "rank",
    F.rank().over(zone_window_spec)
).filter(F.col("rank") == 1)

fastest_driver_per_zone_df.show()

+-----+---------+-----------+-------------------------+----+
| zone|driver_id|driver_name|avg_delivery_time_minutes|rank|
+-----+---------+-----------+-------------------------+----+
|North|     D001|       Ravi|                    130.0|   1|
|South|     D002|       Amit|                      0.0|   1|
| West|     D005|       Neha|                      0.0|   1|
+-----+---------+-----------+-------------------------+----+



22. Identify top 2 drivers per city

In [29]:
from pyspark.sql.window import Window

avg_delivery_time_per_driver_city = delivery_df.groupBy("city", "driver_id", "driver_name")\
    .agg(F.avg("delivery_time_minutes").alias("avg_delivery_time_minutes"))

window_spec_city_driver = Window.partitionBy("city").orderBy(F.col("avg_delivery_time_minutes").asc())

top_2_drivers_per_city_df = avg_delivery_time_per_driver_city.withColumn(
    "rank_in_city",
    F.rank().over(window_spec_city_driver)
).filter(F.col("rank_in_city") <= 2)

top_2_drivers_per_city_df.show()

+---------+---------+-----------+-------------------------+------------+
|     city|driver_id|driver_name|avg_delivery_time_minutes|rank_in_city|
+---------+---------+-----------+-------------------------+------------+
|Bangalore|     D003|      Sneha|                    160.0|           1|
|  Chennai|     D002|       Amit|                      0.0|           1|
|    Delhi|     D001|       Ravi|                    130.0|           1|
|   Mumbai|     D005|       Neha|                      0.0|           1|
|   Mumbai|     D002|       Amit|                     90.0|           2|
+---------+---------+-----------+-------------------------+------------+



#PHASE 6 — PERFORMANCE OPTIMIZATION

23. Identify DataFrames reused multiple times


In [30]:
delivery_df.cache()

DataFrame[city: string, driver_id: string, delivery_id: string, status: string, delivery_time_minutes: int, delivery_timestamp: timestamp, is_corrupt: string, driver_name: string, driver_type: string, zone: string, zone: string]

24. Apply caching appropriately


In [31]:
delivery_df.count()

7

25. Compare execution plans with and without cache


In [32]:
delivery_df.explain(True)

== Parsed Logical Plan ==
'Join UsingJoin(Inner, [city])
:- Project [city#13, driver_id#14, delivery_id#12, status#19, delivery_time_minutes#21, delivery_timestamp#22, is_corrupt#18, driver_name#7, driver_type#8, zone#10]
:  +- Join Inner, (city#13 = city#9)
:     :- Project [driver_id#14, delivery_id#12, city#13, status#19, delivery_time_minutes#21, delivery_timestamp#22, is_corrupt#18, driver_name#7, driver_type#8]
:     :  +- Join Inner, (driver_id#14 = driver_id#6)
:     :     :- Filter (status#19 = delivered)
:     :     :  +- Deduplicate [delivery_id#12]
:     :     :     +- Project [delivery_id#12, city#13, driver_id#14, status#19, delivery_time_minutes#21, coalesce(try_to_timestamp(delivery_timestamp#17, Some(yyyy-MM-dd HH:mm), TimestampType, Some(Etc/UTC), false), try_to_timestamp(delivery_timestamp#17, Some(dd/MM/yyyy HH:mm), TimestampType, Some(Etc/UTC), false), try_to_timestamp(delivery_timestamp#17, Some(yyyy/MM/dd HH:mm), TimestampType, Some(Etc/UTC), false), try_to_times

26. Repartition data by city

In [33]:
repartition_df = delivery_df.repartition(col("city"))

27. Explain why repartitioning improves performance

In [34]:
#Reduces skew during aggregations
#Improves parallelism
#Optimizes shuffle boundaries

#PHASE 7 — FILE FORMAT STRATEGY

28. Write cleaned delivery data to Parquet


In [35]:
delivery_df_cleaned_for_write = delivery_df.drop(delivery_df.columns[9]) # Assuming the second 'zone' is at index 9
delivery_df_cleaned_for_write.write.mode("overwrite").parquet("delivery_data.parquet")

29. Write aggregated analytics to ORC


In [36]:
fastest_driver_per_zone_df.write.mode("overwrite").orc("fastest_drivers_per_zone.orc")

30. Compare file output structure


In [37]:
print("\nSchema of Parquet file (delivery_data.parquet):")
parquet_df_read = spark.read.parquet("delivery_data.parquet")
parquet_df_read.printSchema()

print("\nSchema of ORC file (fastest_drivers_per_zone.orc):")
orc_df_read = spark.read.orc("fastest_drivers_per_zone.orc")
orc_df_read.printSchema()


Schema of Parquet file (delivery_data.parquet):
root
 |-- city: string (nullable = true)
 |-- driver_id: string (nullable = true)
 |-- delivery_id: string (nullable = true)
 |-- status: string (nullable = true)
 |-- delivery_time_minutes: integer (nullable = true)
 |-- delivery_timestamp: timestamp (nullable = true)
 |-- is_corrupt: string (nullable = true)
 |-- driver_name: string (nullable = true)
 |-- driver_type: string (nullable = true)


Schema of ORC file (fastest_drivers_per_zone.orc):
root
 |-- zone: string (nullable = true)
 |-- driver_id: string (nullable = true)
 |-- driver_name: string (nullable = true)
 |-- avg_delivery_time_minutes: double (nullable = true)
 |-- rank: integer (nullable = true)



31. Explain why Avro is suitable for future real-time tracking

In [38]:
#Schema evolution
#Row-based
#Streaming compatibility

#PHASE 8 — DEBUGGING & ERROR ANALYSIS

32. Identify potential NoneType errors


In [42]:
print(delivery_df.columns)

['city', 'driver_id', 'delivery_id', 'status', 'delivery_time_minutes', 'delivery_timestamp', 'is_corrupt', 'driver_name', 'driver_type', 'zone', 'zone']


In [51]:

from pyspark.sql import functions as F
from collections import Counter

# --- 1) Make column names unique (so Spark can reference them reliably)
if len(delivery_df.columns) != len(set(delivery_df.columns)):
    seen = {}
    new_cols = []
    for c in delivery_df.columns:
        if c not in seen:
            seen[c] = 0
            new_cols.append(c)
        else:
            seen[c] += 1
            new_cols.append(f"{c}_dup{seen[c]}")
    delivery_df = delivery_df.toDF(*new_cols)

# --- 2) Coalesce duplicate 'zone' columns into a single 'zone'
zone_like = [c for c in delivery_df.columns if c == "zone" or c.startswith("zone_dup")]
if "zone" in delivery_df.columns and len(zone_like) > 1:
    # Prefer original 'zone', then any zone_dup* columns
    delivery_df = delivery_df.withColumn("zone", F.coalesce(*[F.col(c) for c in ["zone"] + [c for c in zone_like if c != "zone"]]))
    # Drop the duplicate zone columns
    delivery_df = delivery_df.drop(*[c for c in zone_like if c != "zone"])

# --- 3) Quick checks for potential None/NULL issues
# 3a) Count NULLs per column (safe, no ambiguous references)
null_counts = delivery_df.select([
    F.count(F.when(F.col(c).isNull(), F.lit(1))).alias(c) for c in delivery_df.columns
]).collect()[0].asDict()

print("NULL counts per column:", null_counts)

# 3b) Flag if the join key 'zone' is NULL anywhere (common source of NoneType errors later)
zone_nulls = delivery_df.filter(F.col("zone").isNull()).limit(1).count()
if zone_nulls > 0:
    print(f"Warning: 'zone' has NULLs; downstream joins/expressions may hit NoneType/ambiguous errors.")

NULL counts per column: {'city': 0, 'driver_id': 0, 'delivery_id': 0, 'status': 0, 'delivery_time_minutes': 0, 'delivery_timestamp': 0, 'is_corrupt': 0, 'driver_name': 0, 'driver_type': 0, 'zone': 0}


33. Identify schema mismatch risks


In [53]:

from pyspark.sql import functions as F

# --- Fix duplicate 'zone' columns ---
zone_cols = [c for c in delivery_df.columns if c == "zone"]
if len(zone_cols) > 1:
    # Rename duplicates to make them accessible
    delivery_df = delivery_df.toDF(*[
        f"{c}_{i}" if c == "zone" and i > 0 else c
        for i, c in enumerate(delivery_df.columns)
    ])
    # Coalesce zone columns into one
    zone_variants = [c for c in delivery_df.columns if c.startswith("zone")]
    delivery_df = delivery_df.withColumn("zone", F.coalesce(*[F.col(c) for c in zone_variants]))
    # Drop extra zone columns
    delivery_df = delivery_df.drop(*[c for c in zone_variants if c != "zone"])

# --- Identify potential NoneType risks ---
null_counts = delivery_df.select([
    F.count(F.when(F.col(c).isNull(), 1)).alias(c) for c in delivery_df.columns
]).collect()[0].asDict()

print("NULL counts per column:", null_counts)
if null_counts.get("zone", 0) > 0:
    print("⚠ Warning: 'zone' has NULL values. Consider fillna or filter before joins.")

NULL counts per column: {'city': 0, 'driver_id': 0, 'delivery_id': 0, 'status': 0, 'delivery_time_minutes': 0, 'delivery_timestamp': 0, 'is_corrupt': 0, 'driver_name': 0, 'driver_type': 0, 'zone': 0}


34. Debug an intentionally broken transformation


In [52]:
# Intentionally broken transformation: Attempting numeric aggregation on a string column with non-numeric values.

# 1. Create a DataFrame that mirrors the initial raw data state for 'delivery_time_minutes'
#    where it's treated as a StringType, including non-numeric entries ('invalid', '').
broken_df = spark.createDataFrame(data=delivery_data, schema=column)

print("Schema of the intentionally broken DataFrame before aggregation:")
broken_df.printSchema()

print("\nAttempting to calculate the sum of 'delivery_time_minutes'...")
try:
    # This line is expected to fail due to non-numeric values in 'delivery_time_minutes'
    broken_df.groupBy("city").agg(F.sum("delivery_time_minutes").alias("total_delivery_time")).show()
except Exception as e:
    print("\n--- ERROR ENCOUNTERED ---")
    print(f"Error Type: {type(e).__name__}")
    print(f"Error Message: {e}")
    print("\nThis `NumberFormatException` occurs because the 'delivery_time_minutes' column is a StringType and contains non-numeric string values (like 'invalid', empty strings, or None) that Spark cannot implicitly convert to a numeric type for the sum aggregation. Spark expects numeric types for such operations.")
    print("\n--- Proposed Fix ---")
    print("To fix this, we need to explicitly cast the 'delivery_time_minutes' column to a numeric type (e.g., IntegerType or DoubleType), and handle the malformed data gracefully. A robust approach involves checking if the string values are numeric before casting, and providing a default value (like 0 or null) for non-numeric entries.")
    print("\nHere's an example of how to fix this transformation:")
    print("""
fixed_broken_df = broken_df.withColumn(
    "delivery_time_minutes_fixed",
    F.when(
        F.col("delivery_time_minutes").rlike("^[0-9]+$"), # Check if string contains only digits
        F.col("delivery_time_minutes").cast(IntegerType())
    ).otherwise(F.lit(0).cast(IntegerType())) # Default to 0 for non-numeric or nulls
)

print("\nSchema after applying fix:")
fixed_broken_df.printSchema()

print("\nResult after applying fix (sum of 'delivery_time_minutes_fixed'):")
fixed_broken_df.groupBy("city").agg(F.sum("delivery_time_minutes_fixed").alias("total_delivery_time")).show()
""")

Schema of the intentionally broken DataFrame before aggregation:
root
 |-- delivery_id: string (nullable = true)
 |-- city: string (nullable = true)
 |-- driver_id: string (nullable = true)
 |-- status: string (nullable = true)
 |-- delivery_time_minutes: string (nullable = true)
 |-- delivery_timestamp: string (nullable = true)


Attempting to calculate the sum of 'delivery_time_minutes'...


{"ts": "2025-12-23 09:59:47.469", "level": "ERROR", "logger": "DataFrameQueryContextLogger", "msg": "[CAST_INVALID_INPUT] The value '' of the type \"STRING\" cannot be cast to \"DOUBLE\" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. SQLSTATE: 22018", "context": {"file": "java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)", "line": "", "fragment": "sum", "errorClass": "CAST_INVALID_INPUT"}, "exception": {"class": "Py4JJavaError", "msg": "An error occurred while calling o997.showString.\n: org.apache.spark.SparkNumberFormatException: [CAST_INVALID_INPUT] The value '' of the type \"STRING\" cannot be cast to \"DOUBLE\" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. SQLSTATE: 22018\n== DataFrame ==\n\"sum\" was called from\njava.base/jdk.interna


--- ERROR ENCOUNTERED ---
Error Type: NumberFormatException
Error Message: [CAST_INVALID_INPUT] The value '' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. SQLSTATE: 22018
== DataFrame ==
"sum" was called from
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)


This `NumberFormatException` occurs because the 'delivery_time_minutes' column is a StringType and contains non-numeric string values (like 'invalid', empty strings, or None) that Spark cannot implicitly convert to a numeric type for the sum aggregation. Spark expects numeric types for such operations.

--- Proposed Fix ---
To fix this, we need to explicitly cast the 'delivery_time_minutes' column to a numeric type (e.g., IntegerType or DoubleType), and handle the malformed data gracefully. A robust approach involves checking if the string valu

35. Use explain plan to find inefficient operations

In [40]:
delivery_df.explain(True)

== Parsed Logical Plan ==
'Join UsingJoin(Inner, [city])
:- Project [city#13, driver_id#14, delivery_id#12, status#19, delivery_time_minutes#21, delivery_timestamp#22, is_corrupt#18, driver_name#7, driver_type#8, zone#10]
:  +- Join Inner, (city#13 = city#9)
:     :- Project [driver_id#14, delivery_id#12, city#13, status#19, delivery_time_minutes#21, delivery_timestamp#22, is_corrupt#18, driver_name#7, driver_type#8]
:     :  +- Join Inner, (driver_id#14 = driver_id#6)
:     :     :- Filter (status#19 = delivered)
:     :     :  +- Deduplicate [delivery_id#12]
:     :     :     +- Project [delivery_id#12, city#13, driver_id#14, status#19, delivery_time_minutes#21, coalesce(try_to_timestamp(delivery_timestamp#17, Some(yyyy-MM-dd HH:mm), TimestampType, Some(Etc/UTC), false), try_to_timestamp(delivery_timestamp#17, Some(dd/MM/yyyy HH:mm), TimestampType, Some(Etc/UTC), false), try_to_timestamp(delivery_timestamp#17, Some(yyyy/MM/dd HH:mm), TimestampType, Some(Etc/UTC), false), try_to_times