### Import File

In [None]:
import os
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType


file_paths = [
    'path1',
    'path2',
    'path3',
]

def dict_to_schema(col_type_dict):
    return StructType([
        StructField(col, typ, True) for col, typ in col_type_dict.items()
    ])


def load_data(file_path, schema):
    """
    Load data from a CSV file into a Spark DataFrame.
    
    :param file_path: Path to the CSV file
    :return: Spark DataFrame
    """
    schema = dict_to_schema(behavior_schema_dict)
    df = spark.read.csv(file_path, header=True, schema=schema)
    # df.show(5, truncate=False)  # Show first 5 rows for debugging
    return df



# Create a Spark session
spark = SparkSession.builder \
    .appName("Simple Example") \
    .getOrCreate()

from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType

behavior_schema_dict = {
    "Tunnel": StringType(),
    "Device": StringType(),
    "ShopId": IntegerType(),
    "ShopMemberId": StringType(),
    "FullvisitorId": StringType(),
    "DeviceId": StringType(),
    "HitTime": DoubleType(),
    "Language": StringType(),
    "CountryAliasCode": StringType(),
    "Version": StringType(),
    "UTMSource": StringType(),
    "UTMMedium": StringType(),
    "UTMName": StringType(),
    "Behavior": StringType(),
    "RegisterTunnel": StringType(),
    "CategoryId": StringType(),
    "SalePageId": StringType(),
    "UnitPrice": DoubleType(),
    "Qty": IntegerType(),
    "TotalSalesAmount": DoubleType(),
    "CurrencyCode": StringType(),
    "TradesGroupCode": StringType(),
    "SearchTerm": StringType(),
    "ContentType": StringType(),
    "ContentName": StringType(),
    "ContentId": StringType(),
    "PageType": StringType(),
    "EventTime": DoubleType()
}

behavior_schema = dict_to_schema(behavior_schema_dict)

df = spark.createDataFrame([], behavior_schema)

for file_path in file_paths:
    print(f'Processing {file_path}')

    for root , dirs, files in os.walk(file_path):
        for file in files:
            if file.endswith('.csv'):
                file_path = os.path.join(root, file)
                print(f'Processing {file_path}')
                tmp_df = load_data(file_path, behavior_schema)
                if df.isEmpty():
                    df = tmp_df
                else:
                    df = df.unionByName(tmp_df, allowMissingColumns=True)
                print(f'Current DataFrame shape: {df.count()} rows')

# Show the schema of the final DataFrame
df.printSchema()
# Show the first few rows of the final DataFrame
df.show(5, truncate=False)


Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
25/06/01 11:32:02 WARN Utils: Your hostname, Bryant-Lius-MacBook-Pro-2.local, resolves to a loopback address: 127.0.0.1; using 192.168.0.8 instead (on interface en0)
25/06/01 11:32:02 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/06/01 11:32:02 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session00)
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session00)/session00_202401.csv


25/06/01 11:32:05 WARN SparkStringUtils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
                                                                                

Current DataFrame shape: 23258882 rows
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session00)/session00_202402.csv


                                                                                

Current DataFrame shape: 49254287 rows
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session00)/session00_202311.csv


                                                                                

Current DataFrame shape: 72157896 rows
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session00)/session00_202310.csv


                                                                                

Current DataFrame shape: 97430096 rows
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session00)/session00_202312.csv


                                                                                

Current DataFrame shape: 121865226 rows
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session00)/session00_202309.csv


                                                                                

Current DataFrame shape: 143531695 rows
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session01)
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session01)/session01_202309.csv


                                                                                

Current DataFrame shape: 157330487 rows
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session01)/session01_202401.csv


                                                                                

Current DataFrame shape: 173507740 rows
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session01)/session01_202402.csv


                                                                                

Current DataFrame shape: 189655690 rows
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session01)/session01_202311.csv


                                                                                

Current DataFrame shape: 206046915 rows
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session01)/session01_202310.csv


                                                                                

Current DataFrame shape: 222194665 rows
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session01)/session01_202312.csv


                                                                                

Current DataFrame shape: 236142172 rows
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session02)
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session02)/session02_202310.csv


                                                                                

Current DataFrame shape: 238178721 rows
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session02)/session02_202311.csv


                                                                                

Current DataFrame shape: 240560478 rows
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session02)/session02_202312.csv


                                                                                

Current DataFrame shape: 242603898 rows
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session02)/session02_202401.csv


                                                                                

Current DataFrame shape: 244640349 rows
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session02)/session02_202402.csv


                                                                                

Current DataFrame shape: 246596048 rows
Processing /Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(session02)/session02_202309.csv


                                                                                

Current DataFrame shape: 248268733 rows
root
 |-- Tunnel: string (nullable = true)
 |-- Device: string (nullable = true)
 |-- ShopId: integer (nullable = true)
 |-- ShopMemberId: string (nullable = true)
 |-- FullvisitorId: string (nullable = true)
 |-- DeviceId: string (nullable = true)
 |-- HitTime: double (nullable = true)
 |-- Language: string (nullable = true)
 |-- CountryAliasCode: string (nullable = true)
 |-- Version: string (nullable = true)
 |-- UTMSource: string (nullable = true)
 |-- UTMMedium: string (nullable = true)
 |-- UTMName: string (nullable = true)
 |-- Behavior: string (nullable = true)
 |-- RegisterTunnel: string (nullable = true)
 |-- CategoryId: string (nullable = true)
 |-- SalePageId: string (nullable = true)
 |-- UnitPrice: double (nullable = true)
 |-- Qty: integer (nullable = true)
 |-- TotalSalesAmount: double (nullable = true)
 |-- CurrencyCode: string (nullable = true)
 |-- TradesGroupCode: string (nullable = true)
 |-- SearchTerm: string (nullable = tr

----------------------------------------
Exception occurred during processing of request from ('127.0.0.1', 53287)
Traceback (most recent call last):
  File "/opt/anaconda3/envs/DS311/lib/python3.11/socketserver.py", line 317, in _handle_request_noblock
    self.process_request(request, client_address)
  File "/opt/anaconda3/envs/DS311/lib/python3.11/socketserver.py", line 348, in process_request
    self.finish_request(request, client_address)
  File "/opt/anaconda3/envs/DS311/lib/python3.11/socketserver.py", line 361, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "/opt/anaconda3/envs/DS311/lib/python3.11/socketserver.py", line 755, in __init__
    self.handle()
  File "/opt/anaconda3/envs/DS311/lib/python3.11/site-packages/pyspark/accumulators.py", line 299, in handle
    poll(accum_updates)
  File "/opt/anaconda3/envs/DS311/lib/python3.11/site-packages/pyspark/accumulators.py", line 271, in poll
    if self.rfile in r and func():
              

### 篩選: Behavior == viewproduct

In [2]:
df_view = df.filter(df['Behavior'] == 'viewproduct')
df_view.show(5, truncate=False)

+------+---------+------+--------------------------------------------+----------------------------------------------------------------+----------------------------------------------------------------+-----------------+----------+----------------+-------+---------------------+------------+---------+-----------+--------------+----------+----------+---------+---+----------------+------------+---------------+----------+-----------+-----------+---------+--------+-----------------+
|Tunnel|Device   |ShopId|ShopMemberId                                |FullvisitorId                                                   |DeviceId                                                        |HitTime          |Language  |CountryAliasCode|Version|UTMSource            |UTMMedium   |UTMName  |Behavior   |RegisterTunnel|CategoryId|SalePageId|UnitPrice|Qty|TotalSalesAmount|CurrencyCode|TradesGroupCode|SearchTerm|ContentType|ContentName|ContentId|PageType|EventTime        |
+------+---------+------+-------------

### 篩選: 期間為 2023 年

In [3]:
df_not_null = df_view.filter(df_view['EventTime'].isNotNull())
df_not_null.show(5, truncate=False)

+------+---------+------+--------------------------------------------+----------------------------------------------------------------+----------------------------------------------------------------+-----------------+----------+----------------+-------+---------------------+------------+---------+-----------+--------------+----------+----------+---------+---+----------------+------------+---------------+----------+-----------+-----------+---------+--------+-----------------+
|Tunnel|Device   |ShopId|ShopMemberId                                |FullvisitorId                                                   |DeviceId                                                        |HitTime          |Language  |CountryAliasCode|Version|UTMSource            |UTMMedium   |UTMName  |Behavior   |RegisterTunnel|CategoryId|SalePageId|UnitPrice|Qty|TotalSalesAmount|CurrencyCode|TradesGroupCode|SearchTerm|ContentType|ContentName|ContentId|PageType|EventTime        |
+------+---------+------+-------------

In [4]:
from pyspark.sql.functions import from_unixtime, col
from pyspark.sql.functions import date_format

df_datetime = df_not_null.withColumn("EventDatetime", from_unixtime((col("EventTime") / 1000).cast("long")).cast("timestamp"))
# Extract month string
df_datetime_test = df_datetime.withColumn(
    "Month", date_format(col("EventDatetime"), "yyyy-MM")
)
df_datetime_test.show(20, truncate=False)

# show unique months
unique_months = df_datetime_test.select("Month").distinct().orderBy("Month")
unique_months.show(truncate=False)

+------+-----------+------+--------------------------------------------+----------------------------------------------------------------+----------------------------------------------------------------+-----------------+----------+----------------+-------+---------------------+------------+----------------+-----------+--------------+----------+----------+---------+---+----------------+------------+---------------+----------+-----------+-----------+---------+--------+-----------------+-------------------+-------+
|Tunnel|Device     |ShopId|ShopMemberId                                |FullvisitorId                                                   |DeviceId                                                        |HitTime          |Language  |CountryAliasCode|Version|UTMSource            |UTMMedium   |UTMName         |Behavior   |RegisterTunnel|CategoryId|SalePageId|UnitPrice|Qty|TotalSalesAmount|CurrencyCode|TradesGroupCode|SearchTerm|ContentType|ContentName|ContentId|PageType|EventTime  



+-------+
|Month  |
+-------+
|2023-09|
|2023-10|
|2023-11|
|2023-12|
|2024-01|
|2024-02|
+-------+



                                                                                

In [5]:
df_datetime = df_datetime.filter(
    col("EventDatetime").between("2023-01-01 00:00:00", "2023-12-31 23:59:59")
)

In [6]:
df_datetime.show(20, truncate=False)



+------+-----------+------+--------------------------------------------+----------------------------------------------------------------+----------------------------------------------------------------+-----------------+----------+----------------+-------+----------------+------------+-------------+-----------+--------------+----------+----------+---------+---+----------------+------------+---------------+----------+-----------+-----------+---------+--------+-----------------+-------------------+
|Tunnel|Device     |ShopId|ShopMemberId                                |FullvisitorId                                                   |DeviceId                                                        |HitTime          |Language  |CountryAliasCode|Version|UTMSource       |UTMMedium   |UTMName      |Behavior   |RegisterTunnel|CategoryId|SalePageId|UnitPrice|Qty|TotalSalesAmount|CurrencyCode|TradesGroupCode|SearchTerm|ContentType|ContentName|ContentId|PageType|EventTime        |EventDatetime    

                                                                                

In [7]:
# Extract month string
df_datetime = df_datetime.withColumn(
    "Month", date_format(col("EventDatetime"), "yyyy-MM")
)
df_datetime.show(20, truncate=False)

# show unique months
unique_months = df_datetime.select("Month").distinct().orderBy("Month")
unique_months.show(truncate=False)

                                                                                

+------+-----------+------+--------------------------------------------+----------------------------------------------------------------+----------------------------------------------------------------+-----------------+----------+----------------+-------+----------------+------------+-------------+-----------+--------------+----------+----------+---------+---+----------------+------------+---------------+----------+-----------+-----------+---------+--------+-----------------+-------------------+-------+
|Tunnel|Device     |ShopId|ShopMemberId                                |FullvisitorId                                                   |DeviceId                                                        |HitTime          |Language  |CountryAliasCode|Version|UTMSource       |UTMMedium   |UTMName      |Behavior   |RegisterTunnel|CategoryId|SalePageId|UnitPrice|Qty|TotalSalesAmount|CurrencyCode|TradesGroupCode|SearchTerm|ContentType|ContentName|ContentId|PageType|EventTime        |EventDate



+-------+
|Month  |
+-------+
|2023-09|
|2023-10|
|2023-11|
|2023-12|
+-------+



                                                                                

### Load 商品頁資料

In [8]:
# laod file
sale_page_schema_dict = {
    "ShopId": IntegerType(),
    "SalePageId": StringType(),  # Even if described as integer, better as String for joins
    "SalePageTitle": StringType(),
    "SaleProductDescShortContent": StringType(),
}

sale_page_schema = dict_to_schema(sale_page_schema_dict)

df_sale_page = spark.read.csv(
    '/Users/bryant_lue/Documents/Data_Tmp/BDA/91APP_Dataset(會員&主單&子單&商品頁&標籤)/SalePage.csv',
    header=True,
    schema=sale_page_schema
)

In [9]:
df_sale_page.show(5, truncate=False)

+------+----------+----------------------+---------------------------+
|ShopId|SalePageId|SalePageTitle         |SaleProductDescShortContent|
+------+----------+----------------------+---------------------------+
|NULL  |7440259   |DHC維他命D_30粒_30日份|4511413615393              |
|NULL  |NULL      |NULL                  |NULL                       |
|NULL  |NULL      |NULL                  |NULL                       |
|NULL  |NULL      |NULL                  |NULL                       |
|NULL  |NULL      |NULL                  |NULL                       |
+------+----------+----------------------+---------------------------+
only showing top 5 rows


### Output everything

In [10]:
from pyspark.sql.functions import from_unixtime, col, date_format
import os

def export_top_1000_salepage_per_month(
    df_datetime,
    df_sale_page,
    outdir = "/Users/bryant_lue/Documents/Data_Tmp/BDA/top1000_salepage_per_month"
):
    # Ensure output dir exists
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    
    # Loop over months
    for month in [f"2023-{m:02d}" for m in range(9, 13)]:
        print(f"Processing {month}...")
        df_month = df_datetime.filter(col("Month") == month)
        if df_month.count() == 0:
            print(f"No data for {month}. Skipping.")
            continue
        # Top 1000 by view count
        df_sorted_counts = df_month.groupBy("SalePageId") \
            .count() \
            .withColumnRenamed("count", "view_count") \
            .orderBy("view_count", ascending=False) \
            .limit(1000)
        
        df_sorted_counts.show(5, truncate=False)
        
        df_ids = df_sorted_counts.select("SalePageId")

        df_ids.show(5, truncate=False)

        top_1000_sale_page_ids = [row['SalePageId'] for row in df_ids.collect()]
        
        df_sale_page_filtered = df_sale_page.filter(
            df_sale_page['SalePageId'].isin(top_1000_sale_page_ids)
        )

        df_sale_page_filtered.show(5, truncate=False)
        
        df_sale_page_info = df_sale_page_filtered.select(
            "SalePageId", "SalePageTitle", "SaleProductDescShortContent"
        )
        
        df_joined = df_sale_page_info.join(
            df_sorted_counts, on="SalePageId", how="inner"
        )
        
        # Export CSV for this month
        output_path = os.path.join(outdir, f"top_1000_sale_page_views_{month}.csv")
        # Avoid Spark creating multiple files by using coalesce(1)
        df_joined.coalesce(1).write.csv(
            output_path, header=True, mode="overwrite"
        )
        print(f"Exported {output_path}")

In [11]:
# Export top 1000 sale pages per month
export_top_1000_salepage_per_month(df_datetime, df_sale_page)

Processing 2023-09...


                                                                                

+----------+----------+
|SalePageId|view_count|
+----------+----------+
|8943431.0 |531307    |
|7678885.0 |481582    |
|9041268.0 |270335    |
|7648595.0 |115291    |
|7904452.0 |111804    |
+----------+----------+
only showing top 5 rows


                                                                                

+----------+
|SalePageId|
+----------+
|8943431.0 |
|7678885.0 |
|9041268.0 |
|7648595.0 |
|7904452.0 |
+----------+
only showing top 5 rows


                                                                                

+------+----------+-------------+---------------------------+
|ShopId|SalePageId|SalePageTitle|SaleProductDescShortContent|
+------+----------+-------------+---------------------------+
+------+----------+-------------+---------------------------+



                                                                                

Exported /Users/bryant_lue/Documents/Data_Tmp/BDA/top1000_salepage_per_month/top_1000_sale_page_views_2023-09.csv
Processing 2023-10...


                                                                                

+----------+----------+
|SalePageId|view_count|
+----------+----------+
|7678885   |764396    |
|9041268   |258804    |
|8943431   |254061    |
|9131281   |250856    |
|7904452   |120331    |
+----------+----------+
only showing top 5 rows


                                                                                

+----------+
|SalePageId|
+----------+
|7678885   |
|9041268   |
|8943431   |
|9131281   |
|7904452   |
+----------+
only showing top 5 rows


                                                                                

+------+----------+-----------------------------------------+---------------------------+
|ShopId|SalePageId|SalePageTitle                            |SaleProductDescShortContent|
+------+----------+-----------------------------------------+---------------------------+
|NULL  |8018943   |【請洽門市藥師】合利他命愛A25_300錠      |4719889620037              |
|NULL  |8018947   |【請洽門市藥師】武田合利他命強效錠EX120錠|4714504130144              |
|NULL  |8004691   |【請洽門市藥師】曼秀雷敦AD止癢消炎乳膏90g|4719865604181              |
|NULL  |8019226   |【請洽門市藥師】樂敦舒視齡眼藥水20ml     |4987241163113              |
|NULL  |8019006   |【請洽門市藥師】大正百保能感冒顆粒26包   |4714687000906              |
+------+----------+-----------------------------------------+---------------------------+
only showing top 5 rows


                                                                                

Exported /Users/bryant_lue/Documents/Data_Tmp/BDA/top1000_salepage_per_month/top_1000_sale_page_views_2023-10.csv
Processing 2023-11...


                                                                                

+----------+----------+
|SalePageId|view_count|
+----------+----------+
|7678885   |477667    |
|9298671   |198596    |
|8943431   |190490    |
|9305980   |149129    |
|7747431   |95832     |
+----------+----------+
only showing top 5 rows


                                                                                

+----------+
|SalePageId|
+----------+
|7678885   |
|9298671   |
|8943431   |
|9305980   |
|7747431   |
+----------+
only showing top 5 rows


                                                                                

+------+----------+------------------------------------------------+---------------------------+
|ShopId|SalePageId|SalePageTitle                                   |SaleProductDescShortContent|
+------+----------+------------------------------------------------+---------------------------+
|NULL  |9316004   |船井funcare牛奶鈣魚膠原粉15入                   |4714290502118              |
|NULL  |8797171   |比得兔馬克杯落英繽紛                            |4718995013771              |
|NULL  |7523182   |P&G_Ariel微香潔淨4D洗衣膠球39P補                |4987176062369              |
|NULL  |9274382   |【11/4限定爆殺$11】ECONECO五款元普醫療用口罩10入|4719882590184              |
|NULL  |8018947   |【請洽門市藥師】武田合利他命強效錠EX120錠       |4714504130144              |
+------+----------+------------------------------------------------+---------------------------+
only showing top 5 rows


                                                                                

Exported /Users/bryant_lue/Documents/Data_Tmp/BDA/top1000_salepage_per_month/top_1000_sale_page_views_2023-11.csv
Processing 2023-12...


                                                                                

+----------+----------+
|SalePageId|view_count|
+----------+----------+
|9305980   |690840    |
|7678885   |361147    |
|9298671   |246070    |
|9131281   |120970    |
|8943431   |51496     |
+----------+----------+
only showing top 5 rows


                                                                                

+----------+
|SalePageId|
+----------+
|9305980   |
|7678885   |
|9298671   |
|9131281   |
|8943431   |
+----------+
only showing top 5 rows


                                                                                

+------+----------+-----------------------------------------+---------------------------+
|ShopId|SalePageId|SalePageTitle                            |SaleProductDescShortContent|
+------+----------+-----------------------------------------+---------------------------+
|NULL  |7523182   |P&G_Ariel微香潔淨4D洗衣膠球39P補         |4987176062369              |
|NULL  |8018947   |【請洽門市藥師】武田合利他命強效錠EX120錠|4714504130144              |
|NULL  |8004691   |【請洽門市藥師】曼秀雷敦AD止癢消炎乳膏90g|4719865604181              |
|NULL  |8019057   |【請洽門市藥師】必達定R殺菌漱口藥水125ml |4719871150047              |
|NULL  |8019006   |【請洽門市藥師】大正百保能感冒顆粒26包   |4714687000906              |
+------+----------+-----------------------------------------+---------------------------+
only showing top 5 rows


[Stage 181:>                                                        (0 + 1) / 1]

Exported /Users/bryant_lue/Documents/Data_Tmp/BDA/top1000_salepage_per_month/top_1000_sale_page_views_2023-12.csv


                                                                                

### 最後結束 spark session

In [None]:
# Stop the Spark session
# spark.stop()

25/06/01 13:49:27 WARN HeartbeatReceiver: Removing executor driver with no recent heartbeats: 146404 ms exceeds timeout 120000 ms
25/06/01 13:49:27 WARN SparkContext: Killing executors is not supported by current scheduler.
25/06/01 13:49:29 ERROR Inbox: Ignoring error
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.SparkThreadUtils$.awaitResult(SparkThreadUtils.scala:53)
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:342)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRefByURI(RpcEnv.scala:102)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRef(RpcEnv.scala:110)
	at org.apache.spark.util.RpcUtils$.makeDriverRef(RpcUtils.scala:36)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.driverEndpoint$lzycompute(BlockManagerMasterEndpoint.scala:132)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.org$apache$spark$storage$BlockManagerMasterEndpoint$$