In [1]:
import os
spark_home = os.path.abspath(os.getcwd() + "/../spark-3.5.5-bin-hadoop3")
hadoop_home = os.path.abspath(os.getcwd() + "/../winutils")
print(f"I am using the following SPARK_HOME: {spark_home}")
if os.name == 'nt':
    os.environ["HADOOP_HOME"] = f"{hadoop_home}"
    print(f"Windows detected: set HADOOP_HOME to: {os.environ['HADOOP_HOME']}")
    hadoop_bin = os.path.join(hadoop_home, "bin")
    os.environ["PATH"] = f"{hadoop_bin};{os.environ['PATH']}"
    print(f"  Also added Hadoop bin directory to PATH: {hadoop_bin}")

import findspark
import pyspark
from pyspark.streaming import StreamingContext

findspark.init(spark_home)
sc = pyspark.SparkContext()
spark = pyspark.sql.SparkSession.builder.getOrCreate()

I am using the following SPARK_HOME: /Users/chenpinyu/Desktop/spark/spark-3.5.5-bin-hadoop3


25/05/04 15:30:40 WARN Utils: Your hostname, Pins-MacBook-Pro.local resolves to a loopback address: 127.0.0.1; using 10.46.219.58 instead (on interface en0)
25/05/04 15:30:40 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/05/04 15:30:40 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [2]:
import threading

# Helper thread to avoid the Spark StreamingContext from blocking Jupyter
class StreamingThread(threading.Thread):
    def __init__(self, ssc):
        super().__init__()
        self.ssc = ssc
    def run(self):
        self.ssc.start()
        self.ssc.awaitTermination()
    def stop(self):
        print('----- Stopping... this may take a few seconds -----')
        self.ssc.stop(stopSparkContext=False, stopGraceFully=True)

In [3]:
import random
import torch
import joblib
from pyspark.sql.functions import udf, struct, array, col, lit
from pyspark.sql.types import StringType
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from pyspark.sql import SparkSession, Row

import json

In [4]:
model_path = "./saved_model"
tokenizer_path = "./saved_tokenizer"
label_encoder_path = "./label_encoder.joblib"
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
model = AutoModelForSequenceClassification.from_pretrained(model_path).to(device)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
label_encoder = joblib.load(label_encoder_path)

You passed along `num_labels=3` with an incompatible id to label map: {'0': 'LABEL_0', '1': 'LABEL_1', '2': 'LABEL_2', '3': 'LABEL_3', '4': 'LABEL_4', '5': 'LABEL_5', '6': 'LABEL_6', '7': 'LABEL_7', '8': 'LABEL_8', '9': 'LABEL_9', '10': 'LABEL_10', '11': 'LABEL_11', '12': 'LABEL_12', '13': 'LABEL_13', '14': 'LABEL_14', '15': 'LABEL_15', '16': 'LABEL_16', '17': 'LABEL_17', '18': 'LABEL_18', '19': 'LABEL_19'}. The number of labels will be overwritten to 20.


In [5]:


models_loaded = False
my_model = None
my_tokenizer = None
label_encoder = None
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Define device


# Initialize StreamingContext
ssc = StreamingContext(sc, 10)


def load_models():
#    print("Loading models...")
    model_path = "./saved_model"
    tokenizer_path = "./saved_tokenizer"
    label_encoder_path = "./label_encoder.joblib"

    loaded_tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(model_path).to("mps") # mps

    try:
        loaded_label_encoder = joblib.load(label_encoder_path)
        label_names = loaded_label_encoder.classes_
        print("Label encoder loaded successfully.") # success message
    except Exception as e:
        print(f"Failed to load label encoder: {e}")
        loaded_label_encoder = None
        label_names = None # set to none

    print("Models loaded successfully.")
#    print(f"Loaded model: {loaded_model}")
#    print(f"Label encoder: {loaded_label_encoder}")
    return loaded_model, loaded_tokenizer, loaded_label_encoder

def predict(df):
    global my_model, my_tokenizer, label_encoder
    print("predicting")
    if my_model is None or my_tokenizer is None:
        print("Model and Tokenizer not loaded")
        return None
    
    text_list = [row.text for row in df.collect()]
    inputs = my_tokenizer(text_list, padding=True, truncation=True, return_tensors="pt").to("mps") # mps
    my_model.eval()
    with torch.no_grad():
        outputs = my_model(**inputs)
    predictions = outputs.logits.argmax(dim=-1).cpu().numpy() # argmax
    
    if label_encoder is not None:
        predicted_labels = [label_encoder.classes_[p] for p in predictions]
        return predicted_labels
    else:
        return predictions

def process(time, rdd):
    global models_loaded, my_model, my_tokenizer, label_encoder
    if rdd.isEmpty():
        return

    print("========= %s =========" % str(time))

    # Convert to data frame
    df = spark.read.json(rdd)
    df.show()

    # Load in the model if not yet loaded:
    if not models_loaded:
        # load in your models here
        model, tokenizer, encoder = load_models()
        my_model = model
        my_tokenizer = tokenizer
        label_encoder = encoder
        models_loaded = True

    # Convert RDD of JSON strings to a list of dicts
    records = rdd.map(lambda x: json.loads(x)).collect()

    if not records:
        return
    for r in records:
        if "main_category" in r and r["main_category"]:
            r["main_category"] = r["main_category"].split('.')[0]
        title = r.get("title", "")
        summary = r.get("summary", "")
        r["text"] = title + " " + summary

    # Create a Spark DataFrame for prediction
    prediction_df = spark.createDataFrame([Row(text=rec["text"]) for rec in records if "text" in rec])

    # Make predictions using the predict function
    predicted_categories = predict(prediction_df)

    # Add predictions back to records and print
    if predicted_categories:
        for record, pred_label in zip(records, predicted_categories):
            record["predicted_category"] = pred_label
            #print(record)

        result_rows = [Row(**record) for record in records]
        result_df = spark.createDataFrame(result_rows)
        result_df.select("title", "main_category", "predicted_category").show(truncate=False)
    else:
        print("No predictions made for this batch.")

lines = ssc.socketTextStream("seppe.net", 7778)
lines.foreachRDD(process)

# Start the streaming context (you can stop later using ssc.stop())
ssc.start()
#ssc.awaitTermination()

25/05/04 15:31:14 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:14 WARN BlockManager: Block input-0-1746365473800 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:14 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:14 WARN BlockManager: Block input-0-1746365474000 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:14 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:14 WARN BlockManager: Block input-0-1746365474200 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:14 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:14 WARN BlockManager: Block input-0-1746365474400 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:14 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:14 WARN BlockManager: Block input-0-1746365474600 replicated to



25/05/04 15:31:20 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:20 WARN BlockManager: Block input-0-1746365480600 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:22 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:22 WARN BlockManager: Block input-0-1746365482000 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:22 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:22 WARN BlockManager: Block input-0-1746365482200 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:22 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:22 WARN BlockManager: Block input-0-1746365482400 replicated to only 0 peer(s) instead of 1 peers
You passed along `num_labels=3` with an incompatible id to label map: {'0': 'LABEL_0', '1': 'LABEL_1', '2': 'LABEL_2', '3': 'LABEL_3', '4': 'LABEL_4', '5': 'LABEL_5', '6': 

+--------------------+--------------------+------------------+--------------------+--------------------+--------------------+
|                 aid|          categories|     main_category|           published|             summary|               title|
+--------------------+--------------------+------------------+--------------------+--------------------+--------------------+
|http://arxiv.org/...|             math.FA|           math.FA|2025-04-30T13:32:00Z|While the classic...|The Quantitative ...|
|http://arxiv.org/...|cond-mat.stat-mec...|cond-mat.stat-mech|2025-04-30T13:33:06Z|We investigate th...|Superconductivity...|
|http://arxiv.org/...|quant-ph,cond-mat...|          quant-ph|2025-04-30T13:33:10Z|Non-equilibrium d...|Skin Effect Induc...|
|http://arxiv.org/...|cs.IT,eess.IV,mat...|             cs.IT|2025-04-30T13:34:06Z|To efficiently co...|Fast Sign Retriev...|
|http://arxiv.org/...|     math.ST,stat.TH|           math.ST|2025-04-30T13:34:14Z|Estimating some m...|Convergence ra

25/05/04 15:31:22 WARN BlockManager: Block input-0-1746365482600 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:23 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:23 WARN BlockManager: Block input-0-1746365482800 replicated to only 0 peer(s) instead of 1 peers
[Stage 0:>                                                          (0 + 1) / 1]

Label encoder loaded successfully.
Models loaded successfully.


                                                                                

predicting


25/05/04 15:31:24 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:24 WARN BlockManager: Block input-0-1746365484000 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:24 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:24 WARN BlockManager: Block input-0-1746365484200 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:24 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:24 WARN BlockManager: Block input-0-1746365484400 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:24 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:24 WARN BlockManager: Block input-0-1746365484600 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:25 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:25 WARN BlockManager: Block input-0-1746365484800 replicated to

+-----------------------------------------------------------------------------------------------------------------------------+-------------+------------------+
|title                                                                                                                        |main_category|predicted_category|
+-----------------------------------------------------------------------------------------------------------------------------+-------------+------------------+
|The Quantitative Faber-Krahn Inequality for the Combinatorial Laplacian\n  in $\mathbb{Z}^{d}$                               |math         |math              |
|Superconductivity and trimers on attractive-$U$ Hubbard ladders                                                              |cond-mat     |cond-mat          |
|Skin Effect Induced Anomalous Dynamics from Charge-Fluctuating Initial\n  States                                             |quant-ph     |quant-ph          |
|Fast Sign Retrieval via Sub-band 

25/05/04 15:31:26 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:26 WARN BlockManager: Block input-0-1746365486400 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:26 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:26 WARN BlockManager: Block input-0-1746365486600 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:27 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:27 WARN BlockManager: Block input-0-1746365486800 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:28 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:28 WARN BlockManager: Block input-0-1746365488000 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:28 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:28 WARN BlockManager: Block input-0-1746365488200 replicated to



25/05/04 15:31:30 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:30 WARN BlockManager: Block input-0-1746365490200 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:30 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:30 WARN BlockManager: Block input-0-1746365490400 replicated to only 0 peer(s) instead of 1 peers


+--------------------+--------------------+-----------------+--------------------+--------------------+--------------------+
|                 aid|          categories|    main_category|           published|             summary|               title|
+--------------------+--------------------+-----------------+--------------------+--------------------+--------------------+
|http://arxiv.org/...|             math.DG|          math.DG|2025-04-30T13:47:27Z|We construct a 2-...|Computer-assisted...|
|http://arxiv.org/...|    physics.plasm-ph| physics.plasm-ph|2025-04-30T13:48:17Z|The effect of neo...|The effect of neo...|
|http://arxiv.org/...|               cs.CV|            cs.CV|2025-04-30T13:49:59Z|The success of fa...|Diffusion-based A...|
|http://arxiv.org/...|stat.ME,math.ST,s...|          stat.ME|2025-04-30T13:51:38Z|Identifying relat...|Conditional indep...|
|http://arxiv.org/...|             math.PR|          math.PR|2025-04-30T13:52:09Z|In this article, ...|Moment estimates ...|


25/05/04 15:31:30 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:30 WARN BlockManager: Block input-0-1746365490600 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:31 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:31 WARN BlockManager: Block input-0-1746365490800 replicated to only 0 peer(s) instead of 1 peers


predicting


25/05/04 15:31:32 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:32 WARN BlockManager: Block input-0-1746365492200 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:32 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:32 WARN BlockManager: Block input-0-1746365492400 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:32 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:32 WARN BlockManager: Block input-0-1746365492600 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:33 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:33 WARN BlockManager: Block input-0-1746365492800 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:34 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:34 WARN BlockManager: Block input-0-1746365494200 replicated to

+----------------------------------------------------------------------------------------------------------------------+-------------+------------------+
|title                                                                                                                 |main_category|predicted_category|
+----------------------------------------------------------------------------------------------------------------------+-------------+------------------+
|Computer-assisted construction of $SU(2)$-invariant negative Einstein\n  metrics                                      |math         |math              |
|The effect of neon seeding on plasma edge transport in EAST                                                           |physics      |physics           |
|Diffusion-based Adversarial Identity Manipulation for Facial Privacy\n  Protection                                    |cs           |cs                |
|Conditional independence testing with a single realization of a\n  multivar

25/05/04 15:31:40 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:40 WARN BlockManager: Block input-0-1746365500200 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:40 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:40 WARN BlockManager: Block input-0-1746365500400 replicated to only 0 peer(s) instead of 1 peers


+--------------------+--------------------+-----------------+--------------------+--------------------+--------------------+
|                 aid|          categories|    main_category|           published|             summary|               title|
+--------------------+--------------------+-----------------+--------------------+--------------------+--------------------+
|http://arxiv.org/...|econ.EM,math.ST,s...|          econ.EM|2025-04-30T14:10:30Z|This paper studie...|On the Robustness...|
|http://arxiv.org/...|             eess.IV|          eess.IV|2025-04-30T14:10:40Z|Floods are one of...|Assimilation of S...|
|http://arxiv.org/...|         astro-ph.HE|      astro-ph.HE|2025-04-30T14:12:36Z|We present the re...|Broadband study o...|
|http://arxiv.org/...|             math.DG|          math.DG|2025-04-30T14:13:05Z|The Futaki invari...|Futaki invariant ...|
|http://arxiv.org/...|cond-mat.mtrl-sci...|cond-mat.mtrl-sci|2025-04-30T14:13:08Z|Topological spin ...|Effect of Magneti...|


25/05/04 15:31:40 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:40 WARN BlockManager: Block input-0-1746365500600 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:41 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:41 WARN BlockManager: Block input-0-1746365500800 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:41 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:41 WARN BlockManager: Block input-0-1746365501000 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:42 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:42 WARN BlockManager: Block input-0-1746365502200 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:42 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:42 WARN BlockManager: Block input-0-1746365502400 replicated to

+-----------------------------------------------------------------------------------------------------------------------------------+-------------+------------------+
|title                                                                                                                              |main_category|predicted_category|
+-----------------------------------------------------------------------------------------------------------------------------------+-------------+------------------+
|On the Robustness of Mixture Models in the Presence of Hidden Markov\n  Regimes with Covariate-Dependent Transition Probabilities  |econ         |hep-ph            |
|Assimilation of SWOT Altimetry Data for Riverine Flood Reanalysis: From\n  Synthetic to Real Data                                  |eess         |physics           |
|Broadband study of the SMC pulsar RX J0032.9-7348 during its X-ray\n  brightening in 2024                                          |astro-ph     |astro-ph          

                                                                                



25/05/04 15:31:50 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:50 WARN BlockManager: Block input-0-1746365510400 replicated to only 0 peer(s) instead of 1 peers


+--------------------+--------------------+--------------+--------------------+--------------------+--------------------+
|                 aid|          categories| main_category|           published|             summary|               title|
+--------------------+--------------------+--------------+--------------------+--------------------+--------------------+
|http://arxiv.org/...|               cs.AI|         cs.AI|2025-04-30T14:34:56Z|AutomationML has ...|Automatic Mapping...|
|http://arxiv.org/...|         cs.RO,cs.AI|         cs.RO|2025-04-30T14:38:01Z|Ego-motion estima...|Self-Supervised M...|
|http://arxiv.org/...|            q-bio.QM|      q-bio.QM|2025-04-30T14:39:01Z|Virtual reality (...|MovementVR: An op...|
|http://arxiv.org/...|  physics.app-ph,J.2|physics.app-ph|2025-04-30T14:42:04Z|We use the reduce...|Effective interfa...|
|http://arxiv.org/...|      nucl-th,hep-ph|       nucl-th|2025-04-30T14:43:35Z|We study thermal ...|Dilepton emission...|
|http://arxiv.org/...|  

25/05/04 15:31:50 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:50 WARN BlockManager: Block input-0-1746365510600 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:51 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:51 WARN BlockManager: Block input-0-1746365510800 replicated to only 0 peer(s) instead of 1 peers


predicting


25/05/04 15:31:51 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:51 WARN BlockManager: Block input-0-1746365511000 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:51 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:51 WARN BlockManager: Block input-0-1746365511200 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:52 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:52 WARN BlockManager: Block input-0-1746365512400 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:52 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:52 WARN BlockManager: Block input-0-1746365512600 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:53 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:53 WARN BlockManager: Block input-0-1746365512800 replicated to

+----------------------------------------------------------------------------------------------------------------------------------------------------+-------------+------------------+
|title                                                                                                                                               |main_category|predicted_category|
+----------------------------------------------------------------------------------------------------------------------------------------------------+-------------+------------------+
|Automatic Mapping of AutomationML Files to Ontologies for Graph Queries\n  and Validation                                                           |cs           |cs                |
|Self-Supervised Monocular Visual Drone Model Identification through\n  Improved Occlusion Handling                                                  |cs           |cs                |
|MovementVR: An open-source tool for the study of motor control and\n  learning 

25/05/04 15:31:58 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:58 WARN BlockManager: Block input-0-1746365518600 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:59 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:59 WARN BlockManager: Block input-0-1746365518800 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:59 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:59 WARN BlockManager: Block input-0-1746365519000 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:59 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:59 WARN BlockManager: Block input-0-1746365519200 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:31:59 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:31:59 WARN BlockManager: Block input-0-1746365519400 replicated to

+--------------------+--------------------+-----------------+--------------------+--------------------+--------------------+
|                 aid|          categories|    main_category|           published|             summary|               title|
+--------------------+--------------------+-----------------+--------------------+--------------------+--------------------+
|http://arxiv.org/...|cs.IT,cs.AI,eess....|            cs.IT|2025-04-30T15:05:20Z|Sionna is an open...|Sionna RT: Techni...|
|http://arxiv.org/...|         astro-ph.GA|      astro-ph.GA|2025-04-30T15:06:07Z|In Paper I (Rowan...|Modelling JWST mi...|
|http://arxiv.org/...|cs.NI,cs.SY,eess....|            cs.NI|2025-04-30T15:06:44Z|Backpressure (BP)...|Generalizing Bias...|
|http://arxiv.org/...|     math.CA,math.DS|          math.CA|2025-04-30T15:08:16Z|We define discret...|Discrete Generati...|
|http://arxiv.org/...|             eess.SP|          eess.SP|2025-04-30T15:09:07Z|As a paradigm shi...|Task-Agnostic Sem...|


25/05/04 15:32:00 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:00 WARN BlockManager: Block input-0-1746365520600 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:01 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:01 WARN BlockManager: Block input-0-1746365520800 replicated to only 0 peer(s) instead of 1 peers


predicting


25/05/04 15:32:01 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:01 WARN BlockManager: Block input-0-1746365521000 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:01 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:01 WARN BlockManager: Block input-0-1746365521200 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:01 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:01 WARN BlockManager: Block input-0-1746365521400 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:02 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:02 WARN BlockManager: Block input-0-1746365522600 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:03 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:03 WARN BlockManager: Block input-0-1746365522800 replicated to

+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------+------------------+
|title                                                                                                                                                                    |main_category|predicted_category|
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------+------------------+
|Sionna RT: Technical Report                                                                                                                                              |cs           |eess              |
|Modelling JWST mid-infrared counts II: Extension to 5.6 μm, optical,\n  radio and X-rays                                                                                 |astro-ph 

25/05/04 15:32:09 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:09 WARN BlockManager: Block input-0-1746365528800 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:09 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:09 WARN BlockManager: Block input-0-1746365529000 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:09 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:09 WARN BlockManager: Block input-0-1746365529200 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:09 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:09 WARN BlockManager: Block input-0-1746365529400 replicated to only 0 peer(s) instead of 1 peers
                                                                                

+--------------------+--------------------+--------------+--------------------+--------------------+--------------------+
|                 aid|          categories| main_category|           published|             summary|               title|
+--------------------+--------------------+--------------+--------------------+--------------------+--------------------+
|http://arxiv.org/...|              hep-ph|        hep-ph|2025-04-30T15:39:15Z|This text contain...|Jet Modification ...|
|http://arxiv.org/...|            quant-ph|      quant-ph|2025-04-30T15:39:30Z|Conventionally in...|Exponential advan...|
|http://arxiv.org/...|      physics.optics|physics.optics|2025-04-30T15:40:49Z|High-resolution e...|Laser injection l...|
|http://arxiv.org/...|         cs.CL,I.2.7|         cs.CL|2025-04-30T15:41:03Z|Conventional retr...|Improving Retriev...|
|http://arxiv.org/...|            quant-ph|      quant-ph|2025-04-30T15:41:38Z|Current advanceme...|Classical capacit...|
|http://arxiv.org/...|  

25/05/04 15:32:10 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:10 WARN BlockManager: Block input-0-1746365530600 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:11 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:11 WARN BlockManager: Block input-0-1746365530800 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:11 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:11 WARN BlockManager: Block input-0-1746365531000 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:11 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:11 WARN BlockManager: Block input-0-1746365531200 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:11 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:11 WARN BlockManager: Block input-0-1746365531400 replicated to

+--------------------------------------------------------------------------------------------------------+-------------+------------------+
|title                                                                                                   |main_category|predicted_category|
+--------------------------------------------------------------------------------------------------------+-------------+------------------+
|Jet Modification and Medium Response -- Theory Overview                                                 |hep-ph       |physics           |
|Exponential advantage in quantum sensing of correlated parameters                                       |quant-ph     |quant-ph          |
|Laser injection locking and nanophotonic spectral translation of\n  electro-optic frequency combs       |physics      |physics           |
|Improving Retrieval-Augmented Neural Machine Translation with\n  Monolingual Data                       |cs           |cs                |
|Classical capacitie

25/05/04 15:32:15 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:15 WARN BlockManager: Block input-0-1746365535600 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:17 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:17 WARN BlockManager: Block input-0-1746365536800 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:17 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:17 WARN BlockManager: Block input-0-1746365537000 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:17 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:17 WARN BlockManager: Block input-0-1746365537200 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:17 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:17 WARN BlockManager: Block input-0-1746365537400 replicated to

+--------------------+--------------------+---------------+--------------------+--------------------+--------------------+
|                 aid|          categories|  main_category|           published|             summary|               title|
+--------------------+--------------------+---------------+--------------------+--------------------+--------------------+
|http://arxiv.org/...|               cs.RO|          cs.RO|2025-04-30T16:14:25Z|Recent advancemen...|LLM-based Interac...|
|http://arxiv.org/...|               cs.CR|          cs.CR|2025-04-30T16:15:53Z|While static anal...|LASHED: LLMs And ...|
|http://arxiv.org/...|               cs.CV|          cs.CV|2025-04-30T16:16:14Z|Generative models...|Anatomical Simila...|
|http://arxiv.org/...|         cs.MM,cs.AI|          cs.MM|2025-04-30T16:17:05Z|Short video platf...|Solving Copyright...|
|http://arxiv.org/...|         cs.CL,cs.AI|          cs.CL|2025-04-30T16:17:53Z|With the widespre...|MAC-Tuning: LLM M...|
|http://arxiv.or

25/05/04 15:32:21 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:21 WARN BlockManager: Block input-0-1746365541000 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:21 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:21 WARN BlockManager: Block input-0-1746365541200 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:21 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:21 WARN BlockManager: Block input-0-1746365541400 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:21 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:21 WARN BlockManager: Block input-0-1746365541600 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:23 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:23 WARN BlockManager: Block input-0-1746365543000 replicated to

+--------------------------------------------------------------------------------------------------------------------------+-------------+------------------+
|title                                                                                                                     |main_category|predicted_category|
+--------------------------------------------------------------------------------------------------------------------------+-------------+------------------+
|LLM-based Interactive Imitation Learning for Robotic Manipulation                                                         |cs           |cs                |
|LASHED: LLMs And Static Hardware Analysis for Early Detection of RTL\n  Bugs                                              |cs           |cs                |
|Anatomical Similarity as a New Metric to Evaluate Brain Generative\n  Models                                              |cs           |eess              |
|Solving Copyright Infringement on Short Video Platf

25/05/04 15:32:27 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:27 WARN BlockManager: Block input-0-1746365547000 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:27 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:27 WARN BlockManager: Block input-0-1746365547200 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:27 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:27 WARN BlockManager: Block input-0-1746365547400 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:27 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:27 WARN BlockManager: Block input-0-1746365547600 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:28 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:28 WARN BlockManager: Block input-0-1746365547800 replicated to

+--------------------+--------------------+-----------------+--------------------+--------------------+--------------------+
|                 aid|          categories|    main_category|           published|             summary|               title|
+--------------------+--------------------+-----------------+--------------------+--------------------+--------------------+
|http://arxiv.org/...|   cond-mat.supr-con|cond-mat.supr-con|2025-04-30T16:51:09Z|We investigate th...|Vortex flow aniso...|
|http://arxiv.org/...|stat.ML,cs.LG,sta...|          stat.ML|2025-04-30T16:52:43Z|The Hawkes proces...|Balancing Interpr...|
|http://arxiv.org/...|             math.PR|          math.PR|2025-04-30T16:52:58Z|By taking the vie...|On point interact...|
|http://arxiv.org/...|             math.CO|          math.CO|2025-04-30T16:56:04Z|We prove a conjec...|Girth in $GF(q)$-...|
|http://arxiv.org/...|   cs.SE,cs.AI,cs.CL|            cs.SE|2025-04-30T16:56:06Z|Despite recent pr...|SWE-smith: Scalin...|


25/05/04 15:32:31 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:31 WARN BlockManager: Block input-0-1746365551200 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:31 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:31 WARN BlockManager: Block input-0-1746365551400 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:31 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:31 WARN BlockManager: Block input-0-1746365551600 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:32 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:32 WARN BlockManager: Block input-0-1746365551800 replicated to only 0 peer(s) instead of 1 peers
25/05/04 15:32:33 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
25/05/04 15:32:33 WARN BlockManager: Block input-0-1746365553000 replicated to

In [6]:
ssc.stop()

25/05/04 11:47:04 ERROR ReceiverTracker: Deregistered receiver for stream 0: Stopped by driver
25/05/04 11:47:04 WARN SocketReceiver: Error receiving data
java.net.SocketException: Socket closed
	at java.base/java.net.SocketInputStream.socketRead0(Native Method)
	at java.base/java.net.SocketInputStream.socketRead(SocketInputStream.java:115)
	at java.base/java.net.SocketInputStream.read(SocketInputStream.java:168)
	at java.base/java.net.SocketInputStream.read(SocketInputStream.java:140)
	at java.base/sun.nio.cs.StreamDecoder.readBytes(StreamDecoder.java:295)
	at java.base/sun.nio.cs.StreamDecoder.implRead(StreamDecoder.java:337)
	at java.base/sun.nio.cs.StreamDecoder.read(StreamDecoder.java:179)
	at java.base/java.io.InputStreamReader.read(InputStreamReader.java:181)
	at java.base/java.io.BufferedReader.fill(BufferedReader.java:161)
	at java.base/java.io.BufferedReader.readLine(BufferedReader.java:326)
	at java.base/java.io.BufferedReader.readLine(BufferedReader.java:392)
	at org.apache

In [6]:
ssc = StreamingContext(sc, 10)

[Stage 0:>                                                          (0 + 1) / 1]

In [7]:
lines = ssc.socketTextStream("seppe.net", 7778)
lines.foreachRDD(process)

In [8]:
ssc_t = StreamingThread(ssc)
ssc_t.start()

Exception in thread Thread-10:
Traceback (most recent call last):
  File "/Users/chenpinyu/Desktop/spark/.pixi/envs/default/lib/python3.11/threading.py", line 1045, in _bootstrap_inner
    self.run()
  File "/var/folders/x2/rg0ldb_55rbddbswfqnp3dcw0000gn/T/ipykernel_28353/142111391.py", line 9, in run
  File "/Users/chenpinyu/Desktop/spark/.pixi/envs/default/lib/python3.11/site-packages/pyspark/streaming/context.py", line 226, in start
    self._jssc.start()
  File "/Users/chenpinyu/Desktop/spark/.pixi/envs/default/lib/python3.11/site-packages/py4j/java_gateway.py", line 1322, in __call__
    return_value = get_return_value(
                   ^^^^^^^^^^^^^^^^^
  File "/Users/chenpinyu/Desktop/spark/.pixi/envs/default/lib/python3.11/site-packages/pyspark/errors/exceptions/captured.py", line 179, in deco
    return f(*a, **kw)
           ^^^^^^^^^^^
  File "/Users/chenpinyu/Desktop/spark/.pixi/envs/default/lib/python3.11/site-packages/py4j/protocol.py", line 326, in get_return_value
   

In [9]:
ssc_t.stop()

----- Stopping... this may take a few seconds -----


25/05/04 12:29:20 WARN StreamingContext: StreamingContext has not been started yet
