In [1]:
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession

class SettingsSpark:
    """Create and hold a Spark session that runs its executors on Kubernetes.

    Constructing an instance builds a ``SparkConf`` pointing at the
    in-cluster Kubernetes API, obtains a ``SparkSession`` from it via
    ``getOrCreate()`` and silences Spark logging.

    Attributes:
        instance: Value used for ``spark.executor.instances``.
        cores: Value used for ``spark.executor.cores``.
        driver_memory: Value used for ``spark.driver.memory``.
        executor_memory: Value used for ``spark.executor.memory``.
        spark: The ``SparkSession`` returned by :meth:`builder_conf`.
        sc: The ``SparkContext`` belonging to ``spark``.
    """

    def __init__(self, instance=None, cores=None, driver_memory=None, executor_memory=None):
        """Store the resource settings and start (or reuse) the Spark session.

        Any argument that is falsy (``None``, ``""``, ``0``) falls back to
        its default.

        Args:
            instance: Number of executors; defaults to ``"2"``.
            cores: Cores per executor; defaults to ``"1"``.
            driver_memory: Driver memory; defaults to ``"512m"``.
            executor_memory: Executor memory; defaults to ``"758m"``.
        """
        self.instance = instance or "2"
        self.cores = cores or "1"
        self.driver_memory = driver_memory or "512m"
        self.executor_memory = executor_memory or "758m"
        # builder_conf() assembles the SparkConf itself, so no separate
        # __init_conf__() call is needed here.
        self.spark = self.builder_conf()
        self.sc = self.spark.sparkContext
        self.log_level("OFF")

    def __init_conf__(self):
        """Build the ``SparkConf`` for this instance.

        Returns:
            A new ``SparkConf`` combining the fixed Kubernetes/driver
            settings below with the executor and memory values stored on
            ``self``.
        """
        conf = (
            SparkConf()
            .setMaster("k8s://https://kubernetes.default.svc.cluster.local:443")
            .setAppName("spark")
            .set("spark.kubernetes.container.image", "magaiwer/pyspark-notebook:3.2.1")
            .set("spark.kubernetes.namespace", "spark")
            .set("spark.kubernetes.authenticate.caCertFile", "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt")
            .set("spark.kubernetes.authenticate.oauthTokenFile", "/var/run/secrets/kubernetes.io/serviceaccount/token")
            .set("spark.executor.instances", self.instance)
            .set("spark.executor.cores", self.cores)
            .set("spark.driver.memory", self.driver_memory)
            .set("spark.executor.memory", self.executor_memory)
            .set("spark.kubernetes.authenticate.driver.serviceAccountName", "spark-sa")
            .set("spark.kubernetes.authenticate.serviceAccountName", "spark-sa")
            .set("spark.driver.port", "29413")
            .set("spark.driver.host", "jupyter-labs.spark.svc.cluster.local")
            .set("spark.driver.bindAddress", "0.0.0.0")
            .set("spark.hadoop.com.amazonaws.services.s3.enableV4", "true")
        )
        return conf

    def builder_conf(self):
        """Return a ``SparkSession`` configured with :meth:`__init_conf__`.

        Uses ``getOrCreate()``, so an already-running session is returned
        instead of a new one.
        """
        return SparkSession.builder.config(conf=self.__init_conf__()).getOrCreate()

    def init_minio(self, acess_key=None, secret_key=None, endpoint=None):
        """Point the S3A filesystem of the running context at a MinIO server.

        The resolved values are stored on ``self`` and written, together
        with a fixed set of S3A tuning options, into the Hadoop
        configuration of ``self.sc``.

        Args:
            acess_key: S3 access key; defaults to ``"admin"``.
            secret_key: S3 secret key; falls back to a built-in default.
            endpoint: MinIO endpoint URL; defaults to
                ``"https://api.minio.magaiver.dev"``.
        """
        self.acess_key = acess_key or "admin"
        # NOTE(review): a credential is hard-coded as the default here; kept
        # for backward compatibility, but it should be rotated and supplied
        # by the caller (or a secret store) instead of living in source.
        self.secret_key = secret_key or "e7bc4dc8-3abf-4187-bcc8-5a4bc8dc32e1"
        self.endpoint = endpoint or "https://api.minio.magaiver.dev"

        s3a_settings = {
            "fs.s3a.access.key": self.acess_key,
            "fs.s3a.secret.key": self.secret_key,
            "fs.s3a.endpoint": self.endpoint,
            "fs.s3a.path.style.access": "true",
            "fs.s3a.fast.upload": "true",
            "fs.s3a.connection.establish.timeout": "2000",
            "fs.s3a.connection.timeout": "2000",
            "fs.s3a.attempts.maximum": "2",
            # NOTE(review): SSL is disabled although the default endpoint is
            # https:// - confirm which of the two is intended.
            "fs.s3a.connection.ssl.enabled": "false",
        }

        # Fetch the JVM-side Hadoop configuration once rather than once per key.
        hadoop_conf = self.sc._jsc.hadoopConfiguration()
        for key, value in s3a_settings.items():
            hadoop_conf.set(key, value)

    def stop(self):
        """Stop the underlying ``SparkContext``."""
        self.sc.stop()

    def log_level(self, level=None):
        """Set the Spark log level; a falsy ``level`` means ``"OFF"``."""
        lev = level or "OFF"
        self.sc.setLogLevel(lev)

ModuleNotFoundError: No module named 'pyspark'