In [31]:
import os 
import sys 

# Point both the PySpark workers and the driver at the interpreter running
# this kernel (sys.executable).
os.environ['PYSPARK_PYTHON'] = sys.executable
os.environ['PYSPARK_DRIVER_PYTHON'] = sys.executable

# NOTE(review): these are machine-specific install locations; adjust them
# when running the notebook on another host.
os.environ["JAVA_HOME"] = "C:\\java"
os.environ["HADOOP_HOME"] = "C:\\hadoop"

# Append Hadoop's bin directory to PATH only when it is not already listed,
# so re-running this cell does not keep growing PATH. A missing PATH is
# treated as empty instead of raising KeyError.
_hadoop_bin = os.path.join(os.environ["HADOOP_HOME"], "bin")
_current_path = os.environ.get("PATH", "")
if _hadoop_bin not in _current_path.split(os.pathsep):
    os.environ["PATH"] = (
        _current_path + os.pathsep + _hadoop_bin if _current_path else _hadoop_bin
    )

In [32]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [33]:
# Create (or reuse, via getOrCreate) the Spark session for this notebook:
# application name "Base_Vendedores", master "local[*]", and
# spark.sql.shuffle.partitions set to "4".
_builder = SparkSession.builder.appName("Base_Vendedores").master("local[*]")
_builder = _builder.config("spark.sql.shuffle.partitions", "4")
spark = _builder.getOrCreate()

In [34]:
# Load Arquivos/base_vendedores.csv: semicolon-separated, first row used as
# the header, column types inferred by Spark. The flags are passed as real
# booleans rather than the strings 'True'.
df = spark.read.csv(
    'Arquivos/base_vendedores.csv',
    header=True,
    inferSchema=True,
    sep=';',
)

In [35]:
# Preview the first 10 rows of the freshly loaded frame.
df.show(n=10)

+----------+----------+--------------------+--------------+--------------+---+---------+----------+-----------+----------+------------+---------+-----------+----------------------+------------------+--------------------+-------------------+-------------+
|  ID_Venda|Data_Venda|            Vendedor|           CPF|        Cidade| UF|  Produto| Categoria|Canal_Venda|Qtd_Vendas|Valor_Vendas|  Salario|Meta_Mensal|Percentual_Cumprimento|           Cliente|       Email_Cliente|           Telefone|Data_Cadastro|
+----------+----------+--------------------+--------------+--------------+---+---------+----------+-----------+----------+------------+---------+-----------+----------------------+------------------+--------------------+-------------------+-------------+
|VENDA-5262|2022-11-11|           Igor Leão|684.025.197-76|     fortaleza| ce|       TV|    Roupas|   WhatsApp|      26.0|    31194.95|  4528.22|   70564.92|                   0.6|  Sra. Maya Borges|jesusluisa@exampl...|    (021) 7932-

In [36]:
# Rewrite ID_Venda so that every match of the pattern 'VENDA-' becomes 'vnd'
# (e.g. VENDA-5262 -> vnd5262), then display the result.
df_corrigido = df.withColumn(
    'ID_Venda',
    regexp_replace('ID_Venda', 'VENDA-', 'vnd'),
)
df_corrigido.show()

+--------+----------+--------------------+--------------+--------------+---+---------+----------+-----------+----------+------------+---------+-----------+----------------------+--------------------+--------------------+-------------------+-------------+
|ID_Venda|Data_Venda|            Vendedor|           CPF|        Cidade| UF|  Produto| Categoria|Canal_Venda|Qtd_Vendas|Valor_Vendas|  Salario|Meta_Mensal|Percentual_Cumprimento|             Cliente|       Email_Cliente|           Telefone|Data_Cadastro|
+--------+----------+--------------------+--------------+--------------+---+---------+----------+-----------+----------+------------+---------+-----------+----------------------+--------------------+--------------------+-------------------+-------------+
| vnd5262|2022-11-11|           Igor Leão|684.025.197-76|     fortaleza| ce|       TV|    Roupas|   WhatsApp|      26.0|    31194.95|  4528.22|   70564.92|                   0.6|    Sra. Maya Borges|jesusluisa@exampl...|    (021) 7932-

In [37]:
# Guarantee the 'vnd' prefix on ID_Venda: ids that do not already start with
# 'vnd' get it prepended, all others are kept unchanged.
df_corrigido = df_corrigido.withColumn(
    'ID_Venda',
    when(
        ~col('ID_Venda').startswith('vnd'),
        concat(lit('vnd'), col('ID_Venda')),
    ).otherwise(col('ID_Venda')),
)
df_corrigido.show()

+--------+----------+--------------------+--------------+--------------+---+---------+----------+-----------+----------+------------+---------+-----------+----------------------+--------------------+--------------------+-------------------+-------------+
|ID_Venda|Data_Venda|            Vendedor|           CPF|        Cidade| UF|  Produto| Categoria|Canal_Venda|Qtd_Vendas|Valor_Vendas|  Salario|Meta_Mensal|Percentual_Cumprimento|             Cliente|       Email_Cliente|           Telefone|Data_Cadastro|
+--------+----------+--------------------+--------------+--------------+---+---------+----------+-----------+----------+------------+---------+-----------+----------------------+--------------------+--------------------+-------------------+-------------+
| vnd5262|2022-11-11|           Igor Leão|684.025.197-76|     fortaleza| ce|       TV|    Roupas|   WhatsApp|      26.0|    31194.95|  4528.22|   70564.92|                   0.6|    Sra. Maya Borges|jesusluisa@exampl...|    (021) 7932-

+--------+----------+--------------------+--------------+--------------+---+---------+----------+-----------+----------+------------+---------+-----------+----------------------+--------------------+--------------------+-------------------+-------------+
|ID_Venda|Data_Venda|            Vendedor|           CPF|        Cidade| UF|  Produto| Categoria|Canal_Venda|Qtd_Vendas|Valor_Vendas|  Salario|Meta_Mensal|Percentual_Cumprimento|             Cliente|       Email_Cliente|           Telefone|Data_Cadastro|
+--------+----------+--------------------+--------------+--------------+---+---------+----------+-----------+----------+------------+---------+-----------+----------------------+--------------------+--------------------+-------------------+-------------+
| vnd5262|2022-11-11|           Igor Leão|684.025.197-76|     fortaleza| ce|       TV|    Roupas|   WhatsApp|      26.0|    31194.95|  4528.22|   70564.92|                   0.6|    Sra. Maya Borges|jesusluisa@exampl...|    (021) 7932-