In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pyspark.sql.types import * 
from pyspark.sql import SparkSession, DataFrame as SparkDataFrame
import pyspark.sql.functions as F
from pyspark.sql.functions import col,isnan, when, count, coalesce
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql.window import Window
from pyspark.sql.functions import col, lag, lead, row_number, min,max, first
import json
from functools import reduce
import sys
from cassandra.cluster import Cluster
import os
import time

# from mock.tasks import adiciona_carro}
cluster = Cluster(['cassandra'])
session = cluster.connect()

ss = SparkSession.builder.appName("test").getOrCreate()
sql = SQLContext(ss)

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

session.execute("USE simulacao")

import json
params = json.load(open('./mock/parametros.json'))



In [2]:
fps = 5
p = [[key]+list(params[key].values()) for key in params.keys()]
p = ss.createDataFrame(p, ["Rodovia"]+list(params[list(params.keys())[0]].keys()))
df_old = ss.createDataFrame([], "placa: string, posicao: int, faixa: int, rodovia: string, tempo_da_simulacao: int, velocidade: double, aceleracao: double, posicao_prevista: double, acima_vel: boolean, aplicaMulta: boolean, tempo_em_curso: int,tempo_inicio: int")
rodovias = p.select(F.collect_list('rodovia')).collect()[0][0]
vel_media = {rodovia:0 for rodovia in rodovias} #"BR-116": 0, "BR-040": 0, "BR-135": 0, "BR-393": 0}
n_vel_media = {rodovia:0 for rodovia in rodovias} # {"BR-116": 0, "BR-040": 0, "BR-135": 0, "BR-393": 0}

tempo_medio = {rodovia:0 for rodovia in rodovias}
n_tempo_medio = {rodovia:0 for rodovia in rodovias}

def atualiza_media(media_atual, tamanho_atual, media_add, tamanho_add):
    if media_add == None:
        return media_atual
    if tamanho_atual == 0:
        return media_add
    tamanho_total = tamanho_atual + tamanho_add
    return (media_atual/tamanho_total)*tamanho_atual + (media_add/tamanho_total)*tamanho_add

def processa_velocidade_media(batch):
    global vel_media, n_vel_media
    batch = batch.filter(F.col("vel_y").isNotNull())
    
    # group by "rodovia" and aggregate the mean of "velocidade"
    mean_df = batch.groupBy("rodovia").agg(F.mean(F.abs("vel_y").alias('vel_y')).alias('vel_y'))
    # collect the rows as a list
    mean_rows = mean_df.collect()
    # create a dictionary with "rodovia" as key and mean as value
    mean_dict = {row.asDict()["rodovia"]: row.asDict()["vel_y"] for row in mean_rows}
    # group by "rodovia" and aggregate the mean of "velocidade"
    length_df = batch.groupBy("rodovia").agg(F.count("vel_y"))
    length_df = length_df.withColumnRenamed("count(vel_y)", "vel_y")
    # collect the rows as a list
    length_rows = length_df.collect()
    # create a dictionary with "rodovia" as key and mean as value
    length_dict = {row.asDict()["rodovia"]: row.asDict()["vel_y"] for row in length_rows}
    for key in length_dict.keys():
        n_vel_media[key]+=length_dict[key]
        vel_media[key] = atualiza_media(vel_media[key], n_vel_media[key], mean_dict[key], length_dict[key])

def processa_tempo_cruzamento(batch):
    global n_tempo_medio, tempo_medio
    batch = batch.filter(F.col("tempo_cruzamento").isNotNull())
    # group by "rodovia" and aggregate the mean of "velocidade"
    mean_df = batch.groupBy("rodovia").agg(F.mean("tempo_cruzamento"))
    mean_df = mean_df.withColumnRenamed("avg(tempo_cruzamento)", "tempo_cruzamento")
    # collect the rows as a list
    mean_rows = mean_df.collect()
    # create a dictionary with "rodovia" as key and mean as value
    mean_dict = {row.asDict()["rodovia"]: row.asDict()["tempo_cruzamento"] for row in mean_rows}

    # group by "rodovia" and aggregate the mean of "velocidade"
    length_df = batch.groupBy("rodovia").agg(F.count("tempo_cruzamento"))
    length_df = length_df.withColumnRenamed("count(tempo_cruzamento)", "tempo_cruzamento")
    # collect the rows as a list
    length_rows = length_df.collect()
    # create a dictionary with "rodovia" as key and mean as value
    length_dict = {row.asDict()["rodovia"]: row.asDict()["tempo_cruzamento"] for row in length_rows}
    for key in length_dict.keys():
        n_tempo_medio[key] += length_dict[key]
        tempo_medio[key] = atualiza_media(tempo_medio[key], n_tempo_medio[key], mean_dict[key], length_dict[key])

In [14]:
a, b = 0, 100000
query = f"SELECT * FROM simulacao WHERE tempo_da_simulacao >= {a} AND tempo_da_simulacao <= {b} ALLOW FILTERING;"
r = list(session.execute(query))
if r != []:
    df = ss.createDataFrame(r)

In [15]:
df.count()

55647

In [16]:
df.show()

+-------+------------------+-------+-----+-------------------+
|rodovia|tempo_da_simulacao|  placa|pos_x|              pos_y|
+-------+------------------+-------+-----+-------------------+
| BR-262|              1258|ARG1K23|835.0|-15.000000000003268|
| BR-262|              1258|ARG3Z45|745.0| 1022.0999999999999|
| BR-262|              1258|CHI2S34|475.0|             -131.0|
| BR-262|              1258|CHI4Z45|745.0|-30.850000000000136|
| BR-262|              1258|COL3X45|385.0|  737.4999999999987|
| BR-262|              1258|PER3D45|835.0| 354.30999999999983|
| BR-262|              1425|ARG7W89|565.0|  171.3999999999998|
| BR-262|              1425|BRA4O56|565.0| 1152.9999999999986|
| BR-262|              1425|ECU0K01|835.0|   38.3599999999999|
| BR-262|              1425|PER3D45|835.0|  770.3099999999998|
| BR-070|               969|CHI8W89|385.0|  7.900000000000048|
| BR-070|               969|GUY1N23|475.0| 1084.0999999999979|
| BR-070|               969|PAR1T23|835.0| -60.44000000

In [17]:
from pyspark.sql import Window
from pyspark.sql.functions import lag, col

df_cruzamento = ss.createDataFrame([], "rodovia: string, placa: string, tempo_inicio: bigint, tempo_final: bigint")

Velocidades_Maximas = p.select(F.col('rodovia'), F.col("VelocidadeMaxima"))
Aceleracoes_Maximas = p.select(F.col('rodovia'), 0.8*F.col("AceleracaoMaxima"))
Aceleracoes_Maximas = Aceleracoes_Maximas.withColumnRenamed("(AceleracaoMaxima * 0.8)", "AceleracaoMaxima")

collision_tolerance = 1
collision_tolerance_quad = 0.5
def processa_carro(df, df_cruzamento):
    windowSpec = Window.partitionBy("placa").orderBy("tempo_da_simulacao")
    df = df.withColumn("prev_pos_y", lag("pos_y", 1).over(windowSpec))
    df = df.withColumn("prev_tempo_da_simulacao", lag("tempo_da_simulacao", 1).over(windowSpec))
    df = df.withColumn("vel_y", (col("pos_y") - col("prev_pos_y")) / (col("tempo_da_simulacao") - col("prev_tempo_da_simulacao")))
    df = df.withColumn("prev_vel_y", lag("vel_y", 1).over(windowSpec))
    df = df.withColumn("acel_y", (col("vel_y") - col("prev_vel_y")) / (col("tempo_da_simulacao") - col("prev_tempo_da_simulacao")))
    
    processa_velocidade_media(df)
    
    df = df.withColumn("posicao_prevista", col("pos_y") + col("vel_y") * (collision_tolerance) + col("acel_y") * collision_tolerance_quad)
    
    window_spec_rf = Window.partitionBy("rodovia", "pos_x").orderBy('pos_y')
    lag_column = col("posicao_prevista") - lag(col("posicao_prevista")).over(window_spec_rf)
    lead_column = lead(col("posicao_prevista")).over(window_spec_rf) - col("posicao_prevista")
    
    # Add the lag column to the DataFrame
    df = df.withColumn("Risco_Colisão", when(((lag_column < 0) | (lead_column < 0)), 1).otherwise(0))
    
    df = df.join(Velocidades_Maximas,on='rodovia',how='left')
    df = df.join(Aceleracoes_Maximas,on='rodovia',how='left')
    
    df = df.withColumn('acima_vel',F.abs(col('vel_y'))>F.abs(col('VelocidadeMaxima')))
    df = df.withColumn('acima_acel',F.abs(col('acel_y'))>F.abs(col('AceleracaoMaxima')))
    
    #Calcula acima_vel
    
    df = df.withColumn("troca_faixa", col("pos_x") != lag("pos_x", 1).over(windowSpec))
    
    #Contador de trocas
    
    df = df.withColumn('multado',((F.col('acima_vel') == 1) & (lag('acima_vel').over(windowSpec) == 0)))
    
    #Multa
    
    windowSpec = Window.partitionBy('rodovia',"placa").orderBy('tempo_da_simulacao')
    
    df = df.withColumn("prev_pos_y", lag("pos_y", 1).over(windowSpec))
    df = df.withColumn("on_road", (((col("pos_y") > 0) & (col('pos_y') < 800))))
    
    df = df.withColumn('tempo_inicio',when(((F.col('on_road') == True) & (lag('on_road').over(windowSpec) == False)), F.col("tempo_da_simulacao")).otherwise(None))
    df = df.withColumn('tempo_final',when(((F.col('on_road') == True) & (lead('on_road').over(windowSpec) == False)), F.col("tempo_da_simulacao")).otherwise(None))
    df_cruzamento_aux = df.select('rodovia', 'placa', 'tempo_inicio','tempo_final').filter((F.col('vel_y') != 0) & (F.col('tempo_inicio').isNotNull() | F.col('tempo_final').isNotNull()))
    
    df_cruzamento = df_cruzamento.union(df_cruzamento_aux)
    
    windowSpec2 = Window.partitionBy('placa','rodovia').orderBy('tempo_final')
    
    df_cruzamento = df_cruzamento.withColumn('tempo_cruzamento', col('tempo_final') - lag('tempo_inicio').over(windowSpec2))
    df_cruzamento.show()
    processa_tempo_cruzamento(df_cruzamento)
    df_cruzamento = df_cruzamento.filter(df_cruzamento.tempo_cruzamento.isNull())

processa_carro(df, df_cruzamento)
#pipeline(df, df_cruzamento)

+-------+-------+------------+-----------+----------------+
|rodovia|  placa|tempo_inicio|tempo_final|tempo_cruzamento|
+-------+-------+------------+-----------+----------------+
| BR-262|ARG1B12|         472|       null|            null|
| BR-262|ARG1B12|         506|       null|            null|
| BR-262|ARG1B12|         557|       null|            null|
| BR-262|ARG1B12|         588|       null|            null|
| BR-262|ARG1B12|         606|       null|            null|
| BR-262|ARG1B12|         626|       null|            null|
| BR-262|ARG1B12|         663|       null|            null|
| BR-262|ARG1B12|         678|       null|            null|
| BR-262|ARG1B12|         685|       null|            null|
| BR-262|ARG1B12|         789|       null|            null|
| BR-262|ARG1B12|         878|       null|            null|
| BR-262|ARG1B12|         970|       null|            null|
| BR-262|ARG1B12|        1064|       null|            null|
| BR-262|ARG1B12|        1147|       nul

----------------------------------------
Exception occurred during processing of request from ('127.0.0.1', 42042)
Traceback (most recent call last):
  File "/opt/conda/lib/python3.11/socketserver.py", line 317, in _handle_request_noblock
    self.process_request(request, client_address)
  File "/opt/conda/lib/python3.11/socketserver.py", line 348, in process_request
    self.finish_request(request, client_address)
  File "/opt/conda/lib/python3.11/socketserver.py", line 361, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "/opt/conda/lib/python3.11/socketserver.py", line 755, in __init__
    self.handle()
  File "/usr/local/spark/python/pyspark/accumulators.py", line 281, in handle
    poll(accum_updates)
  File "/usr/local/spark/python/pyspark/accumulators.py", line 253, in poll
    if func():
       ^^^^^^
  File "/usr/local/spark/python/pyspark/accumulators.py", line 257, in accum_updates
    num_updates = read_int(self.rfile)
                  

In [13]:
tempo_medio

{'BR-040': -43.86985294117647,
 'BR-116': -15.68939393939394,
 'BR-135': -96.40126582278481,
 'BR-393': 0,
 'BR-101': 0,
 'BR-376': 0,
 'BR-262': -47.4220623501199,
 'BR-153': 0,
 'BR-230': 0,
 'BR-349': 0,
 'BR-060': -18.719635627530366,
 'BR-050': 0,
 'BR-070': -14.058951965065502,
 'BR-163': 0,
 'BR-277': 0}

In [7]:
df_cruzamento.show()

+-------+-----+------------+-----------+
|rodovia|placa|tempo_inicio|tempo_final|
+-------+-----+------------+-----------+
+-------+-----+------------+-----------+



In [8]:
pd.set_option('display.max_rows', 100)
ata = df.orderBy(df.tempo_da_simulacao.desc()).toPandas()
ata

Unnamed: 0,rodovia,tempo_da_simulacao,placa,pos_x,pos_y
0,BR-262,2847,BOL7L89,745.0,644.50
1,BR-262,2847,ECU6Y78,835.0,473.10
2,BR-262,2847,ECU8U90,745.0,854.66
3,BR-262,2847,PAR1A23,385.0,380.30
4,BR-262,2847,PAR1I12,475.0,67.50
...,...,...,...,...,...
53204,BR-040,33,URU6A78,655.0,918.40
53205,BR-040,31,URU6A78,655.0,928.06
53206,BR-040,26,URU6A78,655.0,934.60
53207,BR-070,26,PER9X90,565.0,38.30


In [9]:
df_cruzamento.orderBy(df_cruzamento.placa.asc()).show(1000)

+-------+-----+------------+-----------+
|rodovia|placa|tempo_inicio|tempo_final|
+-------+-----+------------+-----------+
+-------+-----+------------+-----------+



In [10]:
tempo_medio

{'BR-040': -43.86985294117647,
 'BR-116': -15.68939393939394,
 'BR-135': -96.40126582278481,
 'BR-393': 0,
 'BR-101': 0,
 'BR-376': 0,
 'BR-262': -47.4220623501199,
 'BR-153': 0,
 'BR-230': 0,
 'BR-349': 0,
 'BR-060': -18.719635627530366,
 'BR-050': 0,
 'BR-070': -14.058951965065502,
 'BR-163': 0,
 'BR-277': 0}

In [11]:
vel_media

{'BR-040': 45.07763884640503,
 'BR-116': 58.9333870684953,
 'BR-135': 38.92565946095567,
 'BR-393': 0,
 'BR-101': 0,
 'BR-376': 0,
 'BR-262': 41.23718737155681,
 'BR-153': 0,
 'BR-230': 0,
 'BR-349': 0,
 'BR-060': 65.72035707227431,
 'BR-050': 0,
 'BR-070': 51.38990145144816,
 'BR-163': 0,
 'BR-277': 0}

In [12]:
dfp = df.toPandas()
dfp[dfp['placa'] == 'PAR7P89'].sort_values(by=['tempo_da_simulacao'], ascending=True)

Unnamed: 0,rodovia,tempo_da_simulacao,placa,pos_x,pos_y
12148,BR-135,151,PAR7P89,475.0,-10.5
7120,BR-135,154,PAR7P89,475.0,-10.5
5327,BR-135,155,PAR7P89,475.0,-10.5
2601,BR-135,175,PAR7P89,385.0,-82.4
25942,BR-135,179,PAR7P89,475.0,-54.5
...,...,...,...,...,...
13476,BR-262,2816,PAR7P89,385.0,12.0
11143,BR-262,2827,PAR7P89,385.0,98.6
19669,BR-262,2838,PAR7P89,385.0,203.9
27077,BR-262,2840,PAR7P89,385.0,199.6
