In [4]:
import httpx

def login_and_save_coordinates(email: str, password: str, coordinates: list, date: int):
    """Authenticate against the backend and upload one coordinate sample.

    Two POST requests are issued: the first sends ``email`` and ``password``
    as query parameters to the login endpoint, the second sends
    ``coordinates`` and ``date`` as a JSON body to the save endpoint, using
    the ``access_token`` from the login reply as a bearer token.

    The outcome is reported only through ``print``; nothing is returned.

    Args:
        email: Account e-mail used to log in.
        password: Account password used to log in.
        coordinates: Coordinate value forwarded unchanged in the JSON body.
        date: Date value forwarded unchanged in the JSON body.
    """
    auth_reply = httpx.post(
        'https://backend.tccurbstads.com/login',
        params={'email': email, 'password': password},
    )

    # Without a successful login there is no token to upload with.
    if auth_reply.status_code != 200:
        print('Login failed.')
        return

    bearer = auth_reply.json()['access_token']

    save_reply = httpx.post(
        'https://backend.tccurbstads.com/save_coordinates',
        headers={
            'accept': 'application/json',
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {bearer}',
        },
        json={'coordinates': coordinates, 'date': date},
    )

    if save_reply.status_code != 200:
        print('Failed to save coordinates.')
    else:
        print('Coordinates saved successfully.')

In [5]:
from utilities.download import download_and_read_json_xz

# Vehicle-position dump for 2024-06-22, fetched from the C3SL open-data server.
url = "http://dadosabertos.c3sl.ufpr.br/curitibaurbs/2024_06_22_veiculos.json.xz"
df = download_and_read_json_xz(url, lines=True)
from utilities.coordinates import format_coord


def _coord_from_row(row):
    """Combine one record's LAT and LON values into a single coordinate value."""
    return format_coord(row['LAT'], row['LON'])


# Fold the two raw columns into 'coords', then discard them from the frame.
df['coords'] = df.apply(_coord_from_row, axis=1)
df.drop(columns=['LAT', 'LON'], inplace=True)
df

Unnamed: 0,VEIC,DTHR,COD_LINHA,DATE,coords
0,BA001,21/06/2024 20:24:59,924,2024-06-22,"(-49.223736, -25.376975)"
1,BA001,21/06/2024 20:24:54,924,2024-06-22,"(-49.223973, -25.376975)"
2,BA001,21/06/2024 20:24:44,924,2024-06-22,"(-49.224391, -25.377001)"
3,BA001,21/06/2024 20:24:34,924,2024-06-22,"(-49.224726, -25.376971)"
4,BA001,21/06/2024 20:24:24,924,2024-06-22,"(-49.225103, -25.376986)"
...,...,...,...,...,...
3191083,MT013,21/06/2024 09:33:51,979,2024-06-22,"(-49.293656, -25.42015)"
3191084,MT013,21/06/2024 09:33:46,979,2024-06-22,"(-49.293823, -25.420126)"
3191085,MT013,21/06/2024 09:33:31,979,2024-06-22,"(-49.294253, -25.420095)"
3191086,MT013,21/06/2024 09:32:51,979,2024-06-22,"(-49.294258, -25.419818)"


In [6]:
import pandas as pd
import pytz

# Parse the day-first 'DTHR' strings and mark them as Brazilian local time.
dthr_local = pd.to_datetime(
    df['DTHR'], format='%d/%m/%Y %H:%M:%S'
).dt.tz_localize('America/Sao_Paulo')

# Convert to whole POSIX seconds in a single vectorised step instead of calling
# a Python lambda once per row (the frame holds millions of rows).
# tz_convert(None) shifts the values to UTC and drops the zone, so subtracting
# the naive epoch is well defined and independent of the datetime resolution.
dthr_utc = dthr_local.dt.tz_convert(None)
df['DTHR'] = (dthr_utc - pd.Timestamp('1970-01-01')) // pd.Timedelta(seconds=1)


In [7]:
import random
import numpy as np

# Keep only the position records whose line code is "022".
df_inter = df[df["COD_LINHA"] == "022"]
df_inter_len = len(df_inter)

num_sequences = 5
sequence_length = 5

# Draw distinct start offsets such that a full run of `sequence_length`
# consecutive rows fits inside the filtered frame.
last_valid_start = df_inter_len - sequence_length
start_indexes = np.random.choice(
    range(last_valid_start + 1), num_sequences, replace=False
)

for start_index in start_indexes:
    stop_index = start_index + sequence_length
    indexes = list(range(start_index, stop_index))
    print(indexes)

    # Consecutive rows by position, reduced to the two fields that get uploaded.
    window = df_inter.iloc[start_index:stop_index]
    coords = window[["coords", "DTHR"]].to_dict('records')

    for record in coords:
        print(record)
        # NOTE(review): credentials are hard-coded here; consider reading them
        # from the environment before sharing this notebook.
        login_and_save_coordinates('teste@urbs.com', 'abc123', record['coords'], record['DTHR'])
    print("\n")


[3200, 3201, 3202, 3203, 3204]
{'coords': (-49.289208, -25.462126), 'DTHR': 1718974786}
Coordinates saved successfully.
{'coords': (-49.288911, -25.462378), 'DTHR': 1718974776}
Coordinates saved successfully.
{'coords': (-49.28909, -25.462805), 'DTHR': 1718974766}
Coordinates saved successfully.
{'coords': (-49.290038, -25.464738), 'DTHR': 1718974731}
Coordinates saved successfully.
{'coords': (-49.290146, -25.464975), 'DTHR': 1718974696}
Coordinates saved successfully.


[21889, 21890, 21891, 21892, 21893]
{'coords': (-49.212458, -25.43407), 'DTHR': 1718961522}
Coordinates saved successfully.
{'coords': (-49.212006, -25.433253), 'DTHR': 1718961512}
Coordinates saved successfully.
{'coords': (-49.210978, -25.431401), 'DTHR': 1718961492}
Coordinates saved successfully.
{'coords': (-49.210735, -25.430986), 'DTHR': 1718961487}
Coordinates saved successfully.
{'coords': (-49.210641, -25.43063), 'DTHR': 1718961482}
Coordinates saved successfully.


[38620, 38621, 38622, 38623, 38624]
{'coor

In [33]:
from typing import List
from utilities.elastic import get_elastic_client

def search_points(points: List[List[float]], distance: str) -> dict:
    """Query the ``shape_linha_without_date`` index for documents near the points.

    Every point contributes one clause to the top-level ``bool.filter`` list.
    Each clause requires the ``coordinates`` field to exist and to lie within
    ``distance`` of that point. Results are collapsed on ``CODIGOLINHA`` and
    capped at 1000 entries.

    Args:
        points: Coordinate pairs, passed unchanged to ``geo_distance``.
        distance: Distance string passed unchanged to ``geo_distance``
            (the notebook uses values such as ``"0.05km"``).

    Returns:
        Whatever the client's ``search`` call returns.
    """

    def _near_clause(point):
        # One "field exists AND within distance of this point" condition.
        return {
            "bool": {
                "must": [
                    {"exists": {"field": "coordinates"}},
                    {"geo_distance": {"coordinates": point, "distance": distance}},
                ]
            }
        }

    body = {
        "query": {
            "bool": {
                "filter": [_near_clause(point) for point in points],
                "must": [],
                "must_not": [],
                "should": [],
            }
        },
        "collapse": {"field": "CODIGOLINHA"},
        "size": 1000,
    }

    client = get_elastic_client()
    return client.search(index="shape_linha_without_date", body=body)

# Two sample points and the search radius applied around each of them.
points = [[-49.26334, -25.48741], [-49.26207, -25.48693]]
distance = "0.05km"
response = search_points(points, distance)

# Show the line code of every returned document, one per output line.
line_codes = [hit['_source']['CODIGOLINHA'] for hit in response['hits']['hits']]
for line_code in line_codes:
    print(line_code)


-49.26334
513


In [64]:
def search_closest_bus(codigolinha, coordinates, timestamp):
    """Search the ``veiculos`` index for the nearest matching document.

    The query matches ``CODIGOLINHA`` against ``codigolinha`` and filters on
    two conditions: ``coords`` within 1 km of ``coordinates``, and ``date``
    within ten minutes either side of ``timestamp``. Results are sorted by
    ascending geo-distance from ``coordinates`` and only the first is requested.

    Args:
        codigolinha: Line code to match.
        coordinates: Reference point, passed unchanged to the geo clauses.
        timestamp: Centre of the time window; must support ``timedelta``
            arithmetic and ``isoformat()``.

    Returns:
        Whatever the client's ``search`` call returns.
    """
    es = get_elastic_client()

    half_window = timedelta(minutes=10)
    window_start = timestamp - half_window
    window_end = timestamp + half_window

    within_radius = {
        "geo_distance": {
            "distance": "1km",
            "coords": coordinates,
        }
    }
    within_window = {
        "range": {
            "date": {
                "gte": window_start.isoformat(),
                "lte": window_end.isoformat(),
            }
        }
    }
    nearest_first = {
        "_geo_distance": {
            "coords": coordinates,
            "order": "asc",
            "unit": "km",
            "distance_type": "plane",
        }
    }

    body = {
        "query": {
            "bool": {
                "must": {"match": {"CODIGOLINHA": codigolinha}},
                "filter": [within_radius, within_window],
            }
        },
        "size": 1,
        "sort": [nearest_first],
    }

    return es.search(index="veiculos", body=body)

In [76]:
import pandas as pd
from datetime import datetime, timedelta


def search_date_range(start_date: str) -> dict:
    """Fetch ``coordinates`` documents from the 24 hours following ``start_date``.

    The end of the window is computed as ``start_date`` plus 24 hours, printed,
    and used together with ``start_date`` as an inclusive range on the
    ``timestamp`` field. At most 10000 documents are requested.

    Args:
        start_date: Window start formatted as ``%Y-%m-%dT%H:%M:%S.%fZ``,
            e.g. ``"2024-06-21T03:00:00.000Z"``.

    Returns:
        Whatever the client's ``search`` call returns.

    Raises:
        ValueError: If ``start_date`` does not match the expected format.
    """
    stamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"

    # Window end = start + 24 h, rendered back in the same textual format.
    window_end = datetime.strptime(start_date, stamp_format) + timedelta(hours=24)
    end_date = window_end.strftime(stamp_format)
    print(end_date)

    timestamp_window = {
        "range": {
            "timestamp": {
                "format": "strict_date_optional_time",
                "gte": start_date,
                "lte": end_date,
            }
        }
    }

    body = {
        "query": {
            "bool": {
                "must": [],
                "filter": [timestamp_window],
                "should": [],
                "must_not": [],
            }
        },
        "size": 10000,
    }

    client = get_elastic_client()
    return client.search(index="coordinates", body=body)


# Fetch the documents saved in the 24-hour window starting at this instant.
start_date = "2024-06-21T03:00:00.000Z"
response = search_date_range(start_date)

# One row per returned document, built from its `_source` payload.
# NOTE(review): this rebinds `df`, which earlier cells used for the vehicle dump.
sources = [hit["_source"] for hit in response["hits"]["hits"]]
df = pd.DataFrame(sources)

from sklearn.cluster import DBSCAN
import numpy as np

# Sphere radius used to turn a distance into an angle for the haversine metric
# (presumably Earth's mean radius in kilometres).
R = 6371


def group_coordinates(df):
    """Cluster the rows of ``df`` by geographic proximity.

    Adds three columns to ``df`` in place and returns the same frame:
    ``latitude`` and ``longitude`` (radians, taken from element 1 and
    element 0 of each ``coordinates`` entry respectively) and ``cluster``
    (the label produced by DBSCAN's ``fit_predict``).

    Args:
        df: Frame with a ``coordinates`` column of indexable pairs.

    Returns:
        The input frame, now carrying the three extra columns.
    """
    pairs = df["coordinates"]
    df["latitude"] = np.radians([pair[1] for pair in pairs])
    df["longitude"] = np.radians([pair[0] for pair in pairs])

    # eps is an angle: a distance of 1 (in R's unit) divided by the radius.
    clusterer = DBSCAN(
        eps=1 / R,
        min_samples=2,
        algorithm="ball_tree",
        metric="haversine",
    )
    features = df[["latitude", "longitude"]]
    df["cluster"] = clusterer.fit_predict(features)

    return df


df = group_coordinates(df)

groups = df.groupby("cluster")

# Search radius around each reported point. Defined in this cell so it does not
# depend on a `distance` variable left behind by an earlier cell.
distance = "0.05km"

es = get_elastic_client()

# One list of matching line codes per cluster, in groupby (sorted label) order.
hits_per_cluster = []
for name, group in groups:
    points = group['coordinates'].tolist()

    response = search_points(points, distance)

    # Parse into a local Series rather than assigning into the groupby
    # sub-frame, and let pandas average the datetimes directly: this avoids
    # the int64/float round-trip, which assumed nanosecond resolution.
    timestamps = pd.to_datetime(group['timestamp'], utc=True)
    mean_timestamp_utc = timestamps.mean()
    # Naive (timezone-less) UTC view of the same instant.
    mean_timestamp = mean_timestamp_utc.tz_localize(None)

    hits = response['hits']['hits']

    hits_per_cluster.append([hit['_source']['CODIGOLINHA'] for hit in hits])

    for hit in hits:
        codigolinha = hit['_source']['CODIGOLINHA']

        # Each matching line receives an equal share of the cluster.
        doc = {
            'CODIGOLINHA': codigolinha,
            'amount': 1/len(hits),
            'timestamp': mean_timestamp,
        }

        # Indexing is currently disabled; `doc` is built but not written.
        # es.index(index='lotation', body=doc)

2024-06-22T03:00:00.000000Z
-49.289208
-49.212458
-49.306878
-49.240371


In [98]:
from datetime import datetime 
# Line codes matched for each cluster, then the last row of every cluster.
print(hits_per_cluster)
last_lines = df.groupby('cluster').last()

for index, arr in enumerate(hits_per_cluster):
    last_line = last_lines.iloc[index]
    if not arr:
        # No line matched this cluster, so there is nothing to look up.
        continue

    # The stored timestamp is a string such as 2024-06-21T12:58:16+00:00.
    # Parse it once per cluster and shift it forward by 3 days.
    parsed_time = datetime.strptime(str(last_line['timestamp']), "%Y-%m-%dT%H:%M:%S%z")
    query_time = parsed_time + timedelta(days=3)

    for codigolinha in arr:
        bus = search_closest_bus(codigolinha, last_line['coordinates'], query_time)
        found = bus['hits']['hits']

        if not found:
            print('No hits found.')
            continue

        COD = found[0]['_source'].get('COD', 'Default Value')

        # Document written to the 'lotation_bus' index for this line.
        doc = {
            'COD': COD,
            'amount': 1/len(found),
            'timestamp': query_time.isoformat()
        }

        es.index(index='lotation_bus', body=doc)




[['670', '671', '673', '674', '507', '022', 'X35', '700', 'X08'], ['023', '021', '380', '022', 'X44'], [], ['023', '021', '020', '022']]


ZeroDivisionError: division by zero