In [None]:
## using Jinja2 for Text Summarization
# Jinja2 to dynamically generate the refined summary for your trajectory data

!pip install Jinja2



In [1]:
import os #Importa o módulo os, que fornece funcionalidades para manipulação de arquivos e diretórios.
import re #Importa o módulo re para manipulação e análise de expressões regulares.
from collections import defaultdict #Importa defaultdict, um dicionário especial que permite definir valores padrão para chaves inexistentes.
from jinja2 import Template # Importa Template da biblioteca Jinja2, usada para gerar textos formatados dinamicamente.
global aspects # Declara aspects como uma variável global para armazenar os aspectos (atributos) analisados posteriormente no código



#Define o caminho do arquivo que contém os dados representativos.
#Defines the path of the file containing the representative data.

#representative_data_path = '/content/Running_Example_v5_rt 0 0 .csv'
representative_data_path = '/content/Running_Example_v5_rt 15 10.csv'



#Verifica se o arquivo existe. Se não, lança um erro FileNotFoundError.
#Checks if the file exists. If not, throws a FileNotFoundError.

if not os.path.exists(representative_data_path):
    raise FileNotFoundError(f"File not found: {representative_data_path}")



#Abre o arquivo em modo leitura ('r') com codificação UTF-8.
#Opens the file in read mode ('r') with UTF-8 encoding.

#Lê todas as linhas do arquivo e armazena na variável representative_data_raw.
#Reads all lines from the file and stores them in the representative_data_raw variable.

#Se houver erro ao ler o arquivo, lança um erro RuntimeError.
#If there is an error reading the file, throw a RuntimeError.

try:
    with open(representative_data_path, 'r', encoding='utf-8') as file:
          representative_data_raw = file.readlines()
except Exception as e:
    raise RuntimeError(f"Error reading file: {e}")



#Define a função pre_processing, responsável por processar os dados do arquivo.
#Defines the pre_processing function, responsible for processing the file data.

def pre_processing():

  # Parse the representative file to extract structured information
  #Analisa o arquivo representativo para extrair informações estruturadas
  def parse_representative_data(raw_lines):
    global aspects


    #Cria um dicionário para armazenar três seções: metadata, settings e trajectory_description.
    #Creates a dictionary to store three sections: metadata, settings and trajectory_description.
    parsed_data = {
        "metadata": {},
        "settings": {},
        "trajectory_description": []
    }


    #Remove espaços extras no início e fim de cada linha.
    #Remove extra spaces at the beginning and end of each line.
    current_section = None
    for line in raw_lines:
        line = line.strip()


        #Identifica qual seção do arquivo está sendo processada e define current_section de acordo.
        #Identifies which section of the file is being processed and sets current_section accordingly.
        if "Info input dataset:" in line:
            current_section = "metadata"
        elif "RT setting infos:" in line:
            current_section = "settings"
        elif "RT description:" in line:
            current_section = "trajectory_description"
        elif line.startswith("##"):
            current_section = None  # End of a section | Fim de uma seção


        #Se estiver na seção metadata, armazena os pares chave-valor.
        #If in the metadata section, stores key-value pairs.
        elif current_section == "metadata":
            #Analisar pares de chave-valor de metadados
            # Parse metadata key-value pairs
            if re.match(r'\|.*\|', line):  # Header line for metadata | Linha de cabeçalho para metadados
                headers = [h.strip() for h in line.split(",")]
            else:
                values = [v.strip() for v in line.split(",")]
                for h, v in zip(headers, values):
                    parsed_data["metadata"][h] = v


        #Se estiver na seção settings, armazena as configurações do arquivo.
        #If it is in the settings section, it stores the file settings.
        elif current_section == "settings":
            #Detalhes das configurações de análise
            # Parse settings details
            if re.match(r'thresholdCellSize', line):  # Header line for settings | Linha de cabeçalho para configurações
                settings_headers = [h.strip() for h in line.split(",")]
            else:
                settings_values = [v.strip() for v in line.split(",")]
                for h, v in zip(settings_headers, settings_values):
                    parsed_data["settings"][h] = v


        #Se estiver na seção trajectory_description, armazena os dados da trajetória do objeto.
        #If in the trajectory description section, stores the object's trajectory data.
        elif current_section == "trajectory_description":
            #Analisar linhas de descrição de trajetória
            # Parse trajectory description lines
            if re.match(r'lat_lon', line):  # Header line for trajectory description | Linha de cabeçalho para descrição da trajetória
                trajectory_headers = [h.strip() for h in line.split(",")]
            else:
                trajectory_values = [v.strip() for v in line.split(",")]
                parsed_data["trajectory_description"].append(
                    dict(zip(trajectory_headers, trajectory_values))
                )


            #Define aspects globalmente para armazenar os aspectos analisados.
            #Define aspects globally to store the analyzed aspects.
            aspects = trajectory_headers
    return parsed_data



  #Processa os dados do arquivo e imprime a estrutura gerada.
  #Process the file data and print the generated structure.
  structured_data = parse_representative_data(representative_data_raw)
  print(structured_data)


  #Define os aspectos a serem analisados: Ponto de Interesse (POI) e Clima (WEATHER).
  #Define the aspects to be analyzed: Point of Interest (POI) and Weather.
  aspects_to_analyze = ['POI', 'WEATHER']



  #Analisa padrões comuns nos aspectos especificados.
  #Analyzes common patterns in the specified aspects.
  def analyze_aspect_patterns(data, aspects):
      """Analyzes trajectory data for common patterns in specified aspects."""
      trajectory = data["trajectory_description"]
      aspects_to_analyze = aspects



      #Cria um dicionário para armazenar contagens de valores observados nos aspectos.
      #Creates a dictionary to store counts of observed values ​​in aspects.
      patterns = defaultdict(lambda: {"values": defaultdict(int), "count": 0})



      # Itera sobre a descrição da trajetória.
      #Iterate over the trajectory description.
      for aspect in trajectory:
        print(f"Aspect: {aspect}")



        # Conta quantas vezes cada valor aparece para cada aspecto.
        #Counts how many times each value appears for each aspect.
        for aspect_name in aspects_to_analyze:
          if aspect_name in aspect:
              patterns[aspect_name]["count"] += 1
              patterns[aspect_name]["values"][aspect[aspect_name]] += 1

      summary_parts = []

      for aspect_name, pattern_data in patterns.items():
          if pattern_data["count"] > 0:
              values_dict = pattern_data["values"]
              if len(values_dict) == 1:
                  value = next(iter(values_dict))
                  summary_parts.append(f"Usually, the object's {aspect_name.lower()} is {value}")
              elif aspect_name == 'PRICE':
                  prices = []
                  for key, value in values_dict.items():
                    match = re.findall(r'\*(\d+):\s*([\d\.]+)', key)
                    if match:
                        for m in match:
                          prices.append(float(m[0]))

                  if prices:
                    min_price = min(prices)
                    max_price = max(prices)
                    summary_parts.append(f"The evaluation of price in referred place is defined between {min_price} and {max_price}")

              else:
                  most_frequent_value = max(pattern_data["values"], key=pattern_data["values"].get)
                  if pattern_data["values"][most_frequent_value] == pattern_data["count"]:
                    summary_parts.append(f"Usually, the object's {aspect_name.lower()} is {most_frequent_value}")

      return " ".join(summary_parts)



  # Gera um resumo dos dados processados.
  #Generates a summary of the processed data.
  def generate_summary(data):



      # Resume a quantidade de trajetórias e pontos.
      #Summarizes the number of trajectories and points.
      metadata_summary = (
          f"The representative dataset contains {data['metadata'].get('|input.T|', 'N/A')} trajectories "
          f"with a total of {data['metadata'].get('|input.T.points|', 'N/A')} points.\n"
      )



      # Resume as configurações utilizadas na análise.
      #Summarizes the settings used in the analysis.
      settings = data["settings"]
      settings_summary = (
          "The analysis used the following parameters:\n"
          f"- Threshold Cell Size: {settings.get('thresholdCellSize', 'N/A')}\n"
          f"- Cell Size: {settings.get('CellSize', 'N/A')}\n"
          f"- Relevant Cell Threshold: {settings.get('tauRelevantCell', 'N/A')}\n"
          f"- Representativeness Value Threshold: {settings.get('tauRepresentativenessValue', 'N/A')}\n"
          f"- Runtime: {settings.get('runtime start', 'N/A')} to {settings.get('runtime end', 'N/A')}.\n"
      )

      #Resumo da descrição da trajetória
      # Trajectory description summary
      trajectory = data["trajectory_description"]



      # Lista eventos da trajetória do objeto.
      #List events from the object's trajectory.
      trajectory_summary = "Key trajectory events:\n"
      for aspect in trajectory:
          trajectory_summary += (
              f"  - At {aspect.get('time', 'N/A')}, the object was at location {aspect.get('lat_lon', 'N/A')} "
              f"({aspect.get('POI', 'N/A')}) under {aspect.get('WEATHER', 'N/A')} weather conditions "
              f"with precipitation of {aspect.get('PRECIP', 'N/A')} mm.\n"
          )



      # Chama a função de análise de padrões.
      #Call the pattern analysis function.
      trajectory_patterns_summary = analyze_aspect_patterns(data, aspects=aspects_to_analyze)



      # Retorna um resumo completo.
      #Returns a complete summary.
      summary = metadata_summary + settings_summary + trajectory_summary + trajectory_patterns_summary
      return summary


  #Gera um resumo para os dados representativos
  # Generate a summary for the representative data
  summary = generate_summary(structured_data)



  # Classifica um horário em manhã, tarde, noite ou madrugada.
  #Classifies a time as morning, afternoon, evening or night.
  def time_of_day(hour):
      """Categorize an hour into morning, afternoon, evening, or night."""
      if 5 <= hour < 12:
          return "morning"
      elif 12 <= hour < 17:
          return "afternoon"
      elif 17 <= hour < 21:
          return "evening"
      else:
          return "night"



  # Formata descrições de POI.
  #Formats POI descriptions
  def format_poi(poi):
      """Format POI descriptions to be more human-readable."""
      if "1.0" in poi:  # If the frequency is 100%, use a simpler phrase | Se a frequência for 100%, use uma frase mais simples
          poi_name = poi.split(":")[0].strip("{}")  # Extract the POI name | Extraia o nome do POI
          return f"at {poi_name.lower()}"

      return poi.replace("at ", "").strip()  # Remove extra "at" from transitions | Remover "at" extra das transições



  # Formata informações meteorológicas.
  #Formats weather information.
  def format_weather(weather):
      """Format weather conditions to include frequency density."""
      matches = re.findall(r'{(.*?):\s*([\d\.]+)}', weather)
      if matches:
          conditions = []
          for condition, freq in matches:
              freq = float(freq)
              if freq == 1.0:
                  conditions.append(f"{condition.lower()} (100% frequency)")
              else:
                  percentage = int(freq * 100)
                  conditions.append(f"{condition.lower()} ({percentage}%)")
          return " and ".join(conditions)


      return weather

  return structured_data, time_of_day, format_weather, format_poi



# Chama a função pre_processing, que retorna os dados processados e funções auxiliares.
#Call the pre_processing function, which returns the processed data, and auxiliary functions.
structured_data, time_of_day, format_weather, format_poi = pre_processing()









{'metadata': {'|input.T|': '3', '|input.T.points|': '17'}, 'settings': {'thresholdCellSize': '9', 'CellSize': '4.602687082099914', 'tauRelevantCell': '0.15', 'tauRepresentativenessValue': '0.1', '|cell|': '5', 'minPointRC': '2.5500002', '|rt|': '14', '|coverPoints|': '16', 'runtime start': '23-07-07 14:41:12.958', 'runtime end': '23-07-07 14:41:13.275'}, 'trajectory_description': [{'lat_lon': '0.0 6.2', 'time': '05:45', 'PRICE': '{*-1: 1.0}', 'POI': '{HOME: 1.0}', 'WEATHER': '{CLEAR: 1.0}', 'PRECIP': '10.0', 'mapping': '{1: 1}'}, {'lat_lon': '0.4 6.7', 'time': '06:15', 'PRICE': '{*-1: 1.0}', 'POI': '{HOME: 1.0}', 'WEATHER': '{CLEAR: 1.0}', 'PRECIP': '15.0', 'mapping': '{2: 7}'}, {'lat_lon': '1.0 6.8', 'time': '06:50', 'PRICE': '{*-1: 1.0}', 'POI': '{HOME: 1.0}', 'WEATHER': '{CLEAR: 1.0}', 'PRECIP': '10.0', 'mapping': '{3: 13}'}, {'lat_lon': '2.5 10.5', 'time': '10:10', 'PRICE': '{*2: 1.0}', 'POI': '{LIBRARY: 1.0}', 'WEATHER': '{CLOUDS: 1.0}', 'PRECIP': '15.0', 'mapping': '{2: 8}'}, {'l

In [2]:
#from pickle import EMPTY_DICT

import re
from datetime import datetime # Importa a classe datetime, utilizada para manipular e formatar datas e horas.
from jinja2 import Template # Importa Template da biblioteca jinja2, que permite criar templates para gerar texto estruturado.



# Define a função textual_descriptor, que gera uma descrição textual baseada nos dados estruturados de trajetórias (structured_data).
#Defines the textual_descriptor function, which generates a textual description based on structured trajectory data (structured_data).

# Também recebe as funções auxiliares time_of_day, format_weather e format_poi, usadas para classificar horários, formatar condições climáticas e locais visitados.
#It also receives the auxiliary functions time_of_day, format_weather and format_poi, used to classify times, format weather conditions and places visited.
def textual_descriptor(structured_data, time_of_day, format_weather, format_poi):

  '''
  função já faz parte do 'Textual Descriptor'.

  '''



  # Define uma função interna para gerar um resumo narrativo dos eventos de trajetória.
  #Defines an internal function to generate a narrative summary of trajectory events.
  def generate_narrative_summary(data):
      """Generate a narrative summary of the trajectory data."""
      trajectory = data["trajectory_description"]

      #Eventos de grupo por hora do dia
      # Group events by time of day
      time_groups = defaultdict(list)
      for event in trajectory:
          time_str = event.get("time", "").split("-")[0].strip()  # Use start time and strip trailing space | Use a hora de início e retire o espaço final
          hour = int(time_str.split(":")[0])  # Extract the hour | Extrai a hora
          period = time_of_day(hour)
          time_groups[period].append(event)

      #Gerar narrativa para cada grupo de tempo
      # Generate narrative for each time group
      summary_parts = []
      for period, events in time_groups.items():
          poi_counts = defaultdict(int)
          weather_counts = defaultdict(int)
          transitions = []

          for i, event in enumerate(events):
              poi = event.get("POI", "N/A")
              weather = event.get("WEATHER", "N/A")
              poi_counts[poi] += 1
              weather_counts[weather] += 1

              #Acompanha transições entre locais
              # Track transitions between locations
              if i > 0:
                  prev_poi = events[i - 1].get("POI", "N/A")
                  if poi != prev_poi:
                      transitions.append(
                          f"At {event['time']}, the object moved from {format_poi(prev_poi)} to at {format_poi(poi)}."
                      )

          #Resumi os POI e o clima mais frequentes
          # Summarize the most frequent POI and weather
          most_common_poi = max(poi_counts, key=poi_counts.get)
          most_common_weather = max(weather_counts, key=weather_counts.get)
          formatted_weather = format_weather(most_common_weather)
          summary_parts.append(
              f"In the {period}, the object is usually {format_poi(most_common_poi)} "
              f"with weather conditions {formatted_weather}."
          )

          #Adiciona transições se existirem
          # Add transitions if they exist
          if transitions:
              summary_parts.extend(transitions)

      #Combine todos os resumos em uma narrativa coesa
      # Combine all summaries into a cohesive narrative
      return " ".join(summary_parts)

  #Exemplo de uso:
  # Example usage:
  narrative_summary = generate_narrative_summary(structured_data)
  #print(narrative_summary)

  def refine_summary_text(summary_text):
      """Refines the summary text to improve readability and remove redundant words."""
      summary_text = summary_text.replace("frequently in ", "at ")
      summary_text = summary_text.replace("with weather conditions", "with")

      # Improve readability by ensuring correct sentence structure
      #summary_text = re.sub(r'\s+', ' ', summary_text)  # Remove extra spaces
      #summary_text = summary_text.replace(" - ", "–")  # Replace hyphen with en dash for time ranges

      return summary_text

  #Normalize os espaços e substitua os traços uma vez, em vez de em vários lugares
  # Normalize spaces & replace dashes once, instead of in multiple places
  def preprocess_text(input_text):
      """Cleans and normalizes input text for parsing."""
      input_text = re.sub(r"\s+", " ", input_text.strip())  # Normalize spaces | Normalizar espaços
      input_text = input_text.replace(" - ", "–")  # Replace dashes | Substituir travessões
      input_text = input_text.replace("–", "-")
      return input_text

  refined_summary = refine_summary_text(narrative_summary)
  print(refined_summary)

  usuario = input("Digite o nome do usuário (ou pressione Enter para usar 'The object'): ").strip()
  if not usuario:
      usuario = "object"  # Default value | Valor padrão


  # Template de saída
  '''

  '''
  template_text = """
  {% for period in data %}
  In the {{ period.time_period }}, the """ + usuario + """ is usually at {{ period.poi }}{% if period.weather and "100% frequency" not in period.weather %} with {{ period.weather }}{% endif %}.
  {% for transition in period.transitions %}
  At {{ transition.time }}, the """ + usuario + """ moves from {{ transition.from_poi }} to {{ transition.to_poi.split(',')[0] }}.{% if not loop.last %} Then, {% endif %}
  {% endfor %}
  {% endfor %}
  """


  input_text = preprocess_text(refined_summary)

  def parse_input_text(input_text):
      """Analisa o texto e estrutura os dados para o template."""
      time_period_pattern = r"In the (\w+), the object is usually at ([\w\s]+)(?: with [\w\s]+ \(\d+% frequency\))?\."
      transition_pattern = r"At\s+([\d:–]+)(?:-\d{2}:\d{2})?, the object moved from(?: at)? ([\w\s]+) to(?: at)? (?:\{([^}]+)\}|([\w\s]+))\."

      # Extrair períodos do dia
      # extract the periods of the day
      time_periods = re.findall(time_period_pattern, input_text)

      # Extrair transições
      #Extract transitions
      transitions = re.findall(transition_pattern, input_text)
      #print(transitions)

      def extract_best_destination(to_pois):
          """Extracts the best destination based on frequency values."""
          if not to_pois:
              return "unknown location"

          destinations = re.findall(r"([A-Z]+): ([\d.]+)", to_pois)
          if destinations:
              best_destination = sorted(destinations, key=lambda x: (-float(x[1]), x[0]))[0][0]
              return best_destination
          return to_pois.strip()


      def get_period_for_time(time_str):
          """Retorna o período do dia baseado na hora fornecida."""
          period_boundaries = {
              "morning": (6, 11),
              "afternoon": (12, 17),
              "evening": (18, 21),
              "night": (22, 5),  # Night começa às 22:00 e vai até 05:59 | Night start at 22:00 e lasts until 05:59
          }

          time_obj = datetime.strptime(time_str, "%H:%M")
          hour = time_obj.hour

          for period, (start_hour, end_hour) in period_boundaries.items():
              if start_hour <= end_hour:  # Intervalos normais (morning, afternoon, evening) | Normal intervals (morning, afternoon, evening)
                  if start_hour <= hour <= end_hour:
                      return period
              else:  # Caso especial para "night" que cruza a meia-noite (22h - 05h) | Especial case for night que cross midnight(22 - 05)
                  if hour >= start_hour or hour <= end_hour:
                      return period
          return "night"

      # Estruturar os dados com as transições divididas por período
      #Structure the data with the transitions divided by period
      structured_data = []
      for time_period, poi in time_periods:
          transitions_in_period = []

          # Filtrar transições para o período atual
          #Filter transitions for the current period

          for time, from_poi, to_pois_1, to_pois_2 in transitions:
                period = get_period_for_time(time)
                to_pois = to_pois_1 if to_pois_1 else to_pois_2  # Ensure correct destination format | Garantir o formato correto do destino

                if period == time_period.lower():
                    best_destination = extract_best_destination(to_pois)
                    transitions_in_period.append({
                        "time": time.strip(),
                        "from_poi": from_poi.strip(),
                        "to_poi": best_destination.strip(),
                    })


          structured_data.append({
              "time_period": time_period.lower(),
              "poi": poi.strip(),
              "transitions": transitions_in_period
          })

      return structured_data

  def generate_summary(data):
      """Gera um resumo"""
      summary = []
      for period in data:
          transitions = period["transitions"]
          summary.append(
              f"In the {period['time_period']}, the {usuario} is usually at {period['poi']}."
          )
          if transitions:
              for transition in transitions:
                  summary.append(
                      f"Then the {usuario} goes to {transition['to_poi']}."
                  )
      return "\n".join(summary)

  # Parse do texto de entrada
  #Parse the input text
  structured_data = parse_input_text(input_text)

  # Verificar se a transição foi reconhecida
  #Verify if the transition was recognized
  '''
  for period in structured_data_new:
      print(f"Período: {period['time_period']}, Transições: {period['transitions']}")
  '''

  # Renderizar o template
  #Render the template
  template = Template(template_text)
  output_jinja = template.render(data=structured_data)

  summary = generate_summary(structured_data)

  # Imprimir os resultados
  #Print the result
  print("=== Texto Gerado ===")
  print(output_jinja)
  print("\n=== Resumo ===")
  print(summary)

textual_descriptor(structured_data, time_of_day, format_weather, format_poi)


In the morning, the object is usually at home with clear (100% frequency). At 10:10, the object moved from at home to at at library. At 10:35, the object moved from at library to at at shopping. At 11:57, the object moved from at shopping to at at library. In the afternoon, the object is usually at restaurant with clouds (100% frequency). At 14:15, the object moved from at restaurant to at at university. In the evening, the object is usually at shopping with clear (100% frequency). At 18:00, the object moved from at shopping to at at restaurant. At 19:39, the object moved from at restaurant to at at university. In the night, the object is usually at restaurant with clear (100% frequency). At 23:20 - 23:30, the object moved from at restaurant to at at home.
Digite o nome do usuário (ou pressione Enter para usar 'The object'): test
=== Texto Gerado ===

  
  In the morning, the test is usually at home.
  
  At 10:10, the test moves from home to at library. Then, 
  
  At 10:35, the test 