In [1]:
import sys
from importlib import reload  

from elements import Stop, Variant, Path
from queries import StopQuery, VariantQuery, PathQuery
from network.bus import BusNetwork
from chatter import Chatter

stops = StopQuery.from_ndjson()
variants = VariantQuery.from_ndjson()
paths = PathQuery.from_ndjson()
net = BusNetwork.from_json()

unavailable until proj_trans is called


In [2]:
good_chatter = Chatter(stops=stops, variants=variants, paths=paths, net=net, is_smart=True)

100%|██████████| 4397/4397 [00:00<00:00, 7581.03it/s]
100%|██████████| 4397/4397 [00:02<00:00, 1738.60it/s]

{'prefix': "\n[Notes for searching Stops]:\nTerminologies:\n* Phường/Xã = Ward\n* Quận/Huyện = Zone\n\nSearching Process:\n* In most cases, it is best to find the appropriate ID of a Stop before performing any further searching.\n* A user may mistype a word, especially names. If there is any typo, find the most reasonable alternatives before applying the search.\n* Sometimes the user may type a Vietnamese name without tones. If so, you may want to ignore the tones while searching.\n* A query may result in an error due to a lack of options, in which case, output all the possible values found so far.\n* If only one value is needed, select one randomly.\n\nLocations and Distances:\n* To calculate the distance between two Stops:\n    1. Find the IDs of the source (src) node and the destination (dest) node.\n    2. Use the given distance function with the node IDs.\n\n* To calculate the distance from one Stop (source) to all other Stops, use the following expression:\n    df['distance'] = d




In [3]:
bad_chatter = Chatter(stops=stops, variants=variants, paths=paths, net=net, is_smart=False)

100%|██████████| 4397/4397 [00:00<00:00, 8379.53it/s]
100%|██████████| 4397/4397 [00:02<00:00, 1908.24it/s]

{}





## Simple Task: Asking a property of a Stop

### Querying ID

In [5]:
query = 'What is the ID of the Stop named "Cầu vượt An Sương"'

In [6]:
print(good_chatter.ask(query)['output'])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to filter the dataframe to find the row where the Name column is "Cầu vượt An Sương" and then retrieve the StopId.
Action: python_repl_ast
Action Input: df[df['Name'] == 'Cầu vượt An Sương']['StopId'].values[0][0m[36;1m[1;3m2252[0m[32;1m[1;3mI now know the final answer
Final Answer: The ID of the Stop named "Cầu vượt An Sương" is 2252[0m

[1m> Finished chain.[0m
The ID of the Stop named "Cầu vượt An Sương" is 2252


In [7]:
print(bad_chatter.ask(query)['output'])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to filter the dataframe to find the row where the Name column is "Cầu vượt An Sương" and then retrieve the StopId.
Action: python_repl_ast
Action Input: df[df['Name'] == 'Cầu vượt An Sương']['StopId'].values[0][0m[36;1m[1;3m2252[0m[32;1m[1;3mI now know the final answer
Final Answer: The ID of the Stop named "Cầu vượt An Sương" is 2252.[0m

[1m> Finished chain.[0m
The ID of the Stop named "Cầu vượt An Sương" is 2252.


In [10]:
stops[2252]

| StopID:              2252                      |
| Code:                Q12 088                   |
| Name:                Cầu vượt An Sương         |
| Type:                Nhà chờ                   |
| Zone:                Quận 12                   |
| Ward:                                          |
| Address No.:         Xưởng Z735               |
| Street:              Quốc lộ 1A                |
| Support Disability?: False                     |
| Status:              Đang khai thác            |
| Lng:                 106.616081                |
| Lat:                 10.843623                 |
| Search tokens:       CvAS, XZ, Ql1A            |
| Routes:              24 -> 33 -> 60-1 -> 61-3  |

### Querying Routes

In [6]:
query = 'How many routes pass the Stop KTX Tran Hung Dao?'
stops.search('name', "KTX Trần Hưng Đạo").values[0].routes

['01', '139', '152', '45', '56']

In [3]:
print(good_chatter.ask(query)['output'])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find the row corresponding to the Stop KTX Tran Hung Dao and then count the number of routes passing through it.
Action: Use pandas to filter the dataframe for the row corresponding to the Stop KTX Tran Hung Dao.
Action Input: df[df['Name'] == 'KTX Trần Hưng Đạo'][0mUse pandas to filter the dataframe for the row corresponding to the Stop KTX Tran Hung Dao. is not a valid tool, try one of [python_repl_ast].[32;1m[1;3mI should use the python_repl_ast tool to filter the dataframe for the row corresponding to the Stop KTX Tran Hung Dao.
Action: python_repl_ast
Action Input: df[df['Name'] == 'KTX Trần Hưng Đạo'][0m[36;1m[1;3m     StopId    Code               Name StopType    Zone  Ward     AddressNo  \
110     121  Q1 125  KTX Trần Hưng Đạo  Nhà chờ  Quận 1  None            10   
261      26  Q1 137  KTX Trần Hưng Đạo  Nhà chờ  Quận 1  None  135/40 (135)   

            Street SupportDisability          St

In [4]:
print(bad_chatter.ask(query)['output'])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find the row corresponding to the Stop KTX Tran Hung Dao and then count the number of routes passing through it.
Action: Use pandas to filter the dataframe for the row corresponding to the Stop KTX Tran Hung Dao.
Action Input: df[df['Name'] == 'KTX Trần Hưng Đạo'][0mUse pandas to filter the dataframe for the row corresponding to the Stop KTX Tran Hung Dao. is not a valid tool, try one of [python_repl_ast].[32;1m[1;3mI should use the python_repl_ast tool to filter the dataframe for the row corresponding to the Stop KTX Tran Hung Dao.
Action: python_repl_ast
Action Input: df[df['Name'] == 'KTX Trần Hưng Đạo'][0m[36;1m[1;3m     StopId    Code               Name StopType    Zone  Ward     AddressNo  \
110     121  Q1 125  KTX Trần Hưng Đạo  Nhà chờ  Quận 1  None            10   
261      26  Q1 137  KTX Trần Hưng Đạo  Nhà chờ  Quận 1  None  135/40 (135)   

            Street SupportDisability          St

## Advanced Task: Finding the distance between two Stops

### Shortest distance on the bus network (using the `distance_to_one` tool)

In [17]:
query = 'Distance between "Bến xe Miền Tây" and "Bến xe Miền Đông"'

In [19]:
print(good_chatter.ask(query)['output'])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find the IDs of "Bến xe Miền Tây" and "Bến xe Miền Đông" before I can calculate the Euclidean distance between them. Since the names are not directly visible in the provided data snippet, I will search for these names ignoring tones as suggested in the notes for searching Stops.
Action: python_repl_ast
Action Input: print(df[df['Name'].str.contains('Miền Tây', regex=False, na=False)])[0m[36;1m[1;3m     StopId     Code             Name  StopType           Zone           Ward  \
592     116  QBT 012  Bến xe Miền Tây  Trụ dừng  Quận Bình Tân  Phường An Lạc   
644     725    BX 46  Bến xe Miền Tây    Bến xe  Quận Bình Tân           None   
694     119  QBT 008  Bến xe Miền Tây   Nhà chờ  Quận Bình Tân  Phường An Lạc   

           AddressNo            Street SupportDisability          Status  \
592          358-360  Kinh Dương Vương             Không  Đang khai thác   
644  Bến xe Miền Tây  Kinh Dương Vương 

ValidationError: 2 validation errors for distance_to_oneSchema
src
  value is not a valid integer (type=type_error.integer)
dest
  field required (type=value_error.missing)

In [7]:
query = 'Shortest distance between Đại học Khoa học Tự nhiên cơ sở Nguyễn Văn Cừ and Chợ Lớn?'

In [9]:
print(good_chatter.ask(query)['output'])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find the IDs of "Đại học Khoa học Tự nhiên cơ sở Nguyễn Văn Cừ" and "Chợ Lớn" to calculate the distance between them. Since the names are in Vietnamese and might not include tones, I should look for the most reasonable matches in the provided data or similar names without considering the tones.

Action: python_repl_ast
Action Input: print(df['Name'])[0m[36;1m[1;3m0                         Bến xe buýt Sài Gòn
1                               Tôn Thất Tùng
2                            Nguyễn Thị Nghĩa
3                        Trường Emst Thalmann
4       Trạm Trung chuyển trên đường Hàm Nghi
                        ...                  
4392                    Ga tàu thuỷ Bạch Đằng
4393                      Ga tàu thuỷ Bình An
4394                     Ga tàu thuỷ Thanh Đa
4395              Ga tàu thuỷ Hiệp Bình Chánh
4396                    Ga tàu thuỷ Linh Đông
Name: Name, Length: 4397, dtype: object
[0m

ValidationError: 2 validation errors for distance_to_oneSchema
src
  value is not a valid integer (type=type_error.integer)
dest
  field required (type=value_error.missing)

In [6]:
print(bad_chatter.ask(query)['output'])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: We can calculate the distance between two points using their latitude and longitude coordinates.
Action: python_repl_ast
Action Input: import geopy.distance[0m[36;1m[1;3mModuleNotFoundError: No module named 'geopy'[0m[32;1m[1;3mI need to install the geopy module to calculate the distance between two points using their latitude and longitude coordinates.
Action: python_repl_ast
Action Input: !pip install geopy[0m[36;1m[1;3mSyntaxError: invalid syntax (<unknown>, line 1)[0m[32;1m[1;3mI need to install the geopy module using the command line, not within the Python shell.
Action: command line
Action Input: !pip install geopy[0mcommand line is not a valid tool, try one of [python_repl_ast].[32;1m[1;3mI can install the geopy module using the command line outside of the Python shell.
Action: command line
Action Input: pip install geopy[0mcommand line is not a valid tool, try one of [python_repl_ast].[32;1m

In [7]:
query = 'Find any stops within 2km of Chợ Lớn. Some of the stops might have the same name, so print out the StopId also.'

In [8]:
good_chatter.ask(query)['output']



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: We need to find the ID of the Chợ Lớn stop and then calculate the distance between Chợ Lớn and all other stops to see which ones are within 2km.
Action: Find the ID of the Chợ Lớn stop.
Action Input: df[df['Name'].str.contains('Chợ Lớn', case=False)][0mFind the ID of the Chợ Lớn stop. is not a valid tool, try one of [python_repl_ast, distance_to_one].[32;1m[1;3mI need to use the Python shell to find the ID of the Chợ Lớn stop first before calculating the distances. Let's use the Python shell to search for the Chợ Lớn stop. 
Action: python_repl_ast
Action Input: df[df['Name'].str.contains('Chợ Lớn', case=False)][0m[36;1m[1;3m      StopId     Code                       Name        StopType    Zone  \
101        8     BX14             Bến xe Chợ Lớn          Bến xe  Quận 5   
245       12   Q5 058  Siêu thị Điện máy Chợ Lớn        Trụ dừng  Quận 5   
297      464   Q5 023             Bến xe Chợ Lớn        Trụ dừng

'Agent stopped due to iteration limit or time limit.'

In [4]:
import chatter.tools.shortest_path as sp
sp.load_net(net)

  7%|▋         | 320/4397 [00:00<00:01, 3141.65it/s]

100%|██████████| 4397/4397 [00:00<00:00, 5329.11it/s]
100%|██████████| 4397/4397 [00:02<00:00, 1608.65it/s]


In [6]:
sp.distance_to_one.run(424, 8)

ValidationError: 1 validation error for distance_to_oneSchema
__root__
  distance_to_oneSchema expected dict not int (type=type_error)