Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
3cceca0
fix(logs): rename ft_pr + don't have PR stops as both
fpurcell Apr 16, 2026
9ebc3b6
fix(response): add example modsec data
fpurcell Apr 17, 2026
5c04182
fix(modsec): parse section e (response)
fpurcell Apr 20, 2026
41f5268
fix(test): add cmdline to promote test requests as valid (for testing)
fpurcell Apr 20, 2026
bb36156
fix(test): add cmdline to promote test requests as valid (for testing)
fpurcell Apr 20, 2026
999504e
fix(response): initial check of respose
fpurcell Apr 21, 2026
09574b8
fix(response): check for agencies and modes in response
fpurcell Apr 21, 2026
d6b7002
fix(response): fix modes from modsec response
fpurcell Apr 21, 2026
4b9572e
fix(old logs): fixes to process old logs
fpurcell Apr 22, 2026
140c83d
fix(modes): filter modes and agencies fixes
fpurcell Apr 23, 2026
3042959
feat(agencies): util to count the various agencies in the csv that go…
fpurcell May 18, 2026
8300e7a
fix(response): improve showing trip planner couldn't plan the trip
fpurcell May 19, 2026
776cdbb
fix(ft) add support for parsing the new exceptions data in the request
fpurcell May 19, 2026
ad70509
fix(misc) cleanup and diaper wrap url parser that is not critcal
fpurcell May 19, 2026
bbd7c75
fix(misc) add the ADULT to the reponse processor to make things specific
fpurcell May 19, 2026
b154092
fix(only) bike walk only trips
fpurcell May 19, 2026
23822dd
feat(agency count): add agency count script
fpurcell May 20, 2026
1a1262c
feat(agency count): script running well
fpurcell May 20, 2026
bca2515
feat(agency count): move agency count script into process.sh
fpurcell May 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# junk & crap
agencies.txt
logs
files
stats.txt
Expand Down
582 changes: 582 additions & 0 deletions docs/modsec_response.log

Large diffs are not rendered by default.

11 changes: 7 additions & 4 deletions ott/log_parser/control/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@ def load_log_file(file, session):
recs = parser.parse_log_file(file)
except:
recs = None

# modsec?: with no recs from above, maybe this is a mod_security file containing trip plans
#import pdb; pdb.set_trace()
if recs is None or len(recs) == 0:
# with no recs from first parser, maybe this is a mod_security file containing trip plans
#import pdb; pdb.set_trace()
recs = parser_modsec.parse_log_file(file)

if recs and len(recs) > 0:
Expand All @@ -29,6 +30,8 @@ def load_log_file(file, session):
logs.append(rawlog)
RawLog.persist_data(session, logs)

return


def loader():
files, cmdline = utils.cmd_line_loader()
Expand All @@ -45,8 +48,8 @@ def loader():


def load_and_post_process():
loader()
ProcessedRequests.process()
files,cmdline = loader()
ProcessedRequests.process(ignore_test_system=cmdline.test_system)
ProcessedRequests.post_process()


Expand Down
23 changes: 21 additions & 2 deletions ott/log_parser/control/parser_modsec.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,15 +129,31 @@ def parse_section_c(req):
try:
if "query" in sec_c:
if "variables" in sec_c:
vars = sec_c.split("variables\":")
ret_val = vars[1][:-1] # return things right of the variables, except for dangling bracket
#import pdb; pdb.set_trace()
vars = sec_c.split("variables\":")[1]
# extra parse step for 'extensions' data (tuck extensions into the query json, and keep dangling bracket)
if '},"extensions":' in vars:
ret_val = vars.replace('},"extensions":', ',"extensions":')
else:
ret_val = vars[:-1] # return things right of the variables, except for dangling bracket
else:
ret_val = sec_c
except Exception as e:
pass
return ret_val


def parse_section_e(req):
    """
    section E carries the response body

    in the raw mod_security log it looks like:
      --ac12e444-E--
      JSON (or some other response text)

    :param req: dict-like record of the request's sections (must support .get)
    :return: the value stored under "E", or None when the record has no such entry
    """
    return req.get("E")


def parse_section_f(req, def_code="520"):
"""
section f has response headers
Expand Down Expand Up @@ -180,6 +196,9 @@ def parse_raw_request(req):
payload = parse_section_c(req)
rec['payload'] = payload

response = parse_section_e(req)
rec['response'] = response

code = parse_section_f(req)
rec['code'] = code

Expand Down
4 changes: 2 additions & 2 deletions ott/log_parser/control/publisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ def csv(file_path, chunk_size=10000):
session = utils.make_session(False)
requests = session.query(ProcessedRequests).order_by(ProcessedRequests.ip_hash, ProcessedRequests.log_id).all()
if requests and len(requests) > 0:
fieldnames = requests[0].to_csv_dict().keys()
#import pdb; pdb.set_trace()
csv_columns = requests[0].to_csv_dict().keys()
with open(file_path, mode='w') as csv_file:
csv = file_utils.make_csv_writer(csv_file, fieldnames)
csv = file_utils.make_csv_writer(csv_file, csv_columns)
for r in requests:
if not r.filter_request:
csv.writerow(r.to_csv_dict())
Expand Down
103 changes: 74 additions & 29 deletions ott/log_parser/db/processed_requests.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from ast import Return
#import imp -- MARCH 7 2026 .. removed this depricated core util (any errors?)
from re import S
from sqlalchemy import Column, String, Boolean, Integer, Float, func, and_
from sqlalchemy.orm import relationship
Expand Down Expand Up @@ -54,11 +53,12 @@ class ProcessedRequests(Base):
uselist=False, viewonly=True,
)

def __init__(self, raw_rec):
def __init__(self, raw_rec, ignore_test_system=False):
#import pdb; pdb.set_trace()
super(ProcessedRequests, self)
self.log_id = raw_rec.id
self.ip_hash = utils.obfuscate(raw_rec.ip)
self.app_name = self.get_app_name(raw_rec)
self.app_name = self.get_app_name(raw_rec, ignore_test_system)

# TODO - refactor, this is a confusing mix of model and controller / parser
try:
Expand All @@ -81,13 +81,76 @@ def __init__(self, raw_rec):
self.parse_modes(modes)
self.parse_companies(qs)
self.apply_filters(raw_rec.url)
self.check_response(raw_rec.response)
except:
self.filter_request = -111
log.debug("couldn't parse " + raw_rec.url)

@classmethod
def get_agency_map(cls, tm_only=False):
tm_map = {
"TRIMET:TRAM":"Aerial Tram",
"TRIMET:PSC":"Streetcar",
"TRIMET:TRIMET":"TriMet",
}
rtp_map = {
"CLACKAMAS:ADULT":"Clackamas",
"CTRAN:ADULT":"C-TRAN",
"CTRAN_FLEX:ADULT":"The Current",
"MULT:ADULT":"Multnomah",
"RIDECONNECTION:ADULT":"Ride Connection",
"SAM:ADULT":"SAM",
"SMART:ADULT":"SMART",
"WASH_FLEX":"SPOT",
"WAPARK":"Washington Park",
}

if tm_only:
agency_map = tm_map
else:
agency_map = tm_map | rtp_map

return agency_map

def check_response(self, response):
    """
    refine this record's agencies and modes using the trip planner's response text

    - response contains at least one itinerary: self.agencies becomes a comma-separated
      string of the agency names whose ids appear in the response ("" when none do),
      and self.modes is trimmed to the transit modes the response mentions
    - response matches one of the error / empty-itinerary patterns: self.agencies = None
    - anything else (including a None or empty response): the record is left untouched

    :param response: raw response text stored with the log record; may be None or ""
    """
    def find_agencies():
        """ comma-separated names of every known agency id that occurs in the response """
        agency_map = self.get_agency_map()
        return ",".join(name for ak, name in agency_map.items() if ak in response)

    def filter_modes(def_mode="WALK"):
        """ drop BUS / RAIL / FLEX from self.modes when the response shows no sign of them """
        # modes may never have been set on this record; treat that as "no modes" so we
        # fall through to def_mode (previously None raised on .replace before any check)
        m = self.modes or ""
        if "BUS" not in response:
            m = m.replace('BUS', '')
        if not utils.is_match_any(["RAIL", "SUBWAY", "TRAIN", "TRAM", "GONDOLA"], response):
            m = m.replace('RAIL', '')
        if not utils.is_match_any(["CALL_AGENCY", "COORDINATE_WITH_DRIVER"], response):
            m = m.replace('FLEX', '')

        # rebuild the list from its non-empty pieces: a single replace(',,', ',') pass is
        # non-overlapping, so removing neighbouring modes left doubled commas behind
        # (e.g. "WALK,,,BIKE" -> "WALK,,BIKE"). stray ',' and '$' at the ends are still
        # stripped, as before.
        m = ",".join(piece for piece in m.strip(",$").split(",") if piece)
        return m if m else def_mode

    if not response:
        return

    if '"itineraries":[{' in response:
        self.agencies = find_agencies()
        self.modes = filter_modes()
    elif utils.is_match_all(['errors":[{"message"'], response):
        self.agencies = None
    elif utils.is_match_all(['"itineraries":[]', 'routingErrors', 'code'], response):
        self.agencies = None
    elif utils.is_match_all(['"itineraries":[]', 'routingErrors'], response):
        self.agencies = None


def apply_filters(self, url, fltval=-222):
""" filter out uptime test urls, etc... """
#import pdb; pdb.set_trace()
#import pdb; pdb.set_trace()
if self.filter_request is None:
if 'fromPlace=PDX' in url and ('toPlace=ZOO' in url or 'toPlace=SW%20Zoo%20Rd' in url):
self.filter_request = fltval
Expand Down Expand Up @@ -116,7 +179,7 @@ def apply_filters(self, url, fltval=-222):
self.filter_request = fltval + 55

@classmethod
def get_app_name(cls, rec, def_val="no idea what app..."):
def get_app_name(cls, rec, ignore_test_system=False, def_val="no idea what app..."):
""" trimet specific -- override me for other agencies / uses """
app_name = def_val

Expand All @@ -136,7 +199,7 @@ def get_app_name(cls, rec, def_val="no idea what app..."):

if len(rec.referer) > 3:
referer = rec.referer.lower()
if 'localhost:8000' in referer or 'labs' in referer or 'test.trimet' in referer:
if ignore_test_system is False and ('localhost:8000' in referer or 'labs' in referer or 'test.trimet' in referer):
app_name = TEST_SYSTEM
elif 'call-test' in referer:
app_name = call2
Expand All @@ -158,7 +221,7 @@ def get_app_name(cls, rec, def_val="no idea what app..."):
elif utils.is_old_trimet(rec.url):
app_name = old

if utils.is_developer_api(rec.url):
if ignore_test_system is False and utils.is_developer_api(rec.url):
rec.is_api = True
if app_name is def_val:
app_name = api
Expand Down Expand Up @@ -222,26 +285,7 @@ def parse_agencies(self, qs, tm_only=False):
return the list of agencies implied in the request
will look at the banned agencies param, and trim the list of possible request agencies
"""
tm_map = {
"TRIMET:TRAM":"Aerial Tram",
"TRIMET:PSC":"Streetcar",
"TRIMET:TRIMET":"TriMet",
}
rtp_map = {
"CLACKAMAS":"Clackamas",
"CTRAN":"C-TRAN",
"CTRAN_FLEX":"The Current",
"MULT":"Multnomah",
"RIDECONNECTION:":"Ride Connection",
"SAM":"SAM",
"SMART":"SMART",
"WASH_FLEX":"SPOT",
"WAPARK":"Washington Park",
}
if tm_only:
agency_map = tm_map
else:
agency_map = tm_map | rtp_map
agency_map = self.get_agency_map(tm_only)

# filter banned agencies from the above list
for b in utils.get_banned_agencies(qs):
Expand Down Expand Up @@ -322,6 +366,7 @@ def to_csv_dict(self):
- request datetime
- ???
"""
#import pdb; pdb.set_trace()
ua = utils.clean_useragent(self.log.browser)
browser = utils.get_browser(ua)
url = utils.to_url(self.log)
Expand All @@ -347,7 +392,7 @@ def to_csv_dict(self):
return ret_val

@classmethod
def process(cls, chunk_size=10000):
def process(cls, chunk_size=10000, ignore_test_system=False):
"""
process logs from log file(s)
"""
Expand All @@ -361,7 +406,7 @@ def process(cls, chunk_size=10000):
# step 2: loop thru raw log file entries
processed = []
for l in logs:
p = ProcessedRequests(l)
p = ProcessedRequests(l, ignore_test_system)
processed.append(p)
# step 2b: save off the post-process data in 'chunks'
if len(processed) > chunk_size:
Expand Down
3 changes: 3 additions & 0 deletions ott/log_parser/db/raw_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class RawLog(Base):
date = Column(DateTime())
url = Column(String(2084))
payload = Column(String())
response = Column(String())
code = Column(Integer())
referer = Column(String(2084))
browser = Column(String(2084))
Expand All @@ -25,12 +26,14 @@ def __init__(self, rec):
self.date = utils.convert_apache_dt(rec.get('apache_dt', None))
self.url = rec.get('url', "")
self.payload = rec.get('payload', "")
self.response = rec.get('response', "")
self.code = num_utils.to_int(rec.get('code', 212), 212)
self.referer = rec.get('referer', "")
self.browser = rec.get('browser', "")
self.is_json = rec.get('is_json', False)
#import pdb; pdb.set_trace()


def main():
from ..control.loader import load_log_file
session = utils.make_session(False)
Expand Down
19 changes: 16 additions & 3 deletions ott/log_parser/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,19 +154,28 @@ def obfuscate(input, key=u'key'):
return digest


def cmd_line_loader(prog_name='log_parser/bin/loader', sub_dirs=[""]):
def cmd_line_loader(prog_name='poetry run loader', sub_dirs=[""]):
parser = db_cmdline.db_parser(prog_name, url_required=False)
parser.add_argument(
'--log_directory', '--logs', '-logs', '-l',
required=True,
help="Directory of .log files..."
)
# TODO: why are both logs and files needed?
# file_utils.find_files(cmdline.log_directory, cmdline.files, True)
parser.add_argument(
'--files', '--ff', '-ff',
required=False,
default=".log",
help="Directory of .log files..."
)
parser.add_argument(
'--test_system', '--ts', '-ts',
action='store_true',
required=False,
help="Don't mark any records as coming from a 'test system' (e.g., ability to load test requests and publish things, etc...)."
)

cmdline = parser.parse_args()
files = file_utils.find_files(cmdline.log_directory, cmdline.files, True)
if len(files) == 0:
Expand Down Expand Up @@ -233,8 +242,12 @@ def encode(p):
def to_url(log):
ret_val = log.url
if log.payload and len(log.payload) > 10 and '?' not in log.url:
pl = json.loads(log.payload) # OTP 2.x graphql
ret_val = "{}home/planner-trip/?fromPlace={}&toPlace={}".format(log.referer, encode(pl.get('fromPlace')), encode(pl.get('toPlace')))
#import pdb; pdb.set_trace()
try:
pl = json.loads(log.payload) # OTP 2.x graphql
ret_val = "{}home/planner-trip/?fromPlace={}&toPlace={}".format(log.referer, encode(pl.get('fromPlace')), encode(pl.get('toPlace')))
except Exception as e:
pass
return ret_val


Expand Down
27 changes: 25 additions & 2 deletions ott/log_parser/view/csv.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,29 @@
from collections import Counter
from ott.utils.parse.cmdline.base_cmdline import file_cmdline
from ott.utils import file_utils

import logging
log = logging.getLogger(__file__)


def csv(data: dict, file_name: str):
return None
def modes_plus_agencies(prog_name='poetry run view_csv', file_name='trip_requests.csv'):
    """
    print how often each agencies + modes combination occurs in a csv file

    the csv path comes from the command line (file_cmdline); every row's 'agencies'
    and 'modes' columns are combined into one label, and the labels are printed in
    sorted order, each preceded by its count.

    :param prog_name: program name handed to the command line parser
    :param file_name: file name handed to the command line parser
    :return: 0
    """
    args = file_cmdline(prog_name, file_name)
    print(f"{args.file}")

    tally = Counter()
    for row in file_utils.read_csv(args.file):
        agencies = row.get('agencies').strip()
        modes = row.get('modes')
        if len(agencies) > 1:
            joiner = " -> "
        elif "BIKE" in modes or "WALK" in modes:
            # no agencies, but a bike / walk mode is present
            joiner = ""
        else:
            joiner = "(COULDN'T PLAN TRIP) "
        tally[f"{agencies}{joiner}{modes}"] += 1

    for label, count in sorted(tally.items()):
        print(f"{count:8} {label}")

    return 0
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@ test_process = "ott.log_parser.db.processed_requests:main"
loader = "ott.log_parser.control.loader:main"
load_and_post_process = "ott.log_parser.control.loader:load_and_post_process"
publisher = "ott.log_parser.control.publisher:main"
parser = "ott.log_parser.control.parser:main"
parser_modsec_test = "ott.log_parser.control.parser_modsec:simple_test"
report = "ott.log_parser.control.reporter:main"

modes_plus_agencies = "ott.log_parser.view.csv:modes_plus_agencies"
parser = "ott.log_parser.control.parser:main"
stats = "ott.log_parser.control.stats:main"

[build-system]
Expand Down
Loading