In [2]:
import requests

url = "https://raw.githubusercontent.com/Hitchwiki/hitchhiking-data-standard/refs/heads/main/python/python.py"
response = requests.get(url)

with open("data_standard_pydantic_model.py", "w") as f:
    f.write(response.text)

In [3]:
from data_standard_pydantic_model import Hitchhiker, HitchhikingRecord, Location, Signal, Stop

In [4]:
import sqlite3
import pandas as pd
from tqdm import tqdm
import os
import wget
from dotenv import load_dotenv
load_dotenv()

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None) 


In [5]:
url = 'https://hitchmap.com/dump.sqlite'
filename = 'dump.sqlite'
if os.path.exists(filename):
        os.remove(filename)
filename = wget.download(url)

In [6]:
fn = 'dump.sqlite'
points = pd.read_sql('select * from points where not banned', sqlite3.connect(fn))
points["datetime"] = points["datetime"].astype("datetime64[ns]")

points.loc[points["datetime"] < "2000-01-01", "datetime"] = None

# cleaning invalid timestamps
points["ride_datetime"].replace("0224-10-31T21:30", None, inplace=True)
points["ride_datetime"].replace("0025-03-07T08:00", None, inplace=True)
points["ride_datetime"].replace("1014-11-04T14:30", None, inplace=True)
points["ride_datetime"].replace("0202-04-03T18:50", None, inplace=True)

points["ride_datetime"] = points["ride_datetime"].astype("datetime64[ns]")
len(points)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  points["ride_datetime"].replace("0224-10-31T21:30", None, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  points["ride_datetime"].replace("0025-03-07T08:00", None, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the 

67559

In [7]:
points.head()

Unnamed: 0,id,lat,lon,rating,country,wait,nickname,comment,datetime,reviewed,banned,ip,dest_lat,dest_lon,signal,ride_datetime,user_id,from_hitchwiki
0,0,40.974714,27.511654,3.0,TR,,Tamergem,"If you avoid the mini busses, you can get on a ride within 10 minutes, and Tekirdag city is a bridge between Istanbul and Greece. I always use that city center spot and it is quite good",2011-05-26 10:06:17,1,0,,,,,NaT,,1.0
1,1,32.072756,34.793444,4.0,IL,,,,NaT,1,0,,,,,NaT,,
2,2,41.727928,27.220731,4.0,TR,,,,NaT,1,0,,,,,NaT,,
3,3,41.099858,29.007339,3.0,TR,,Xavierallard,There is a lot of traffic there and little space to stop. I found it hard.,2011-03-15 12:52:11,1,0,,,,,NaT,,1.0
4,4,30.169989,66.999612,3.0,PK,,,,NaT,1,0,,,,,NaT,,


In [8]:
# assume that during the lifershalte time the timestamps where not always set
# thus attribute this part of the dataset to the lifershalte
no_date = points[points["datetime"].isna()]
with_date = points[~points["datetime"].isna()]

lift = pd.concat([no_date, with_date[with_date["datetime"] < "2010-08-11"]])

wiki = with_date[(with_date["datetime"] >= "2010-08-11") & (with_date["datetime"] < "2022-10-13")]

map = with_date[with_date["datetime"] >= "2022-10-13"]

In [9]:
len(lift), len(wiki), len(map), len(lift) + len(wiki) + len(map)

(7477, 42466, 17616, 67559)

In [10]:
def map_signal(signal: str) -> Signal:
    if not signal:
        return None

    if signal == "sign":
        return Signal(
            methods=["sign"],
        )
    elif signal == "thumb":
        return Signal(
            methods=["thumb"],
        )
    elif signal == "ask":
        return Signal(
            methods=["asking"],
        )
    elif signal == "ask-sign":
        return Signal(
            methods=["asking", "sign"],
        )
    else:
        return None


def create_record_from_row(row: pd.Series, source: str, license: str, rating_formula= lambda x: x) -> HitchhikingRecord:
    stops = [
        Stop(
            location=Location(latitude=row["lat"], longitude=row["lon"], is_exact=True),
            arrival_time=row["ride_datetime"].strftime("%Y-%m-%dT%H:%M:%S") if pd.notna(row["ride_datetime"]) else None,
            departure_time=(row["ride_datetime"] + pd.to_timedelta(row["wait"], unit="m")).strftime(
                "%Y-%m-%dT%H:%M:%S"
            )
            if pd.notna(row["ride_datetime"]) and pd.notna(row["wait"])
            else None,
            waiting_duration=f"{int(row['wait'])}M" if pd.notna(row["wait"]) else None,
        ),
    ]
    if pd.notna(row["dest_lat"]) and pd.notna(row["dest_lon"]):
        stops.append(Stop(location=Location(latitude=row["dest_lat"], longitude=row["dest_lon"], is_exact=False)))

    record = HitchhikingRecord(
        version="0.0.0",
        stops=stops,
        rating=rating_formula(row["rating"]),
        hitchhikers=[
            Hitchhiker(
                nickname=row["nickname"] if pd.notna(row["nickname"]) else "Anonymous"
            )
        ],
        comment=row["comment"],
        signals=[map_signal(row["signal"])] if row["signal"] else None,
        occupants=None,
        mode_of_transportation=None,
        ride=None,
        declined_rides=None,
        source=source,
        license=license,
        submission_time=row["datetime"].strftime("%Y-%m-%dT%H:%M:%S") if pd.notna(row["datetime"]) else None,
    )

    return record

In [11]:
records = []

for _, row in tqdm(lift.iterrows(), total=len(lift)):
    records.append(
        create_record_from_row(
            row,
            source="liftershalte.info",
            license="cc-by-sa-4.0",
        )
    )

for _, row in tqdm(wiki.iterrows(), total=len(wiki)):
    records.append(
        create_record_from_row(
            row,
            source="hitchwiki.org",
            license="cc-by-sa-4.0",
        )
    )

for _, row in tqdm(map.iterrows(), total=len(map)):
    records.append(
        create_record_from_row(
            row,
            source="hitchmap.com",
            license="odbl",
        )
    )

100%|██████████| 7477/7477 [00:00<00:00, 14839.19it/s]
100%|██████████| 42466/42466 [00:03<00:00, 12419.67it/s]
100%|██████████| 17616/17616 [00:01<00:00, 9853.29it/s] 


In [12]:
print(records[0].model_dump_json(indent=2, exclude_none=True))

{
  "version": "0.0.0",
  "stops": [
    {
      "location": {
        "latitude": 32.0727564373025,
        "longitude": 34.7934436798096,
        "is_exact": true
      }
    }
  ],
  "rating": 4,
  "hitchhikers": [
    {
      "nickname": "Anonymous"
    }
  ],
  "source": "liftershalte.info",
  "license": "cc-by-sa-4.0"
}


In [13]:
# records_dicts = [r.model_dump(exclude_none=False, by_alias=True) for r in records]

In [14]:
from post_data_standard import NostrHitchhikingPostDataStandard

In [15]:
poster = NostrHitchhikingPostDataStandard()

Posting as npub npub169ll2xluxt2fy9lgeddl54v2tfuwdjlrafxeg7ktclc3efw9m02szsw962


In [16]:
records[0]

HitchhikingRecord(version='0.0.0', stops=[Stop(location=Location(latitude=32.0727564373025, longitude=34.7934436798096, is_exact=True), arrival_time=None, departure_time=None, waiting_duration=None)], rating=4, hitchhikers=[Hitchhiker(origin_location=None, origin_country=None, year_of_birth=None, gender=None, languages=None, was_driver=None, nickname='Anonymous', hitchhiking_since=None, reasons_to_hitchhike=None)], comment=None, signals=None, occupants=None, mode_of_transportation=None, ride=None, declined_rides=None, source='liftershalte.info', license='cc-by-sa-4.0', submission_time=None)

In [None]:
poster.post(ride_record=records[-1])

vars(event)
{'content': '{"version":"0.0.0","stops":[{"location":{"latitude":47.60448057677427,"longitude":21.569467198241913,"is_exact":true},"arrival_time":"2025-08-05T16:52:00","departure_time":"2025-08-05T17:32:00","waiting_duration":"40M"},{"location":{"latitude":47.66346083959451,"longitude":21.513558290898807,"is_exact":false}}],"rating":3,"hitchhikers":[{"nickname":"Anonymous"}],"comment":"Fast '
            "traffic, but maybe in the city it's "
            'better.","signals":[{"methods":["thumb"]}],"source":"hitchmap.com","license":"odbl","submission_time":"2025-08-06T19:52:55"}',
 'created_at': 1756650190,
 'id': '27ef9627895d92a9464fd9358ffa18886b23d0733390680e45d464d1aa59109c',
 'kind': 36820,
 'pubkey': 'd17ff51bfc32d49217e8cb5bfa558a5a78e6cbe3ea4d947acbc7f11ca5c5dbd5',
 'sig': '083cc95137571be5f920f860903a3a2fc2b9e156269bd51ba77a14cccb9b2a7f35f7e2784dc9fe9ee8cdb83a644ff419ef3b739ca319a0cd2dd725f6238921b2',
 'tags': [['expiration', 0],
          ['d', 'hitchmap.com-95d92

RuntimeError: This event loop is already running

Setting websocket_ping_timeout=60


: 