# core data utilities

> core utilities for data processing: 
> datetime processing, time zone processing, validity, filtering

In [None]:
#| default_exp data.database.deduplication

In [None]:
from asyncio import as_completed
from threading import Thread

#| hide
from nbdev.showdoc import *

In [None]:
#| hide
from IPython.core.interactiveshell import InteractiveShell
# "all" makes IPython display the value of every expression statement in a
# cell, not just the last one (several cells below rely on this).
InteractiveShell.ast_node_interactivity = "all"

In [None]:
#| export
from datetime import datetime
import pandas as pd
import pytz
#| export
# Setting the option to None turns off pandas' chained-assignment warning.
pd.options.mode.chained_assignment = None

# Precision reduction for start_ltt, start_lng, end_ltt, end_lng and pick distinct values

In [None]:
from tqdm.notebook import tqdm
from sqlalchemy import create_engine, select, func, distinct, MetaData, Table, update, bindparam, Column, insert, desc, asc, and_, or_, not_, Numeric, cast, func
from sqlalchemy.orm import sessionmaker, aliased
import pandas as pd
import numpy as np

In [None]:
# Engine for the raw price-training SQLite file (path is relative to this notebook).
sql_eng = create_engine('sqlite:///../data/price_training_raw.db', echo=False)
# Long-lived connection used by the pd.read_sql preview below; no explicit
# close() for it is visible in this notebook.
conn = sql_eng.connect()
# Reflect the table's columns from the database instead of declaring them here.
my_table = Table('price_training_raw_2024_usd', MetaData(), autoload_with=sql_eng)

In [None]:
# SELECT count(DISTINCT dispatch_id): number of unique dispatch ids in the raw table.
query = select(func.count(distinct(my_table.c.dispatch_id)))

In [None]:
# Run the count on a short-lived connection; scalar() returns the first column
# of the first result row.
with sql_eng.connect() as connection:
    result = connection.execute(query)
    unique_count = result.scalar()
    
print(unique_count)

In [None]:
# Preview the first 10 rows of the raw table, indexed by dispatch_id.
# Note that this rebinds `query`, replacing the count query defined earlier.
query = select(my_table).limit(10)
df = pd.read_sql(query, conn, index_col='dispatch_id')
# df[['route_start','route_end']]
df

# order by start_ltt, start_lng, end_ltt, end_lng

In [None]:
# Re-create the engine and connection, this time keeping the MetaData object in
# `metadata` so that later cells can attach or reflect further tables on it.
sql_eng = create_engine('sqlite:///../data/price_training_raw.db', echo=False)
conn = sql_eng.connect()
metadata = MetaData()
my_table = Table('price_training_raw_2024_usd', metadata, autoload_with=sql_eng)

In [None]:
# Reflect every table of the database into `metadata`, then preview the raw table.
metadata.reflect(bind=sql_eng)
raw_t = metadata.tables['price_training_raw_2024_usd']
s = select(raw_t).limit(10)
# Use a context manager so the connection is released once the rows have been
# read; the previous `sql_eng.connect().execute(s)` never closed it.
with sql_eng.connect() as connection:
    rp = connection.execute(s)
    # The result must be consumed while the connection is still open.
    df = pd.DataFrame(rp)
df

## Create new table with latitude and longitude reduced to 5 decimal places

In [None]:
# Rows that have all four coordinates, ordered by start position.
# The second sort key used to repeat start_ltt, so longitude was never part of
# the ordering; it now sorts by latitude first and longitude second.
ordered_t = select(my_table).where(
    and_(
        my_table.c.start_ltt.isnot(None),
        my_table.c.start_lng.isnot(None),
        my_table.c.end_ltt.isnot(None),
        my_table.c.end_lng.isnot(None),
    )
).order_by(asc(my_table.c.start_ltt), asc(my_table.c.start_lng))
ordered_t = ordered_t.select_from(my_table)
# Wrap as a named subquery so later cells can select from and join against it.
ordered_t = ordered_t.alias()

In [None]:
# ordered_t = ordered_t.limit(10)
# df = pd.read_sql(ordered_t.limit(10),sql_eng)
# df

In [None]:
# Low-precision ("_lp") copies of the four coordinates, keyed by dispatch_id.
#
# SQLite treats CAST(<real> AS NUMERIC(9,5)) as a no-op, so the cast alone does
# not change the value inside SQL; only SQLAlchemy's result conversion rounded
# it on the Python side. Any GROUP BY / DISTINCT applied to these columns in SQL
# therefore still compared full-precision values. round(x, 5) performs the
# reduction in the database; the outer cast is kept so the Python-side result
# type stays the same as before.
lp_ltt_lng_t = select(
    ordered_t.c.dispatch_id,
    cast(func.round(ordered_t.c.start_ltt, 5), Numeric(9, 5)).label('start_ltt_lp'),
    cast(func.round(ordered_t.c.start_lng, 5), Numeric(9, 5)).label('start_lng_lp'),
    cast(func.round(ordered_t.c.end_ltt, 5), Numeric(9, 5)).label('end_ltt_lp'),
    cast(func.round(ordered_t.c.end_lng, 5), Numeric(9, 5)).label('end_lng_lp'),
)
lp_ltt_lng_t = lp_ltt_lng_t.select_from(ordered_t)
lp_ltt_lng_t = lp_ltt_lng_t.alias()

In [None]:
# df = pd.read_sql(lp_ltt_lng_t, sql_eng)
# df



In [None]:
# One representative row per distinct start point (latitude, longitude).
#
# The previous form grouped by latitude only, which merged start points that
# share a latitude but differ in longitude, and relied on `.distinct(column)`,
# a PostgreSQL-only DISTINCT ON construct that other dialects do not implement.
# Grouping by both coordinates expresses "distinct start point" directly.
# The non-grouped columns (dispatch_id, end_*) are taken from one row of each
# group, as SQLite allows for bare columns in a GROUP BY query.
distinct_start_lp_t = select(
    lp_ltt_lng_t.c.dispatch_id,
    lp_ltt_lng_t.c.start_ltt_lp,
    lp_ltt_lng_t.c.start_lng_lp,
    lp_ltt_lng_t.c.end_ltt_lp,
    lp_ltt_lng_t.c.end_lng_lp,
).group_by(lp_ltt_lng_t.c.start_ltt_lp, lp_ltt_lng_t.c.start_lng_lp)
distinct_start_lp_t = distinct_start_lp_t.alias()

In [None]:
# df = pd.read_sql(distinct_start_lp_t, sql_eng)
# df

In [None]:

# One representative row per distinct end point (latitude, longitude).
#
# The previous form grouped by latitude only, which merged end points that
# share a latitude but differ in longitude, and relied on `.distinct(column)`,
# a PostgreSQL-only DISTINCT ON construct that other dialects do not implement.
# Grouping by both coordinates expresses "distinct end point" directly.
# The non-grouped columns (dispatch_id, start_*) are taken from one row of each
# group, as SQLite allows for bare columns in a GROUP BY query.
distinct_end_lp_t = select(
    lp_ltt_lng_t.c.dispatch_id,
    lp_ltt_lng_t.c.start_ltt_lp,
    lp_ltt_lng_t.c.start_lng_lp,
    lp_ltt_lng_t.c.end_ltt_lp,
    lp_ltt_lng_t.c.end_lng_lp,
).group_by(lp_ltt_lng_t.c.end_ltt_lp, lp_ltt_lng_t.c.end_lng_lp)
distinct_end_lp_t = distinct_end_lp_t.alias()

In [None]:

# df = pd.read_sql(distinct_end_lp_t, sql_eng)
# df

In [None]:
# Attach the low-precision coordinates of the start-deduplicated rows to the
# full-precision rows, matching on dispatch_id, and load the result into pandas.
_start_join = ordered_t.join(
    distinct_start_lp_t,
    ordered_t.c.dispatch_id == distinct_start_lp_t.c.dispatch_id,
)
fp_zone_start_t = select(
    ordered_t.c,
    distinct_start_lp_t.c.start_ltt_lp,
    distinct_start_lp_t.c.start_lng_lp,
    distinct_start_lp_t.c.end_ltt_lp,
    distinct_start_lp_t.c.end_lng_lp,
).select_from(_start_join)
df_start = pd.read_sql(fp_zone_start_t, sql_eng)
df_start

In [None]:
# df_start.to_sql('price_training_raw_2024_usd_start_deduplicated', sql_eng, if_exists='replace', index=True)
# df_start.to_csv('../data/price_training_raw_2024_usd_start_deduplicated.csv', index=False)


In [None]:
# Attach the low-precision coordinates of the end-deduplicated rows to the
# full-precision rows, matching on dispatch_id, and load the result into pandas.
_end_join = ordered_t.join(
    distinct_end_lp_t,
    ordered_t.c.dispatch_id == distinct_end_lp_t.c.dispatch_id,
)
fp_zone_end_t = select(
    ordered_t.c,
    distinct_end_lp_t.c.start_ltt_lp,
    distinct_end_lp_t.c.start_lng_lp,
    distinct_end_lp_t.c.end_ltt_lp,
    distinct_end_lp_t.c.end_lng_lp,
).select_from(_end_join)
df_end = pd.read_sql(fp_zone_end_t, sql_eng)
df_end

In [None]:
# df_end.to_sql('price_training_raw_2024_usd_end_deduplicated', sql_eng, if_exists='replace', index=True)
# df_end.to_csv('../data/price_training_raw_2024_usd_end_deduplicated.csv', index=False)



In [None]:
import requests

In [None]:
# Ride-pricing endpoint; the lookup functions below send from/to coordinates to
# it and read the fixed-price zone names out of the JSON response.
url = 'https://j1j495o5pk.execute-api.us-east-2.amazonaws.com/upncoming/ride-pricings'

In [None]:
# Input CSVs with the deduplicated rows: index 0 = start points, index 1 = end points.
csv_file_list = ['../data/price_training_raw_2024_usd_start_deduplicated.csv',
             '../data/price_training_raw_2024_usd_end_deduplicated.csv']
# Output CSVs in the same order; the lookup loop below appends to index 0.
csv_result_file_list = ['../data/price_training_raw_2024_usd_start_result_deduplicated.csv',
                 '../data/price_training_raw_2024_usd_end_result_deduplicated.csv']
# Row counts of the two in-memory frames, used to size the progress bars.
total_rows = [df_start.shape[0], df_end.shape[0]]
# Only referenced from commented-out code in the visible cells.
result_csv = '../data/dispatch_fixed_zones_label_list.csv'
total_rows

# Number of CSV rows read and processed per chunk.
chunk_size = 1000

In [None]:
def get_fixed_zone_one(start_ltt, start_lng, end_ltt, end_lng, timeout=30):
    """Look up the fixed-price zones for a single trip.

    Sends the coordinates to the pricing endpoint in the module-level ``url``
    and reads ``fleets[0].vehicle_classes[0].price_detail.base_pricing
    .fix_price_detail`` from the JSON response.

    Args:
        start_ltt: Start latitude, sent as ``from_lat``.
        start_lng: Start longitude, sent as ``from_lng``.
        end_ltt: End latitude, sent as ``to_lat``.
        end_lng: End longitude, sent as ``to_lng``.
        timeout: Seconds to wait for the server. Without a timeout a stalled
            connection would block forever and the ``Timeout`` handler below
            could never fire.

    Returns:
        ``{'from': ..., 'to': ...}`` with the zone values on success, or
        ``None`` on any failure (a short reason is printed).
    """
    params = {
        'from_lat': start_ltt,
        'from_lng': start_lng,
        'to_lat': end_ltt,
        'to_lng': end_lng,
    }
    try:
        response = requests.get(url=url, params=params, timeout=timeout)
    except requests.exceptions.Timeout:
        print('Timeout')
        return None
    except requests.exceptions.TooManyRedirects:
        print('TooManyRedirects')
        return None
    except requests.exceptions.RequestException:
        print('RequestException, Catastrophic error!')
        return None
    except Exception as e:
        print(f"request: {e}")
        return None

    try:
        res = response.json()
    except Exception as e:
        print(f"json: {e}")
        return None

    try:
        fix_price_zones = res['fleets'][0]['vehicle_classes'][0]['price_detail']['base_pricing']['fix_price_detail']
    except KeyError:
        print("No Fixed Price!")
        return None
    except IndexError:
        print("IndexError for fix_price_zones")
        return None
    except Exception as e:
        print(f"dict: {e}")
        return None

    # Anything other than a dict here means there are no zone names to read.
    if not isinstance(fix_price_zones, dict):
        print(f"No fix price: {fix_price_zones}")
        return None

    try:
        return {'from': fix_price_zones['from'], 'to': fix_price_zones['to']}
    except KeyError:
        print("KeyError for route")
        return None
       

In [None]:
 
def get_fixed_zone_chunk(chunk):
    """Fill ``route_start`` / ``route_end`` for every row of ``chunk`` in place.

    Each row's low-precision coordinates are looked up one at a time with
    ``get_fixed_zone_one``. Rows whose lookup fails are left unchanged.

    Args:
        chunk: DataFrame with the columns ``start_ltt_lp``, ``start_lng_lp``,
            ``end_ltt_lp`` and ``end_lng_lp``; it is modified in place.

    Returns:
        None.
    """
    # Size the progress bar from the chunk itself: the last chunk of a file is
    # usually shorter than chunk_size.
    for i, r in tqdm(chunk.iterrows(), total=len(chunk), desc='chunk progress', leave=False):
        fpz = get_fixed_zone_one(r['start_ltt_lp'], r['start_lng_lp'], r['end_ltt_lp'], r['end_lng_lp'])
        if fpz is None:
            # The lookup already printed why it failed; subscripting None here
            # would only add a misleading "not subscriptable" message.
            continue
        try:
            chunk.at[i, 'route_start'] = fpz['from']
            chunk.at[i, 'route_end'] = fpz['to']
        except KeyError:
            print("Chunk execption: KeyError for route")
        except Exception as e:
            print(f"Chunk exception: {e}")

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed
import httpx
from http import HTTPStatus
from concurrent import futures
from enum import Enum

In [None]:
FZPQueryStatus = Enum('FZPQueryStatus', 'JsonError HTTPXStatusError HTTPXError IndexOrKeyError DictError NoFixedPrice Success')
# Zones of the most recent successful get_one_fixed_zone() call. Kept for
# backward compatibility only: it is a single shared dict, so it must not be
# used to pass results out of concurrently running lookups.
fzp = {'from': None, 'to': None}


def _query_fixed_zone(start_ltt, start_lng, end_ltt, end_lng):
    """Query the pricing endpoint for one trip and return ``(status, zones)``.

    ``zones`` is ``{'from': ..., 'to': ...}`` when ``status`` is
    ``FZPQueryStatus.Success`` and ``None`` otherwise. The function writes to
    no shared state, so each worker thread gets its own result.
    """
    params = {
        'from_lat': start_ltt,
        'from_lng': start_lng,
        'to_lat': end_ltt,
        'to_lng': end_lng,
    }
    try:
        response = httpx.get(url=url, params=params)
    except httpx.HTTPStatusError:
        # NOTE(review): httpx raises HTTPStatusError from
        # response.raise_for_status(), which is not called here, so this
        # branch is presumably never taken — confirm whether non-2xx
        # responses should be rejected explicitly.
        return FZPQueryStatus.HTTPXStatusError, None
    except Exception:
        return FZPQueryStatus.HTTPXError, None

    try:
        res = response.json()
    except Exception:
        return FZPQueryStatus.JsonError, None

    try:
        fix_price_zones = res['fleets'][0]['vehicle_classes'][0]['price_detail']['base_pricing']['fix_price_detail']
    except Exception:
        return FZPQueryStatus.IndexOrKeyError, None

    if not isinstance(fix_price_zones, dict):
        return FZPQueryStatus.DictError, None

    try:
        zones = {'from': fix_price_zones['from'], 'to': fix_price_zones['to']}
    except KeyError:
        return FZPQueryStatus.NoFixedPrice, None
    return FZPQueryStatus.Success, zones


def get_one_fixed_zone(start_ltt, start_lng, end_ltt, end_lng) -> FZPQueryStatus:
    """Look up the fixed-price zones for one trip and return only the status.

    On success the zones are also stored in the module-level ``fzp`` dict, as
    before. Concurrent callers should use ``_query_fixed_zone`` instead, which
    returns the zones together with the status.
    """
    status, zones = _query_fixed_zone(start_ltt, start_lng, end_ltt, end_lng)
    if status == FZPQueryStatus.Success:
        fzp['from'] = zones['from']
        fzp['to'] = zones['to']
    return status


non_fzp_count = 0
max_concurrent = 20
# Running total of rows actually read; (i+1)*chunk_size overshoots on a short
# final chunk.
scanned_row_number = 0
for i,chunk in enumerate(tqdm(pd.read_csv(csv_file_list[0],index_col='dispatch_id', chunksize=chunk_size), total=total_rows[0]//chunk_size+1, desc='Overall Progress')):
    chunk = chunk.astype({'route_start': str, 'route_end': str})
    non_fzp_count_chunk = 0

    with ThreadPoolExecutor(max_workers=max_concurrent) as executor:
        # Maps each submitted future to the index label of the row it belongs to.
        to_do_map = {}
        for ind, r in chunk.iterrows():
            future = executor.submit(_query_fixed_zone, r['start_ltt_lp'], r['start_lng_lp'], r['end_ltt_lp'], r['end_lng_lp'])
            to_do_map[future] = ind
        done_iter = as_completed(to_do_map)
        done_iter = tqdm(done_iter, total=len(to_do_map), desc='chunk progress', leave=False)

        for future in done_iter:
            try:
                # The zones come from this future's own return value. Reading
                # the shared `fzp` dict here gave every row whatever the last
                # finished worker had written.
                status, zones = future.result()
            except Exception:
                non_fzp_count_chunk += 1
                continue

            if status != FZPQueryStatus.Success:
                non_fzp_count_chunk += 1
                continue

            ind = to_do_map[future]
            try:
                chunk.at[ind, 'route_start'] = zones['from']
                chunk.at[ind, 'route_end'] = zones['to']
            except Exception:
                non_fzp_count_chunk += 1

    non_fzp_count += non_fzp_count_chunk
    scanned_row_number += len(chunk)
    try:
        # Append the chunk; the header is written only while the file is still empty.
        with open(csv_result_file_list[0], 'a') as f:
            chunk.to_csv(f, header=f.tell()==0,chunksize=chunk_size)
        print(f"\r Non_FZP count: {non_fzp_count}/{scanned_row_number}")
    except Exception as e:
        print(f"\r {e}, Non_FZP count: {non_fzp_count}/{scanned_row_number} ")

In [None]:
# def get_one_fixed_zone1(start_ltt, start_lng, end_ltt, end_lng):
#     params = {
#         'from_lat': start_ltt,
#         'from_lng': start_lng,
#         'to_lat': end_ltt,
#         'to_lng': end_lng,
#     }
#     try:
#         response = requests.get(url=url, params=params)
#     except requests.exceptions.Timeout:
#         print('Timeout')
#         return None
#     except requests.exceptions.TooManyRedirects:
#         print('TooManyRedirects')
#         return None
#         # Tell the user their URL was bad and try a different one
#     except requests.exceptions.RequestException as e:
#         print('RequestException, Catastrophic error!')
#         return None
# 
#     except Exception as e:
#         print(f"request: {e}")
#         return None
#     # print('2')
#     try:
#         res = response.json()
#     except Exception as e:
#         print(f"json: {e}")
#         return None
#     # print('3')
#     try:
#         fix_price_zones = res['fleets'][0]['vehicle_classes'][0]['price_detail']['base_pricing']['fix_price_detail']
#     except KeyError as e:
#         # print(f"No Fixed Price!")
#         return None
#     except IndexError as e:
#         # print("IndexError for fix_price_zones")
#         return None
#     except Exception as e:
#         # print(f"dict: {e}")
#         return None
#     # print('4')
#     if not isinstance(fix_price_zones,dict):
#         # print(f"No fix price: {fix_price_zones}")
#         return None
#     else:
#         try:
#             return {'from': fix_price_zones['from'], 'to': fix_price_zones['to']}
#         except KeyError as e:
#             # print("KeyError for route")
#             return None
# 
# non_fzp_count = 0
# for i,chunk in enumerate(tqdm(pd.read_csv(csv_file_list[0],index_col='dispatch_id', chunksize=chunk_size), total=total_rows[0]//chunk_size+1, desc='Overall Progress')):
#     chunk = chunk.astype({'route_start': str, 'route_end': str})
#     non_fzp_count_chunk = 0
#     # get_fixed_zone_chunk(chunk)
#     for ind,r in tqdm(chunk.iterrows(),total=chunk_size,desc='chunk progress', leave=False):
#         fpz = get_one_fixed_zone1(r['start_ltt_lp'], r['start_lng_lp'], r['end_ltt_lp'], r['end_lng_lp'])
#         if fpz is None:
#             non_fzp_count_chunk += 1
#             # print(f"\r non fzp chunk: {non_fzp_count_chunk}/{chunk_size}")
#             continue
#         try:
#             chunk.at[ind, 'route_start'] = fpz['from']
#             chunk.at[ind, 'route_end'] = fpz['to']
#         except KeyError as e:
#             non_fzp_count_chunk += 1
#             # print(f"\r {e}: {non_fzp_count_chunk}/{chunk_size}")
#             continue
#         except Exception as e:
#             non_fzp_count_chunk += 1
#             # print(f"\r {e}: {non_fzp_count_chunk}/{chunk_size}")
#             continue
# 
#     non_fzp_count += non_fzp_count_chunk
#     scanned_row_number = (i+1)*chunk_size
#     try:
#         # pd.DataFrame(data=route_list, columns=['dispatch_id', 'rou:w
#         # te_start', 'route_end']).to_csv(result_csv, mode='a', header=False)
#         with open(csv_result_file_list[0], 'a') as f:
#             chunk.to_csv(f, header=f.tell()==0,chunksize=chunk_size)
#         # chunk.to_csv(path_or_buf=csv_result_file_list[0],mode='a',chunksize=chunk_size)
#         print(f"\r Non_FZP count: {non_fzp_count}/{scanned_row_number}")
#     except Exception as e:
#         print(f"\r {e}, Non_FZP count: {non_fzp_count}/{scanned_row_number} ")
#         continue

In [None]:
# for chunk in tqdm(pd.read_csv(csv_file_list[0],index_col='dispatch_id', chunksize=chunk_size), total=total_rows[0]//chunk_size+1, desc='Overall Progress'):
# # for chunk in pd.read_csv(csv_file_list[0],index_col='dispatch_id', chunksize=chunk_size):
#     # [chunk[r] for r in chunk]
#     # l = [r for r in chunk.iterrows()]
#     # l
#     # print(chunk.dtypes)
#     # route_list = []
#     chunk = chunk.astype({'route_start': str, 'route_end': str})
#     for i,r in tqdm(chunk.iterrows(),total=chunk_size,desc='chunk progress', leave=False):
#     # for i,r in chunk.iterrows():
#         # l = [i, r['start_ltt'], r['start_lng'], r['end_ltt'], r['end_lng']]
#         # print(l)
#         params = {
#             'from_lat': r['start_ltt_lp'],
#             'from_lng': r['start_lng_lp'],
#             'to_lat': r['end_ltt_lp'],
#             'to_lng': r['end_lng_lp'],
#         }
#         try:
#             response = requests.get(url=url, params=params)
#         except requests.exceptions.Timeout:
#             print('Timeout')
#             continue
#         except requests.exceptions.TooManyRedirects:
#             print('TooManyRedirects')
#             continue
#             # Tell the user their URL was bad and try a different one
#         except requests.exceptions.RequestException as e:
#             print('RequestException, Catastrophic error!')
#             continue
#             # catastrophic error. bail.
#             # raise SystemExit(e)
# 
#         except Exception as e:
#             print(f"request: {e}")
#             continue
#         # print('2')
#         try:
#             res = response.json()
#         except Exception as e:
#             print(f"json: {e}")
#             continue
#         # print('3')
#         try:
#             fix_price_zones = res['fleets'][0]['vehicle_classes'][0]['price_detail']['base_pricing']['fix_price_detail']
#         except KeyError as e:
#             print(f"No Fixed Price!")
#             continue
#         except IndexError as e:
#             print("IndexError for fix_price_zones")
#             continue
#         except Exception as e:
#             print(f"dict: {e}")
#             continue
#         # print('4')
#         if not isinstance(fix_price_zones,dict):
#             print(f"No fix price: {fix_price_zones}")
#         else:
#             try:
#                 # route_list.append((i, fix_price_zones['from'], fix_price_zones['to']))
#                 # fix_zone_routes_list.append(route)
#                 chunk.at[i, 'route_start'] = fix_price_zones['from']
#                 chunk.at[i, 'route_end'] = fix_price_zones['to']
#                 # ins = insert(fixed_zone_routes).values(
#                 #     start=fix_price_zones['from'], end=fix_price_zones['from'], dispatch_id= pt        ._mapping['dispatch_id'])
#                 # stmt = (
#                 #     update(raw)  # 'raw' is your table object
#                 #     .where(raw.c.dispatch_id == int(i))
#                 #     .values(
#                 #         route_start = fix_price_zones['from'],
#                 #         route_end = fix_price_zones['to']
#                 #     )
#                 # )
#                 # conn.execute(stmt)
#                 # conn.commit()
#             except KeyError as e:
#                 print("KeyError for route")
#                 continue
#     
#     try:
#         # pd.DataFrame(data=route_list, columns=['dispatch_id', 'route_start', 'route_end']).to_csv(result_csv, mode='a', header=False)
#         chunk.to_csv(path_or_buf=csv_result_file_list[0],mode='a',chunksize=chunk_size)
#     except Exception as e:
#         print(f"csv: {e}")
#         continue
#     
#     # with sqlite_eng.begin() as conn:
#     #     conn.execute(
#     #         stmt, 
#     #         [
#     #             {
#     #                 'b_dispatch_id': i,
#     #                 'route_start': r['route_start'],
#     #                 'route_end': r['route_end']
#     #             }
#     #             for i,r in chunk.iterrows()
#     #         ],
#     #     )
#     #     conn.commit()
# 

# Query FP-Server and add label to database

In [None]:
# new_columns = [Column(col.name, col.type, primary_key=col.primary_key) for col in my_table.columns]
# new_table = Table('price_training_raw_2024_usd_reordered', metadata, *new_columns)
# new_table.create(sql_eng)
# Reflect the reordered table from the database; the commented lines above
# presumably show how it was created the first time.
new_table = Table('price_training_raw_2024_usd_reordered', metadata, autoload_with=sql_eng)


In [None]:
# Session factory bound to the SQLite engine, plus one session used by the
# insert cells below.
Session = sessionmaker(bind=sql_eng)
session = Session()

## insert data into new table: fp_zone_start_t & fp_zone_end_t

In [None]:
# Split ordered_rows into chunks of chunk_size rows.
chunk_size = 1000
total_rows = len(ordered_rows)
total_rows
# Ceiling division: `total_rows//chunk_size+1` produced one extra, empty chunk
# whenever total_rows was an exact multiple of chunk_size (including 0).
total = (total_rows + chunk_size - 1) // chunk_size
total

In [None]:
# Copy ordered_rows into new_table chunk by chunk, committing after each chunk.
try:
    for chunk in tqdm(range(total), total=total, desc='Overall Processing'):
        start = chunk*chunk_size
        end = min(start + chunk_size, total_rows)
        chunk_data = ordered_rows[start:end]
        if not chunk_data:
            # Nothing left to insert (total_rows is an exact multiple of chunk_size).
            continue
        batch = [dict(zip(cols,row)) for row in chunk_data]
        # Pass the rows as execute() parameters (executemany form).
        # insert().values(batch) renders one multi-row INSERT with a bound
        # parameter for every value of every row, which can exceed SQLite's
        # limit on the number of variables per statement for wide tables.
        result = session.execute(insert(new_table), batch)
        session.commit()
finally:
    # Release the session even if an insert fails part-way through.
    session.close()

In [None]:
# Load all fetched rows into a DataFrame (rebinds the notebook-wide `df`).
df = pd.DataFrame(data=ordered_rows)

In [None]:
# NOTE(review): `fp_zone_t` is not defined in any visible cell (only
# fp_zone_start_t and fp_zone_end_t are) — confirm which query is meant.
df = pd.read_sql(fp_zone_t, sql_eng)
df

In [None]:
# distinct_t = select(distinct(lp_ltt_lng_t.c.start_lng_lp))
# distinct_t = distinct_t.limit(1000)#.distinct(lp_ltt_lng_t.c.start_ltt)
# df = pd.read_sql(distinct_t, sql_eng)
# df

## Create new table with ordered data

In [None]:
# Rows that have a start position, ordered by start latitude then longitude.
# The second sort key used to repeat start_ltt, so longitude was never part of
# the ordering.
ordered_t = select(my_table).where(
    and_(
        my_table.c.start_ltt.isnot(None),
        my_table.c.start_lng.isnot(None),
    )
).order_by(asc(my_table.c.start_ltt), asc(my_table.c.start_lng))
ordered_t = ordered_t.select_from(my_table)


In [None]:
# Execute the ordered query and keep every row in memory as a list.
with sql_eng.connect() as connection:
    result = connection.execute(ordered_t)
    ordered_rows = result.fetchall()
# ordered_rows[:3]

In [None]:
# Preview the first 10 ordered rows with their column names.
data = ordered_rows[:10]
# Column names in query order; also used to build the insert dictionaries.
cols = [c.name for c in ordered_t.subquery().columns]
df = pd.DataFrame(data=data, columns=cols)
df

In [None]:

# Turn each row into a {column name: value} dict and show the first two.
batch = [dict(zip(cols, row)) for row in data]
batch[:2]

In [None]:
# NOTE(review): this cell does not parse as written — the inner `for` has no
# indented body (`route_list = []` sits at the same level as that `for`).
# It looks like an abandoned draft; confirm the intent before running it.
for row in tqdm(result, total=unique_count):
    for chunk in tqdm(pd.read_sql(query.statement, conn, index_col='dispatch_id', chunksize=chunk_size), total=total_rows//chunk_size+1, desc='Overall Processing'):
    route_list = []


In [None]:
# Row-by-row variant of the copy: stream the ordered query on a connection and
# insert each row through the session, committing once after the last row.
with sql_eng.connect() as conn:
    result = conn.execute(ordered_t)
    for row in tqdm(result, total=unique_count):
        session.execute(new_table.insert().values(row))
    session.commit()

In [None]:
# list(my_table.columns)

In [None]:
# metadata_new = MetaData()
# my_table.to_metadata(metadata_new)
# metadata_new.tables

In [None]:
# Define the geo-ordered table as a copy of my_table under a new name.
# A Column object belongs to exactly one Table, so passing *my_table.columns to
# a second Table() is rejected by SQLAlchemy; to_metadata() creates fresh
# copies of the columns for the new table.
new_table = my_table.to_metadata(metadata, name='price_training_raw_2024_usd_geo_ordered')
# list(new_table.columns)
# new_table.name = 'price_training_raw_2024_usd_geo_ordered'
# metadata_new.create_all(sql_eng)

# reduce precision of start_ltt, start_lng, end_ltt, end_lng by 3 digits

In [None]:
# Scratch test: set route_start of every row in the in-memory frame to a placeholder.
df.loc[:,'route_start'] = 'start'
df

In [None]:

# Scratch test: UPDATE statement that sets both route columns for one
# hard-coded dispatch_id; printing it shows the generated SQL.
stmt = (
    update(my_table)  # my_table is the reflected raw table
    .where(my_table.c.dispatch_id == 822019)
    .values(
        route_start='New Zone C',
        route_end='New Zone D'
    )
)
print(stmt)

In [None]:
# Compile the statement and show the values bound to its parameters.
compiled = stmt.compile()
compiled.params

In [None]:
# Execute the UPDATE and commit it explicitly before the connection closes.
with sql_eng.connect() as conn:
    result = conn.execute(stmt)
    conn.commit()

In [None]:
# Read the updated row back to check the UPDATE.
s = select(my_table).where(my_table.c.dispatch_id == 822019)
# Use a context manager so the connection is released after the rows are
# fetched; `sql_eng.connect().execute(s)` never closed it.
with sql_eng.connect() as connection:
    rp = connection.execute(s)
    results = rp.fetchall()
results

In [None]:
df

In [None]:
# Scratch test: update the database one row at a time. For every row of df the
# same placeholder route values are written to the table row whose dispatch_id
# equals the frame's index label, using a new connection and commit per row.
for i,r in df.iterrows():
    l = [i, r['ride_id'], r['dispatch_amount'],r['fleet']]
    # print(r)
    # Mirror the change in the in-memory frame as well.
    df.at[i,'route_end'] = 'Shanghai'
    stmt = (
        update(my_table)
        .where(my_table.c.dispatch_id == int(i))
        .values(
            route_start='Beijing',
            route_end='Shanghai',
        )
    )
    with sql_eng.connect() as conn:
        conn.execute(stmt)
        conn.commit()
    # print('----')
    print(l)
df

# Update data batchwise in sqlite db

In [None]:
# Placeholder route values used to exercise the batch update below.
df.loc[:,'route_start'] = 'Zone 0'
df.loc[:,'route_end'] = 'Zone 1'
df

In [None]:
# Parameterised UPDATE for batch execution. The WHERE value is bound under the
# name 'b_dispatch_id', so each parameter dict passed to execute() must use
# that key for the row id alongside 'route_start' and 'route_end'.
stmt = (
    update(my_table)  # my_table is the reflected raw table
    .where(my_table.c.dispatch_id == bindparam('b_dispatch_id'))
    .values(
        route_start=bindparam('route_start'),
        route_end=bindparam('route_end')
    )
)
print(stmt)

In [None]:
# Preview of per-row parameter dicts built from df.
# NOTE(review): the id key here is 'dispatch_id', while the batch UPDATE binds
# 'b_dispatch_id'; this list is only displayed, but it presumably would not
# work as parameters for that statement — confirm.
l = [ 
            {'dispatch_id': i,
             'route_start': r['route_start'],
             'route_end': r['route_end']} 
            for i,r in df.iterrows()
        ]
l

In [None]:

# Run the parameterised UPDATE once per row of df by passing a list of
# parameter dicts (one per row) in a single execute() call.
# NOTE(review): engine.begin() presumably commits on successful exit, which
# would make the explicit commit() below redundant — TODO confirm.
with sql_eng.begin() as conn:
    conn.execute(
        stmt,
        [ 
            {'b_dispatch_id': i,
             'route_start': r['route_start'],
             'route_end': r['route_end']} 
            for i,r in df.iterrows()
        ],
    )
    conn.commit()
    

In [None]:
# Read the first 20 rows back to inspect the result of the updates.
s = select(my_table).limit(20)
# Use a context manager so the connection is released after the rows are
# fetched; `sql_eng.connect().execute(s)` never closed it.
with sql_eng.connect() as connection:
    rp = connection.execute(s)
    results = rp.fetchall()
df = pd.DataFrame(results)
df


In [None]:
# Print the index label and a few columns of every row (iterrows yields each
# row as a Series).
for i,r in df.iterrows():
    print(i, r['ride_id'], r['dispatch_amount'],r['fleet'])

In [None]:

# df.to_sql('price_training_raw_2024_usd', sqlite_eng, if_exists= 'replace',index=True, index_label='dispatch_id')

In [None]:

# Same walk using itertuples(). The tuples are read-only, so the frame is
# changed through df.at with the row's Index rather than through `r`.
for r in df.itertuples():
    # print(r)
    print(r.ride_id, r.fleet)
    df.at[r.Index, 'route_start'] = 'PVG'
    # r.route_start = 'start'
    # r.route_end = 'end'
df


In [None]:

# df.to_sql('price_training_raw_2024_usd', sqlite_eng, if_exists= 'replace',index=True, index_label='dispatch_id')





In [None]:
#| hide
import nbdev; nbdev.nbdev_export()