In [None]:
import polars as pl
import polars.selectors as cs
import plotly.express as px


def _tidy_column_name(name):
    """Normalize one CSV header: upper-case it, turn blanks into underscores, drop parentheses."""
    # maketrans(' ', '_', '()') maps each blank to '_' and deletes both '(' and ')'.
    return name.upper().translate(str.maketrans(' ', '_', '()'))


# Columns (named after normalization) that the rest of the notebook works with.
_KEPT_COLUMNS = [
    'TYPE', 'DELIVERED',
    'RECEIVED',
    'LOCKER_NAME', 'LOCKER_BOX_DOOR', 'LOCKER_SIZE',
    'PICKUP_DURATION', 'DELIVERY_DURATION',
    'LOCATION_TYPE', 'ADDRESS', 'LATITUDE', 'LONGITUDE',
    'BOROUGH',
    'RECEIVE_DATE', 'CREATED_DATE', 'DELIVERY_DATE', 'WITHDRAW_DATE', 'EXPIRE_DATE',
]

# Load the reservations export, normalize the headers, keep the columns of
# interest, parse every *_DATE column, and derive two address fields.
df = (
    pl.read_csv('LockerNYC_Reservations_20250903.csv')
    .rename(_tidy_column_name)
    .select(_KEPT_COLUMNS)
    # Every column whose name ends in _DATE is parsed as month/day/year hour:minute.
    .with_columns(cs.ends_with('_DATE').str.to_datetime(format="%m/%d/%Y %H:%M"))
    # Both expressions read the ADDRESS column of the incoming frame, so
    # ZIP_CODE is still taken from the full, untrimmed address.
    .with_columns(
        # Last blank-separated token of the address (assumed to be the ZIP code).
        ZIP_CODE=pl.col('ADDRESS').str.split(' ').list.last(),
        # Keep only the text before the first comma.
        ADDRESS=pl.col('ADDRESS').str.split(',').list.first(),
    )
)
print(df.sample(3))


In [None]:
# Glimpse ten random rows whose CREATED_DATE differs from RECEIVE_DATE and
# whose PICKUP_DURATION is present. Multiple predicates passed to filter()
# are AND-ed together.
df.filter(
    pl.col('CREATED_DATE').ne(pl.col('RECEIVE_DATE')),
    pl.col('PICKUP_DURATION').is_not_null(),
).sample(n=10).glimpse()

In [None]:
# Lay the five date columns of a few random rows side by side.
# Same row filter as the glimpse cell: created != received, pickup duration present.
(
    df
    .filter(
        pl.col('CREATED_DATE').ne(pl.col('RECEIVE_DATE')),
        pl.col('PICKUP_DURATION').is_not_null(),
    )
    .select(['CREATED_DATE', 'DELIVERY_DATE', 'RECEIVE_DATE', 'WITHDRAW_DATE', 'EXPIRE_DATE'])
    .sample(n=5)
    # After transposing, each date column is a row and each sampled
    # reservation is a column (column_0, column_1, ...).
    .transpose(include_header=True)
    # Order the date fields by their value in the first sampled reservation.
    .sort(by='column_0')
)



# Scratch note (unused): alternative date parsing via .str.strptime(pl.Datetime, fmt="%m/%d/%Y %H:%M")


In [None]:
# Share of all rows, in percent, that each distinct value accounts for.
pct_of_rows = (100 * pl.col('count') / pl.col('count').sum()).cast(pl.Float32)

# Print the value distribution (count and percentage) of every column.
for column_name in df.columns:
    value_shares = (
        df.get_column(column_name)
        .value_counts()
        .with_columns(PCT=pct_of_rows)
    )
    print(column_name, value_shares)

In [None]:
# Print the column names followed by one blank line.
print(list(df.columns), end='\n\n')

In [None]:
# Distinct BOROUGH values in sorted order.
# Series.unique().sort() stays inside polars and tolerates missing values,
# whereas sorted(set(...)) raises TypeError as soon as a null (None) has to
# be compared with a string.
print(df['BOROUGH'].unique().sort().to_list())

In [None]:
# Map with one marker per row of df, positioned by LATITUDE/LONGITUDE and
# colored by BOROUGH.
fig = px.scatter_map(data_frame=df, lat='LATITUDE', lon='LONGITUDE', color='BOROUGH')
fig.show()

In [None]:
# Columns whose combination defines one group.
group_by_cols = [
    'LOCKER_NAME',
    'ADDRESS',
    'LOCATION_TYPE',
    'LATITUDE',
    'LONGITUDE',
    'BOROUGH',
]

# Number of rows per group, smallest counts first.
df_group_by = (
    df
    .group_by(group_by_cols)
    .agg(COUNT=pl.len())
    .sort(by='COUNT')
)
df_group_by
# Sanity check: df_group_by['COUNT'].sum() should equal df.height.

In [None]:
# Show the column names of df as the cell's output.
df.columns