In [None]:
#| output: false
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
import json
import requests
import pickle

# Route DataFrame.plot() through plotly and make the white theme the default
# template for every figure created below.
pd.set_option("plotting.backend", "plotly")
pio.templates.default = "plotly_white"

# Load the NYC taxi trips and derive the columns used by the analysis below.
# NOTE: the CSV is fetched over the network each time this statement runs.
df = pd.read_csv(
    "https://raw.githubusercontent.com/guebin/DV2023/main/posts/NYCTaxi.csv"
).assign(
    log_trip_duration=lambda d: np.log(d.trip_duration),
    # Parse each timestamp column with one vectorized call instead of calling
    # pd.to_datetime once per row through Series.apply.
    pickup_datetime=lambda d: pd.to_datetime(d.pickup_datetime),
    dropoff_datetime=lambda d: pd.to_datetime(d.dropoff_datetime),
    # Straight-line distance between pickup and dropoff, expressed in the units
    # of the latitude/longitude columns (no conversion to a physical length).
    dist=lambda d: np.sqrt(
        (d.pickup_latitude - d.dropoff_latitude) ** 2
        + (d.pickup_longitude - d.dropoff_longitude) ** 2
    ),
    # Replace the numeric vendor codes with letter labels; any code other than
    # 1 or 2 becomes NaN.
    vendor_id=lambda d: d.vendor_id.map({1: "A", 2: "B"}),
)

# Add per-trip speed plus the hour and weekday fields used for grouping later.
# None of the new columns depends on another new column, so each one is
# computed directly from the existing frame.
df = df.assign(
    speed=df["dist"] / df["trip_duration"],
    pickup_hour=df["pickup_datetime"].dt.hour,
    dropoff_hour=df["dropoff_datetime"].dt.hour,
    dayofweek=df["pickup_datetime"].dt.dayofweek,
)

# Keep every 100th trip (with a fresh 0..n-1 index) so the aggregations and
# plots below stay light.
df_small = df.iloc[::100].reset_index(drop=True)

# Swap the numeric weekday (0-6) for its one-character Korean label.
day_map = dict(enumerate(["월", "화", "수", "목", "금", "토", "일"]))
df_small = df_small.assign(dayofweek=df_small["dayofweek"].map(day_map))

# Mean speed for every (weekday, pickup hour) pair, returned as a flat table
# with the grouping keys kept as ordinary columns.
avg_speed_by_day_time = df_small.groupby(
    ["dayofweek", "pickup_hour"], as_index=False
)["speed"].mean()

# Line chart: mean speed per pickup hour, one line per weekday.
# NOTE(review): "speed" is dist / trip_duration and dist is built from raw
# latitude/longitude differences, so the "(km/h)" in the axis label does not
# match the plotted units -- confirm the intended unit.
fig1 = px.line(
    avg_speed_by_day_time,
    x="pickup_hour",
    y="speed",
    color="dayofweek",
    # Without an explicit order the weekday lines follow the row order of the
    # grouped table, which is sorted by label text rather than Monday-Sunday.
    category_orders={"dayofweek": list(day_map.values())},
    title="Average Speed by Day of Week and Time of Day",
    labels={
        "pickup_hour": "Pickup Hour",
        "speed": "Average Speed (km/h)",
        "dayofweek": "Day of Week",
    },
)
# Mean trip distance for each pickup hour, returned as a flat table with
# pickup_hour kept as an ordinary column.
avg_distance_by_hour = df_small.groupby("pickup_hour", as_index=False)["dist"].mean()

# Line chart: mean trip distance per pickup hour (single line, all days pooled).
fig2 = px.line(
    data_frame=avg_distance_by_hour,
    x="pickup_hour",
    y="dist",
    labels=dict(pickup_hour="Pickup Hour", dist="Average Distance (km)"),
    title="Average Distance by Time of Day",
)

# Final exam 1-(1)


In [None]:
# Display the average-speed-by-weekday-and-hour line chart.
fig1.show()

In [None]:
# Display the average-distance-by-hour line chart.
fig2.show()

)