In [13]:
import pandas as pd
from sodapy import Socrata

# Open a client for the data.ny.gov open-data portal (None = no app token)
client = Socrata("data.ny.gov", None)

# Pull just a handful of rows from the dataset; enough to see its schema
sample_results = client.get("wujg-7c2s", limit=5)

# Load the returned records into a DataFrame so the columns can be inspected
sample_df = pd.DataFrame.from_records(sample_results)

# Show the heading followed by the column index, one per line
print("Available Columns:", sample_df.columns, sep="\n")




Available Columns:
Index(['transit_timestamp', 'transit_mode', 'station_complex_id',
       'station_complex', 'borough', 'payment_method', 'fare_class_category',
       'ridership', 'transfers', 'latitude', 'longitude', 'georeference',
       ':@computed_region_kjdx_g34t', ':@computed_region_yamh_8v7k',
       ':@computed_region_wbg7_3whc'],
      dtype='object')


In [20]:
import pandas as pd
from sodapy import Socrata
from datetime import datetime, timedelta
import os

# Connect to the data.ny.gov API (None = no app token)
client = Socrata("data.ny.gov", None)

# Single source of truth for the day being exported (May 17, 2024). The API
# filter strings are derived from it so the query window and the generated
# timestamp grid below can never drift apart.
start_time_dt = datetime(2024, 5, 17, 0, 0, 0)
start_time = start_time_dt.strftime("%Y-%m-%dT%H:%M:%S.000")
end_time = start_time_dt.strftime("%Y-%m-%dT23:59:59.999")

# Fetch all data for the day. A single `get` call returns at most `limit`
# rows and silently drops the rest, so page through the result set instead:
# `get_all` keeps requesting pages of `limit` rows until a short page arrives.
# An explicit ordering keeps the pages stable between requests.
results = list(
    client.get_all(
        "wujg-7c2s",
        where=f"transit_timestamp >= '{start_time}' AND transit_timestamp <= '{end_time}'",
        order=":id",
        limit=50000,  # page size, not a cap on the total number of rows
    )
)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)

# An empty response yields a frame without any columns, which would make the
# merge below fail with a KeyError. Provide the join key so the export still
# produces the (empty) timestamp scaffold.
if "transit_timestamp" not in results_df.columns:
    results_df = pd.DataFrame({"transit_timestamp": pd.Series(dtype="str")})

# Generate timestamps for every second of the day
# NOTE(review): if the dataset reports at a coarser granularity than one
# second, most of these rows will have no matching data after the merge —
# confirm that a per-second grid is really what is wanted.
timestamps = [start_time_dt + timedelta(seconds=i) for i in range(24 * 60 * 60)]

# Create a DataFrame with all timestamps, formatted exactly like the strings
# used in the API filter above so the two columns can be joined as text
timestamps_df = pd.DataFrame(timestamps, columns=["transit_timestamp"])
timestamps_df["transit_timestamp"] = timestamps_df["transit_timestamp"].dt.strftime("%Y-%m-%dT%H:%M:%S.000")

# Merge the generated timestamps with actual data; the left join keeps every
# generated timestamp even when no record exists for it
merged_df = pd.merge(timestamps_df, results_df, on="transit_timestamp", how="left")

# Save the complete data to CSV
os.makedirs("./data", exist_ok=True)
file_path = "./data/may_17_2024_full_day.csv"
merged_df.to_csv(file_path, index=False)

# Last expression of the cell: display where the file was written
file_path




'./data/may_17_2024_full_day.csv'