In [12]:
import pandas as pd
import numpy as np
import gtfs_kit as gk
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional, Any
from datetime import datetime, timedelta
from abc import ABC, abstractmethod
import time


In [36]:
# =============================================================================
# STREAMLINED GTFS DATA PREPARATOR FOR DISCRETE OPTIMIZATION
# =============================================================================

class GTFSDataPreparator:
    """
    Streamlined GTFS data extraction focused on optimization essentials.
    
    Extracts only what's needed:
    - Headways by interval (current values from GTFS)
    - Round-trip times for vehicle constraints
    
    Supports discrete headway optimization where user specifies allowed values.
    """
    
    def __init__(self, 
                 gtfs_path: str,
                 interval_hours: int,
                 date: Optional[str] = None,
                 turnaround_buffer: float = 1.15,
                 default_round_trip_time: float = 60.0):
        """
        Initialize GTFS data preparator.
        
        Args:
            gtfs_path: Path to GTFS ZIP file or directory
            interval_hours: Time interval duration in HOURS (must divide 24 evenly)
            date: Optional service date filter (YYYYMMDD format)
            turnaround_buffer: Round-trip time multiplier (1.15 = 15% buffer)
            default_round_trip_time: Fallback round-trip time in MINUTES
        """
        # Input validation
        if 24 % interval_hours != 0:
            raise ValueError(f"interval_hours ({interval_hours}) must divide 24 evenly. "
                           f"Valid values: 1, 2, 3, 4, 6, 8, 12, 24")
            
        # Store configuration
        self.gtfs_path = gtfs_path
        self.date = date
        self.interval_hours = interval_hours
        self.n_intervals = 24 // interval_hours
        self.turnaround_buffer = turnaround_buffer
        self.default_round_trip_time = default_round_trip_time
        
        # Load and cache GTFS data
        self._load_gtfs()
    
    def _load_gtfs(self) -> None:
        """
        Load GTFS feed and cache for optimization and reconstruction.
        
        Reads the feed at ``self.gtfs_path`` with gtfs_kit (distance units km),
        optionally restricts it to ``self.date``, and caches copies of the
        trips, stop_times and routes tables on the instance. Two helper columns,
        ``departure_seconds`` and ``arrival_seconds``, are added to the cached
        stop_times copy.
        
        Date filtering is best-effort: if it fails, a warning is printed and the
        full feed is used.
        """
        print(f"⏱️  Loading GTFS feed from {self.gtfs_path}...")
        start_time = time.time()
        
        # Load original feed (keep for reconstruction)
        self.feed = gk.read_feed(self.gtfs_path, dist_units='km')
        
        # Apply date filtering if specified
        if self.date:
            print(f"📅 Filtering GTFS for date: {self.date}")
            try:
                # gtfs_kit exposes date filtering as Feed.restrict_to_dates;
                # it returns a new feed rather than modifying in place.
                self.feed = self.feed.restrict_to_dates([self.date])
                print(f"   ✅ Filtered to {len(self.feed.trips)} trips for date {self.date}")
            except Exception as e:
                print(f"   ⚠️  Date filtering failed: {e}, using full feed")
        else:
            print("📅 Using full GTFS feed (all service periods)")
        
        # Cache tables for processing (copies, so the feed kept for
        # reconstruction does not pick up the helper columns added below)
        self.trips_df = self.feed.trips.copy()
        self.stop_times_df = self.feed.stop_times.copy()
        self.routes_df = self.feed.routes.copy()
        
        # Convert times to seconds for calculations
        self.stop_times_df['departure_seconds'] = self.stop_times_df['departure_time'].apply(
            self._safe_timestr_to_seconds
        )
        self.stop_times_df['arrival_seconds'] = self.stop_times_df['arrival_time'].apply(
            self._safe_timestr_to_seconds
        )
        
        load_time = time.time() - start_time
        print(f"✅ GTFS loaded and cached in {load_time:.2f} seconds")
        print(f"   📊 {len(self.trips_df):,} trips, {len(self.stop_times_df):,} stop times")
    
    def _safe_timestr_to_seconds(self, time_value: Any) -> float:
        """Safely convert GTFS time values to seconds from midnight."""
        try:
            if pd.isna(time_value):
                return np.nan
            if isinstance(time_value, str):
                return gk.helpers.timestr_to_seconds(time_value)
            else:
                return float(time_value)
        except Exception:
            return np.nan
    
    def extract_optimization_data(self, allowed_headways: List[float]) -> Dict[str, Any]:
        """
        Extract data for discrete headway optimization.
        
        Args:
            allowed_headways: List of allowed headway values (e.g., [5, 10, 15, 20, 30])
            
        Returns:
            Dictionary with optimization problem structure
        """
        print("=== EXTRACTING DATA FOR DISCRETE HEADWAY OPTIMIZATION ===")
        
        # Extract route data
        route_data = self._extract_route_essentials()
        
        if not route_data:
            raise ValueError("No valid routes found in GTFS feed")
        
        # Create matrices for optimization
        n_routes = len(route_data)
        headways_matrix = np.full((n_routes, self.n_intervals), np.nan)
        round_trip_times = np.zeros(n_routes)
        route_ids = []
        
        for i, route in enumerate(route_data):
            headways_matrix[i, :] = route['headways_by_interval']
            round_trip_times[i] = route['round_trip_time']
            route_ids.append(route['service_id'])
        
        # Create discrete optimization problem
        allowed_values = allowed_headways + [9999.0]  # 9999 = no service
        
        # Map current headways to nearest allowed values for initial solution
        initial_solution = np.zeros((n_routes, self.n_intervals), dtype=int)
        
        for i in range(n_routes):
            for j in range(self.n_intervals):
                current_headway = headways_matrix[i, j]
                if np.isnan(current_headway):
                    # No service - choose last option (9999)
                    initial_solution[i, j] = len(allowed_values) - 1
                else:
                    # Find nearest allowed headway
                    distances = [abs(current_headway - h) for h in allowed_headways]
                    initial_solution[i, j] = np.argmin(distances)
        
        optimization_data = {
            'n_routes': n_routes,
            'n_intervals': self.n_intervals,
            'total_variables': n_routes * self.n_intervals,
            'allowed_headways': allowed_values,
            'n_choices_per_variable': len(allowed_values),
            'initial_solution': initial_solution,
            'round_trip_times': round_trip_times,
            'route_ids': route_ids,
            'interval_labels': [f"{i*self.interval_hours:02d}-{(i+1)*self.interval_hours:02d}h" 
                              for i in range(self.n_intervals)],
            'current_headways_matrix': headways_matrix,  # Keep for reference
            'gtfs_feed': self.feed  # Keep for reconstruction
        }
        
        print(f"✅ Prepared optimization data:")
        print(f"   📊 {n_routes} routes × {self.n_intervals} intervals = {n_routes * self.n_intervals} variables")
        print(f"   🔢 {len(allowed_values)} choices per variable: {allowed_values}")
        print(f"   📈 Service coverage: {np.sum(~np.isnan(headways_matrix)) / headways_matrix.size * 100:.1f}%")
        
        return optimization_data
    
    def _extract_route_essentials(self) -> List[Dict[str, Any]]:
        """Extract only essential data: headways and round-trip times."""
        print(f"⏱️  Extracting route essentials with {self.interval_hours}-hour intervals...")
        
        all_services = self.trips_df['service_id'].unique()
        route_data = []
        
        for service_id in all_services:
            service_trips = self.trips_df[self.trips_df['service_id'] == service_id]
            
            if len(service_trips) == 0:
                continue
            
            # Calculate headways by interval
            headways_by_interval = self._calculate_service_headways(service_id, service_trips)
            
            # Skip if no service found
            if np.all(np.isnan(headways_by_interval)):
                continue
            
            # Calculate round-trip time
            round_trip_time = self._calculate_round_trip_time(service_id, service_trips)
            
            route_data.append({
                'service_id': service_id,
                'headways_by_interval': headways_by_interval,
                'round_trip_time': round_trip_time
            })
        
        print(f"✅ Extracted {len(route_data)} routes")
        return route_data
    
    def _calculate_service_headways(self, service_id: str, service_trips: pd.DataFrame) -> np.ndarray:
        """Calculate headway values for each time interval."""
        headways = np.full(self.n_intervals, np.nan)
        
        try:
            trip_ids = service_trips['trip_id'].tolist()
            service_stop_times = self.stop_times_df[
                self.stop_times_df['trip_id'].isin(trip_ids)
            ].copy()
            
            if len(service_stop_times) == 0:
                return headways
            
            # Get first departure for each trip
            first_departures = service_stop_times.loc[
                service_stop_times.groupby('trip_id')['stop_sequence'].idxmin()
            ][['trip_id', 'departure_seconds']].copy()
            
            first_departures['departure_hour'] = (first_departures['departure_seconds'] // 3600) % 24
            first_departures = first_departures.dropna()
            
            # Calculate headways for each interval
            for interval in range(self.n_intervals):
                start_hour = interval * self.interval_hours
                end_hour = (interval + 1) * self.interval_hours
                
                interval_departures = first_departures[
                    (first_departures['departure_hour'] >= start_hour) &
                    (first_departures['departure_hour'] < end_hour)
                ]['departure_seconds'].values
                
                if len(interval_departures) >= 2:
                    # Calculate average interval between departures
                    interval_departures = np.sort(interval_departures)
                    intervals = np.diff(interval_departures) / 60  # Convert to minutes
                    valid_intervals = intervals[intervals > 0]
                    if len(valid_intervals) > 0:
                        headways[interval] = np.mean(valid_intervals)
                elif len(interval_departures) == 1:
                    # Single trip - once per day service
                    headways[interval] = 24 * 60  # 1440 minutes
            
            return headways
            
        except Exception:
            return headways
    
    def _calculate_round_trip_time(self, service_id: str, service_trips: pd.DataFrame) -> float:
        """Calculate round-trip time with turnaround buffer."""
        try:
            trip_ids = service_trips['trip_id'].tolist()
            service_stop_times = self.stop_times_df[
                self.stop_times_df['trip_id'].isin(trip_ids)
            ].copy()
            
            if len(service_stop_times) == 0:
                return self.default_round_trip_time
            
            trip_durations = []
            for trip_id, trip_stops in service_stop_times.groupby('trip_id'):
                if len(trip_stops) >= 2:
                    trip_stops = trip_stops.sort_values('stop_sequence')
                    first_departure = trip_stops.iloc[0]['departure_seconds']
                    last_arrival = trip_stops.iloc[-1]['arrival_seconds']
                    
                    if pd.notna(first_departure) and pd.notna(last_arrival):
                        duration_minutes = (last_arrival - first_departure) / 60.0
                        if duration_minutes > 0:
                            trip_durations.append(duration_minutes)
            
            if trip_durations:
                median_one_way = np.median(trip_durations)
                return median_one_way * 2.0 * self.turnaround_buffer
            else:
                return self.default_round_trip_time
                
        except Exception:
            return self.default_round_trip_time


# =============================================================================
# FIXED GTFS RECONSTRUCTOR
# =============================================================================

class SimplifiedGTFSReconstructor:
    """
    Simplified GTFS reconstruction that generates proper stop_times.txt.
    
    Two approaches:
    1. Full stop_times reconstruction (what you need)
    2. Optional frequencies.txt support (for advanced simulation tools)
    """
    
    def __init__(self, optimization_data: Dict[str, Any], optimization_result: Dict[str, Any]):
        self.optimization_data = optimization_data
        self.optimization_result = optimization_result
        self.feed = optimization_data['gtfs_feed']
        self.optimized_headways = self._decode_headway_solution()
    
    def _decode_headway_solution(self) -> np.ndarray:
        """Convert optimization solution indices to actual headway values."""
        solution_indices = self.optimization_result['headway_solution']
        allowed_headways = self.optimization_data['allowed_headways']
        
        n_routes, n_intervals = solution_indices.shape
        headways = np.full((n_routes, n_intervals), np.nan)
        
        for i in range(n_routes):
            for j in range(n_intervals):
                choice_idx = solution_indices[i, j]
                headway_value = allowed_headways[choice_idx]
                
                if headway_value >= 9000:
                    headways[i, j] = np.nan  # No service
                else:
                    headways[i, j] = headway_value
        
        return headways
    
    def reconstruct_gtfs(self, use_frequencies: bool = False) -> Any:
        """
        Reconstruct GTFS with proper stop_times.txt.
        
        Args:
            use_frequencies: If True, also create frequencies.txt (optional)
        
        Returns:
            Valid GTFS feed with stop_times.txt
        """
        print("=== RECONSTRUCTING GTFS WITH OPTIMIZED HEADWAYS ===")
        
        # Start with copy of original feed
        new_feed = self.feed.copy()
        
        # Generate new stop_times and trips
        new_stop_times, new_trips = self._generate_stop_times_and_trips()
        
        # Update feed
        new_feed.stop_times = new_stop_times
        new_feed.trips = new_trips
        
        # FIXED: Only add frequencies if we have trips and user requests it
        if use_frequencies and len(new_trips) > 0:
            frequencies_df = self._create_frequencies_table(new_trips)
            if len(frequencies_df) > 0:
                new_feed.frequencies = frequencies_df
                print(f"   📊 Added {len(frequencies_df):,} frequency entries")
            else:
                # Set to None instead of deleting the attribute
                new_feed.frequencies = None
                print(f"   ⚠️  No frequencies generated - skipping frequencies.txt")
        else:
            # Set to None instead of deleting - gtfs-kit expects the attribute to exist
            new_feed.frequencies = None
            print(f"   📊 Frequencies.txt disabled - using stop_times.txt only")
        
        print(f"✅ Reconstructed GTFS with stop_times.txt:")
        print(f"   📊 {len(new_trips):,} trips")
        print(f"   📊 {len(new_stop_times):,} stop times")
        
        return new_feed
    
    def _generate_stop_times_and_trips(self) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Generate both stop_times and trips tables with proper relationships."""
        new_stop_times_list = []
        new_trips_list = []
        trip_id_counter = 1
        
        route_ids = self.optimization_data['route_ids']
        n_intervals = self.optimization_data['n_intervals']
        interval_hours = 24 // n_intervals
        
        print(f"   🔄 Generating trips and stop_times for {len(route_ids)} routes")
        
        for route_idx, service_id in enumerate(route_ids):
            # Get original trips for this service
            original_trips = self.feed.trips[self.feed.trips['service_id'] == service_id]
            
            if len(original_trips) == 0:
                continue
            
            # Use first trip as template
            template_trip = original_trips.iloc[0]
            template_trip_id = template_trip['trip_id']
            
            # Get template stop_times
            template_stops = self.feed.stop_times[
                self.feed.stop_times['trip_id'] == template_trip_id
            ].sort_values('stop_sequence').copy()
            
            if len(template_stops) == 0:
                continue
            
            # Convert template times to seconds for calculations
            template_stops['departure_seconds'] = template_stops['departure_time'].apply(
                self._safe_timestr_to_seconds
            )
            template_stops['arrival_seconds'] = template_stops['arrival_time'].apply(
                self._safe_timestr_to_seconds
            )
            
            # Generate trips for each interval with service
            route_trips_generated = 0
            for interval_idx in range(n_intervals):
                headway = self.optimized_headways[route_idx, interval_idx]
                
                # Skip intervals with no service
                if np.isnan(headway):
                    continue
                
                # Calculate interval bounds
                start_hour = interval_idx * interval_hours
                end_hour = (interval_idx + 1) * interval_hours
                interval_duration_minutes = interval_hours * 60
                
                # Calculate number of trips needed in this interval
                n_trips = max(1, int(interval_duration_minutes / headway))
                
                # Generate trips spaced by optimized headway
                for trip_num in range(n_trips):
                    # Calculate start time for this trip
                    trip_start_minutes = start_hour * 60 + (trip_num * headway)
                    
                    # Don't exceed interval boundary
                    if trip_start_minutes >= end_hour * 60:
                        break
                    
                    # Create new trip with unique ID
                    new_trip_id = f"opt_{service_id}_{interval_idx}_{trip_num}"
                    new_trip = template_trip.copy()
                    new_trip['trip_id'] = new_trip_id
                    
                    # Clear any block_id to avoid conflicts
                    if 'block_id' in new_trip:
                        new_trip['block_id'] = f"block_{trip_id_counter}"
                    
                    new_trips_list.append(new_trip)
                    
                    # Generate stop_times for this trip
                    trip_stop_times = self._create_trip_stop_times(
                        template_stops, new_trip_id, trip_start_minutes
                    )
                    
                    if trip_stop_times is not None:
                        new_stop_times_list.append(trip_stop_times)
                        route_trips_generated += 1
                    
                    trip_id_counter += 1
            
            if route_trips_generated > 0 and route_idx < 5:  # Log first few routes
                print(f"   📍 Route {route_idx} ({service_id}): Generated {route_trips_generated} trips")
        
        # Combine all data
        if new_trips_list and new_stop_times_list:
            new_trips = pd.DataFrame(new_trips_list).reset_index(drop=True)
            new_stop_times = pd.concat(new_stop_times_list, ignore_index=True)
            
            print(f"   ✅ Generated {len(new_trips):,} trips with {len(new_stop_times):,} stop times")
        else:
            # Create empty but valid DataFrames
            new_trips = self.feed.trips.iloc[0:0].copy()
            new_stop_times = self.feed.stop_times.iloc[0:0].copy()
            print(f"   ⚠️  No trips generated - all routes mapped to no service")
        
        return new_stop_times, new_trips
    
    def _create_trip_stop_times(self, template_stops: pd.DataFrame, 
                              new_trip_id: str, trip_start_minutes: float) -> Optional[pd.DataFrame]:
        """Create stop_times for a single trip based on template."""
        try:
            # Calculate time offset
            template_start_seconds = template_stops.iloc[0]['departure_seconds']
            if pd.isna(template_start_seconds):
                return None
            
            trip_start_seconds = trip_start_minutes * 60
            time_offset = trip_start_seconds - template_start_seconds
            
            # Create new stop_times
            new_stop_times = template_stops.copy()
            new_stop_times['trip_id'] = new_trip_id
            
            # Adjust all times
            new_stop_times['departure_seconds'] = template_stops['departure_seconds'] + time_offset
            new_stop_times['arrival_seconds'] = template_stops['arrival_seconds'] + time_offset
            
            # Convert back to GTFS time strings
            new_stop_times['departure_time'] = new_stop_times['departure_seconds'].apply(
                self._seconds_to_timestr
            )
            new_stop_times['arrival_time'] = new_stop_times['arrival_seconds'].apply(
                self._seconds_to_timestr
            )
            
            # Remove helper columns
            new_stop_times = new_stop_times.drop(['departure_seconds', 'arrival_seconds'], 
                                               axis=1, errors='ignore')
            
            return new_stop_times
            
        except Exception as e:
            print(f"   ⚠️  Failed to create stop_times for trip {new_trip_id}: {e}")
            return None
    
    def _create_frequencies_table(self, trips_df: pd.DataFrame) -> pd.DataFrame:
        """
        FIXED: Create frequencies.txt that uses ACTUAL trip IDs from the new trips.
        
        This ensures no foreign key violations.
        """
        frequencies_list = []
        n_intervals = self.optimization_data['n_intervals']
        interval_hours = 24 // n_intervals
        route_ids = self.optimization_data['route_ids']
        
        for route_idx, service_id in enumerate(route_ids):
            # FIXED: Get trips that were actually generated for this service
            service_trips = trips_df[trips_df['service_id'] == service_id]
            
            if len(service_trips) == 0:
                continue
            
            # Create frequency entries for each interval that has service
            for interval_idx in range(n_intervals):
                headway = self.optimized_headways[route_idx, interval_idx]
                
                if np.isnan(headway):
                    continue
                
                # FIXED: Find a trip that was actually generated for this interval
                interval_trips = service_trips[
                    service_trips['trip_id'].str.contains(f'_{interval_idx}_', na=False)
                ]
                
                if len(interval_trips) == 0:
                    continue
                
                # Use the first trip from this interval as the frequency template
                template_trip_id = interval_trips.iloc[0]['trip_id']
                
                start_hour = interval_idx * interval_hours
                end_hour = (interval_idx + 1) * interval_hours
                
                frequency_entry = {
                    'trip_id': template_trip_id,  # FIXED: Use actual generated trip ID
                    'start_time': f"{start_hour:02d}:00:00",
                    'end_time': f"{end_hour:02d}:00:00",
                    'headway_secs': int(headway * 60),
                    'exact_times': 0
                }
                
                frequencies_list.append(frequency_entry)
        
        return pd.DataFrame(frequencies_list)
    
    # ...existing helper methods remain the same...
    def _safe_timestr_to_seconds(self, time_value: Any) -> float:
        """Safely convert GTFS time strings to seconds."""
        try:
            if pd.isna(time_value):
                return np.nan
            if isinstance(time_value, str):
                return gk.helpers.timestr_to_seconds(time_value)
            else:
                return float(time_value)
        except Exception:
            return np.nan
    
    def _seconds_to_timestr(self, seconds: float) -> str:
        """Convert seconds to GTFS time string format."""
        if pd.isna(seconds):
            return "00:00:00"
        
        # Handle times > 24 hours (GTFS allows this)
        hours = int(seconds // 3600)
        minutes = int((seconds % 3600) // 60)
        secs = int(seconds % 60)
        
        return f"{hours:02d}:{minutes:02d}:{secs:02d}"



# =============================================================================
# COMPLETE WORKFLOW: DATA PREPARATION → OPTIMIZATION → RECONSTRUCTION
# =============================================================================

# 1. PREPARE OPTIMIZATION DATA
# Load the study-area bus feed and split the day into 3-hour intervals.
print("=== STEP 1: PREPARING OPTIMIZATION DATA ===")
preparator = GTFSDataPreparator(
    '../data/external/study_area_gtfs_bus.zip',
    3,                       # interval_hours -> 8 periods per day
    date=None,               # no date filter: use the full GTFS feed
    turnaround_buffer=1.15,  # 15% buffer on round-trip time
)

# Headway values (minutes) the discrete optimizer may choose from
allowed_headways = [5, 10, 15, 20, 30, 45, 60, 90, 120]

optimization_data = preparator.extract_optimization_data(allowed_headways)

# 2. SIMULATE OPTIMIZATION RESULT
# No optimizer is wired in here: the initial solution stands in for the
# optimized one, and the remaining fields are placeholders.
print("\n=== STEP 2: SIMULATING OPTIMIZATION RESULT ===")
simulated_result = dict(
    headway_solution=optimization_data['initial_solution'],
    objective_value=1000.0,  # Placeholder
    solve_time=5.0,  # Placeholder
    status='optimal',
)

print(f"✅ Using initial solution as optimization result")
print(f"   📊 Solution shape: {simulated_result['headway_solution'].shape}")

# 3. RECONSTRUCT GTFS WITH OPTIMIZED HEADWAYS
print("\n=== STEP 3: RECONSTRUCTING GTFS ===")
reconstructor = SimplifiedGTFSReconstructor(optimization_data, simulated_result)

# stop_times.txt only; no frequencies.txt is requested
new_gtfs_feed = reconstructor.reconstruct_gtfs(use_frequencies=False)


# 4. SAVE THE COMPLETE GTFS FEED
print("\n=== STEP 4: SAVING OPTIMIZED GTFS ===")
if len(new_gtfs_feed.trips) == 0:
    print("⚠️  No trips generated - check optimization solution")
else:
    output_path = '../data/processed/optimized_gtfs.zip'
    
    # Make sure the folder that will hold the archive exists
    import os
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    
    # Write the feed to the .zip path
    new_gtfs_feed.to_file(output_path)
    print(f"✅ Complete GTFS with stop_times.txt saved to: {output_path}")

print("\n=== WORKFLOW COMPLETE ===")

=== STEP 1: PREPARING OPTIMIZATION DATA ===
⏱️  Loading GTFS feed from ../data/external/study_area_gtfs_bus.zip...
📅 Using full GTFS feed (all service periods)
✅ GTFS loaded and cached in 4.60 seconds
   📊 13,974 trips, 703,721 stop times
=== EXTRACTING DATA FOR DISCRETE HEADWAY OPTIMIZATION ===
⏱️  Extracting route essentials with 3-hour intervals...
✅ Extracted 277 routes
✅ Prepared optimization data:
   📊 277 routes × 8 intervals = 2216 variables
   🔢 10 choices per variable: [5, 10, 15, 20, 30, 45, 60, 90, 120, 9999.0]
   📈 Service coverage: 37.4%

=== STEP 2: SIMULATING OPTIMIZATION RESULT ===
✅ Using initial solution as optimization result
   📊 Solution shape: (277, 8)

=== STEP 3: RECONSTRUCTING GTFS ===
=== RECONSTRUCTING GTFS WITH OPTIMIZED HEADWAYS ===
   🔄 Generating trips and stop_times for 277 routes
   📍 Route 0 (1221): Generated 55 trips
   📍 Route 1 (1302): Generated 126 trips
   📍 Route 2 (1303): Generated 37 trips
   📍 Route 3 (1304): Generated 46 trips
   📍 Route 4 (

In [38]:
# Inspect optimization_data['initial_solution']: an integer array of shape
# (n_routes, n_intervals) whose entries are indices into
# optimization_data['allowed_headways'] (the last index means "no service").

optimization_data['initial_solution']  # Displayed as the cell output for debugging


array([[8, 2, 4, ..., 9, 1, 2],
       [8, 0, 4, ..., 8, 5, 4],
       [9, 9, 2, ..., 1, 4, 9],
       ...,
       [9, 9, 3, ..., 4, 9, 9],
       [9, 9, 9, ..., 8, 9, 9],
       [9, 9, 9, ..., 9, 9, 9]])

In [48]:
# Show the feed object returned by reconstructor.reconstruct_gtfs(); the cell
# output is just the object's repr.
new_gtfs_feed

<gtfs_kit.feed.Feed at 0x725d3a6d7ec0>