# Simulate Sensor Data

In [1]:
import numpy as np
import pandas as pd

# Seed for reproducibility
np.random.seed(42)

# Simulate data for 1000 time intervals (e.g., minutes)
time_intervals = 1000
passenger_load = np.random.randint(10, 50, time_intervals)  # Number of passengers on the bus
people_entered = np.random.randint(1, 10, time_intervals)   # People entering
people_exited = np.random.randint(0, 5, time_intervals)     # People exiting

# Create a DataFrame
sensor_data = pd.DataFrame({
    'Date&Time': pd.date_range(start='2024-01-01', periods=time_intervals, freq='T'),
    'Passenger_Load': passenger_load,
    'People_Entered': people_entered,
    'People_Exited': people_exited
})

# Save to CSV for further use
sensor_data.to_csv('simulated_sensor_data.csv', index=False)
sensor_data.head()

  'Date&Time': pd.date_range(start='2024-01-01', periods=time_intervals, freq='T'),


Unnamed: 0,Date&Time,Passenger_Load,People_Entered,People_Exited
0,2024-01-01 00:00:00,48,8,2
1,2024-01-01 00:01:00,38,9,1
2,2024-01-01 00:02:00,24,6,2
3,2024-01-01 00:03:00,17,7,1
4,2024-01-01 00:04:00,30,1,2


In [2]:
sensor_data.to_csv()

',Date&Time,Passenger_Load,People_Entered,People_Exited\r\n0,2024-01-01 00:00:00,48,8,2\r\n1,2024-01-01 00:01:00,38,9,1\r\n2,2024-01-01 00:02:00,24,6,2\r\n3,2024-01-01 00:03:00,17,7,1\r\n4,2024-01-01 00:04:00,30,1,2\r\n5,2024-01-01 00:05:00,48,5,1\r\n6,2024-01-01 00:06:00,28,5,3\r\n7,2024-01-01 00:07:00,32,4,1\r\n8,2024-01-01 00:08:00,20,6,1\r\n9,2024-01-01 00:09:00,20,7,0\r\n10,2024-01-01 00:10:00,33,9,1\r\n11,2024-01-01 00:11:00,45,1,0\r\n12,2024-01-01 00:12:00,49,6,4\r\n13,2024-01-01 00:13:00,33,7,0\r\n14,2024-01-01 00:14:00,12,3,3\r\n15,2024-01-01 00:15:00,31,8,0\r\n16,2024-01-01 00:16:00,11,5,4\r\n17,2024-01-01 00:17:00,33,9,3\r\n18,2024-01-01 00:18:00,39,5,2\r\n19,2024-01-01 00:19:00,47,9,2\r\n20,2024-01-01 00:20:00,11,4,0\r\n21,2024-01-01 00:21:00,30,9,0\r\n22,2024-01-01 00:22:00,42,5,4\r\n23,2024-01-01 00:23:00,21,9,4\r\n24,2024-01-01 00:24:00,31,3,1\r\n25,2024-01-01 00:25:00,34,4,4\r\n26,2024-01-01 00:26:00,36,9,1\r\n27,2024-01-01 00:27:00,37,6,3\r\n28,2024-01-01 00:28:00,25,2

In [3]:
sensor_data.shape

(1000, 4)

In [4]:
import zipfile

# Path to the zip file
zip_file_path = 'mdb-513-202408290052.zip'
extract_ = 'extracted_contents'

# Open and extract the zip file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_)

print("Extraction complete.")

Extraction complete.


In [5]:
# Load GTFS files into DataFrames
stops = pd.read_csv('G:/VS Code/AI application/extracted_contents/stops.txt')
routes = pd.read_csv('G:/VS Code/AI application/extracted_contents/routes.txt')
trips = pd.read_csv('G:/VS Code/AI application/extracted_contents/trips.txt')
stop_times = pd.read_csv('G:/VS Code/AI application/extracted_contents/stop_times.txt')

# Display the first few rows of each file
# print("Stops Data:\n", stops.head())
# print("Routes Data:\n", routes.head())
# print("Trips Data:\n", trips.head())
# print("Stop Times Data:\n", stop_times.head())

In [6]:
stops.shape,"----------------",stops.isnull().sum()

((1844, 9),
 '----------------',
 stop_id              0
 stop_name            0
 stop_desc         1844
 stop_lat             0
 stop_lon             0
 zone_id           1844
 stop_url          1844
 location_type        0
 parent_station    1844
 dtype: int64)

In [7]:
routes.shape,"---------------",routes.isnull().sum()

((305, 8),
 '---------------',
 route_id            0
 agency_id           0
 route_short_name    0
 route_long_name     0
 route_desc          0
 route_type          0
 route_color         0
 route_text_color    0
 dtype: int64)

In [8]:
trips.shape,"--------------",trips.isnull().sum()

((37868, 7),
 '--------------',
 route_id         0
 service_id       0
 trip_id          0
 trip_headsign    0
 direction_id     0
 block_id         0
 shape_id         0
 dtype: int64)

In [9]:
stop_times.shape,"-------------",stop_times.isnull().sum()

((1176030, 8),
 '-------------',
 trip_id           0
 arrival_time      0
 departure_time    0
 stop_id           0
 stop_sequence     0
 pickup_type       0
 drop_off_type     0
 timepoint         0
 dtype: int64)

# Preprocess the Data

In [10]:
stop_times.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,timepoint
0,MQ_O4-Weekday-030000_M66_501,05:00:00,05:00:00,403997,1,0,0,1
1,MQ_O4-Weekday-030000_M66_501,05:01:06,05:01:06,403503,2,0,0,0
2,MQ_O4-Weekday-030000_M66_501,05:02:13,05:02:13,403504,3,0,0,0
3,MQ_O4-Weekday-030000_M66_501,05:02:59,05:02:59,403505,4,0,0,0
4,MQ_O4-Weekday-030000_M66_501,05:04:00,05:04:00,403506,5,0,0,1


In [11]:
stop_times["arrival_time"].head()

0    05:00:00
1    05:01:06
2    05:02:13
3    05:02:59
4    05:04:00
Name: arrival_time, dtype: object

In [12]:
stop_times["arrival_time"].tail()

1176025    25:06:54
1176026    25:07:39
1176027    25:08:18
1176028    25:08:50
1176029    25:10:00
Name: arrival_time, dtype: object

In [13]:
# Convert 'Time' column to a datetime object and extract only the time part
sensor_data['Date&Time'] = pd.to_datetime(sensor_data['Date&Time'])
sensor_data['Time'] = sensor_data['Date&Time'].dt.strftime('%H:%M:%S')

print(sensor_data[['Date&Time', 'Time']].head())


            Date&Time      Time
0 2024-01-01 00:00:00  00:00:00
1 2024-01-01 00:01:00  00:01:00
2 2024-01-01 00:02:00  00:02:00
3 2024-01-01 00:03:00  00:03:00
4 2024-01-01 00:04:00  00:04:00


In [14]:
# Merge on the 'TimeOnly' and 'departure_time' columns
merged_data = pd.merge(sensor_data, stop_times, left_on='Time', right_on='departure_time', how='inner')

# Drop unnecessary columns and handle missing values
merged_data = merged_data.dropna()
merged_data.head()

Unnamed: 0,Date&Time,Passenger_Load,People_Entered,People_Exited,Time,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,timepoint
0,2024-01-01 00:10:00,33,9,1,00:10:00,MV_D4-Saturday-001000_M104_102,00:10:00,00:10:00,403942,1,0,0,1
1,2024-01-01 00:15:00,31,8,0,00:15:00,MV_O4-Weekday-001500_M104_101,00:15:00,00:15:00,405374,1,0,0,1
2,2024-01-01 00:15:00,31,8,0,00:15:00,MV_D4-Weekday-001500_M104_101,00:15:00,00:15:00,405374,1,0,0,1
3,2024-01-01 00:15:00,31,8,0,00:15:00,MV_D4-Saturday-001500_M104_101,00:15:00,00:15:00,405374,1,0,0,1
4,2024-01-01 00:15:00,31,8,0,00:15:00,MV_D4-Sunday-001500_M104_101,00:15:00,00:15:00,405374,1,0,0,1


In [15]:
merged_data.tail()

Unnamed: 0,Date&Time,Passenger_Load,People_Entered,People_Exited,Time,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,timepoint
149059,2024-01-01 16:39:00,38,5,2,16:39:00,OF_D4-Weekday-SDon-090500_M7_230,16:39:00,16:39:00,404334,52,0,0,1
149060,2024-01-01 16:39:00,38,5,2,16:39:00,OH_D4-Weekday-SDon-093600_M15_224,16:39:00,16:39:00,903266,35,0,0,1
149061,2024-01-01 16:39:00,38,5,2,16:39:00,OH_D4-Weekday-SDon-096000_M31_310,16:39:00,16:39:00,803117,20,0,0,1
149062,2024-01-01 16:39:00,38,5,2,16:39:00,OH_D4-Weekday-SDon-098500_M101_53,16:39:00,16:39:00,803165,10,0,0,1
149063,2024-01-01 16:39:00,38,5,2,16:39:00,OH_D4-Weekday-SDon-094200_M101_115,16:39:00,16:39:00,404255,35,0,0,1


# Train a Simple AI Model
### We use a simple regression model to predict future passenger load based on historical data.

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor

# Features and target variable
X = merged_data[['People_Entered', 'People_Exited']]
y = merged_data['Passenger_Load']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train a Random Forest model
model = XGBRegressor()
model.fit(X_train, y_train)

# Predict and evaluate
predictions = model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print(f"Mean Squared Error: {mse:.2f}")


Mean Squared Error: 130.93


In [None]:
def assign_bus(current_location, passenger_load):
    # Placeholder logic for bus assignment based on location and load
    if passenger_load > 40:
        return f"Bus at {current_location} is full, please wait for the next bus."
    else:
        return f"Bus at {current_location} is available. You can board now."

# Test the function
current_location = "Stop A"
passenger_load = 35  # Example load
print(assign_bus(current_location, passenger_load))


Bus at Stop A is available. You can board now.
