In [1]:
# Importing Libraries
import ast
import pandas as pd
from datasets import load_dataset
import matplotlib.pyplot as plt
import seaborn as sns

## I. World Building
### Here we define the route, the fare, and the different kinds of commuters and public transportation

In [2]:
import numpy as np
import random
from datetime import time

# --- Reproducibility ---------------------------------------------------------
# The passenger classes draw from both `numpy.random` and the stdlib `random`
# module, so both generators are seeded. Without this, every
# "Restart Kernel -> Run All" produces a different data set.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# --- Route and fare configuration --------------------------------------------
route_length = 24                  # number of stops on the route (stops are indexed 0 .. route_length - 1)
minimum_fare = 13                  # fare charged for any trip up to `minimum_distance`
minimum_distance = 4               # longest trip (in stops) still covered by the minimum fare
additional_charge_per_unit = 1.50  # surcharge for every stop travelled beyond `minimum_distance`
unit = 'kilometer'                 # label of the distance unit
hours_to_complete_route = 2        # hours a vehicle needs to travel the whole route once

# --- Shared simulation state (mutated by the passenger and vehicle classes) ---
# Waiting passengers, stored as [start_time, start_place, end_place, payment]
possible_passengers = []
# Every vehicle created for the simulation
list_of_vehicles = []
# One entry per completed trip, appended column-wise when a passenger disembarks
company_raw_data = {'vehicle_id': [], 'boarding_time': [], 'boarding_place': [], 'disembarking_time': [], 'disembarking_place': [], 'payment': []}


class _Passenger:
    """A commuter who waits at a stop, rides towards the end of the route and pays a fare.

    Subclasses only provide the distribution parameters that distinguish the
    time-of-day groups (`mean_start_hour`, `sigma_start_hour`,
    `sigma_start_place`). Creating an instance samples the whole trip and
    appends `[start_time, start_place, end_place, payment]` to the module-level
    `possible_passengers` list.

    Parameters
    ----------
    mean_start_place : int or float
        Centre of the normal distribution the boarding stop is drawn from.
    """

    mean_start_hour = None    # centre of the waiting-hour distribution (set by subclasses)
    sigma_start_hour = None   # spread of the waiting-hour distribution (set by subclasses)
    sigma_start_place = None  # spread of the boarding-stop distribution (set by subclasses)
    sigma_end_place = 4       # spread of the destination-stop distribution

    @staticmethod
    def _draw_until(mean, sigma, is_valid):
        """Draw rounded normal samples until one satisfies `is_valid` (rejection sampling)."""
        while True:
            candidate = int(round(np.random.normal(mean, sigma, 1)[0], 0))
            if is_valid(candidate):
                return candidate

    def __init__(self, mean_start_place):
        # When the passenger starts waiting: a valid hour of the day, plus a uniform minute and second
        self.start_hour = self._draw_until(
            self.mean_start_hour, self.sigma_start_hour, lambda hour: 0 <= hour < 24
        )
        self.start_minute = random.randint(0, 59)
        self.start_second = random.randint(0, 59)
        self.start_time = time(self.start_hour, self.start_minute, self.start_second)

        # Where the passenger waits: a stop index in [0, route_length)
        self.mean_start_place = mean_start_place
        self.start_place = self._draw_until(
            self.mean_start_place, self.sigma_start_place, lambda place: 0 <= place < route_length
        )

        # Where the passenger wants to go: strictly after the boarding stop and at
        # most `route_length`, centred halfway between the boarding stop and the end
        self.mean_end_place = (route_length + self.start_place) / 2
        self.end_place = self._draw_until(
            self.mean_end_place,
            self.sigma_end_place,
            lambda place: self.start_place < place <= route_length,
        )

        # Fare: the minimum fare covers `minimum_distance`; every further unit is surcharged.
        # The threshold is `minimum_distance` (not a literal 4) so the rule stays
        # consistent with the surcharge formula if the configuration changes.
        distance = self.end_place - self.start_place
        if distance <= minimum_distance:
            self.payment = minimum_fare
        else:
            self.payment = minimum_fare + (distance - minimum_distance) * additional_charge_per_unit

        # Register the passenger in the shared waiting pool
        possible_passengers.append([self.start_time, self.start_place, self.end_place, self.payment])


class MorningPassenger(_Passenger):
    """Passenger whose waiting hour is centred on 07:00."""

    mean_start_hour = 7
    sigma_start_hour = 1.8
    sigma_start_place = 1.5


class NoonPassenger(_Passenger):
    """Passenger whose waiting hour is centred on 12:00."""

    mean_start_hour = 12
    sigma_start_hour = 1.2
    sigma_start_place = 2.5


class AfternoonPassenger(_Passenger):
    """Passenger whose waiting hour is centred on 17:00."""

    mean_start_hour = 17
    sigma_start_hour = 1.8
    sigma_start_place = 1.8


class EveningPassenger(_Passenger):
    """Passenger whose waiting hour is centred on 20:00."""

    mean_start_hour = 20
    sigma_start_hour = 1.8
    sigma_start_place = 2.5


class _PublicTransportVehicle:
    """A vehicle that repeatedly drives the route, unloading and loading passengers at each stop.

    Subclasses set `first_departure_hour`, the hour from which the fleet's
    departures are staggered.

    Parameters
    ----------
    number_of_vehicles : int
        Size of the fleet the departure offsets are spread over.
    vehicle_id : int
        Identifier written to `company_raw_data`; also determines the departure
        offset: `vehicle_id / number_of_vehicles` of one route duration.

    NOTE(review): the offset uses `vehicle_id` directly, so a fleet whose ids do
    not start at 0 departs later than `first_departure_hour` — confirm with the
    caller whether that is intended.
    """

    first_departure_hour = None  # set by subclasses
    number_of_laps = 4           # how many times the route is driven (first pass + 3 repeats)

    def __init__(self, number_of_vehicles, vehicle_id):
        # The part about the vehicle's information
        self.vehicle_id = vehicle_id
        self.max_capacity = 20
        self.current_capacity = 0
        self.passengers = []

        lap_seconds = hours_to_complete_route * 3600
        offset_seconds = vehicle_id * lap_seconds / number_of_vehicles
        self.start_hour = int(self.first_departure_hour + offset_seconds // 3600)
        self.start_minute = int((offset_seconds % 3600) // 60)
        self.start_second = int(offset_seconds % 60)
        self.start_time = time(self.start_hour, self.start_minute, self.start_second)

        # The part about the vehicle's schedule.
        # Despite its name, `travel_time_per_hour` holds the SECONDS between two
        # consecutive stops; the attribute name is kept for compatibility.
        self.travel_time_per_hour = int(round(lap_seconds / route_length, 0))
        start_in_seconds = self.start_hour * 3600 + self.start_minute * 60 + self.start_second
        first_lap = [
            self._time_from_seconds(start_in_seconds + stop_number * self.travel_time_per_hour)
            for stop_number in range(route_length)
        ]
        # Later laps repeat the first lap shifted by whole route durations
        self.extra_arrival_times = [
            time(arrival.hour + lap * hours_to_complete_route, arrival.minute, arrival.second)
            for lap in range(1, self.number_of_laps)
            for arrival in first_lap
        ]
        # Position `k` in this list is stop `k % route_length`
        self.arrival_time_per_stop = first_lap + self.extra_arrival_times

    @staticmethod
    def _time_from_seconds(seconds_since_midnight):
        """Convert a second count since midnight into a `datetime.time`."""
        return time(
            seconds_since_midnight // 3600,
            (seconds_since_midnight % 3600) // 60,
            seconds_since_midnight % 60,
        )

    # The part about how the vehicle loads and unloads passengers
    def stop(self, time):
        """Unload and then load passengers if `time` is one of this vehicle's scheduled arrivals.

        Does nothing when `time` is not in `arrival_time_per_stop`. Otherwise
        passengers whose destination is the current stop leave (one row per
        passenger is appended to `company_raw_data`), then waiting passengers at
        this stop board in list order until the vehicle is full; boarded
        passengers are removed from `possible_passengers`.

        Parameters
        ----------
        time : datetime.time
            Current simulation time (the name shadows the `time` class inside this method).
        """
        if time not in self.arrival_time_per_stop:
            return
        # Looked up once instead of on every loop iteration
        stop_index = self.arrival_time_per_stop.index(time) % route_length

        # The part where passengers disembark
        staying = []
        for passenger in self.passengers:
            if passenger[2] % route_length == stop_index:
                # We record the passenger's final information as they leave
                company_raw_data['vehicle_id'].append(self.vehicle_id)
                company_raw_data['boarding_time'].append(passenger[0])
                company_raw_data['boarding_place'].append(passenger[1])
                company_raw_data['disembarking_time'].append(time)
                company_raw_data['disembarking_place'].append(passenger[2] % route_length)
                company_raw_data['payment'].append(passenger[3])
            else:
                staying.append(passenger)
        self.current_capacity -= len(self.passengers) - len(staying)
        self.passengers[:] = staying

        # The part where passengers board
        boarded_indexes = []
        for index, waiting in enumerate(possible_passengers):
            if self.current_capacity >= self.max_capacity:
                break
            if waiting[0] <= time and waiting[1] == stop_index:
                boarded_indexes.append(index)
                # Passenger's initial record as they board (boarding_time, boarding_place, disembarking_place, payment)
                self.passengers.append((time, waiting[1], waiting[2], waiting[3]))
                self.current_capacity += 1
        # Delete from the back so the remaining indexes stay valid
        for index in reversed(boarded_indexes):
            del possible_passengers[index]


class MorningPublicTransportVehicle(_PublicTransportVehicle):
    """Vehicle whose departures are staggered starting from 05:00."""

    first_departure_hour = 5

    def __init__(self, number_of_morning_vehicles, vehicle_id):
        super().__init__(number_of_morning_vehicles, vehicle_id)


class AfternoonPublicTransportVehicle(_PublicTransportVehicle):
    """Vehicle whose departures are staggered starting from 12:00."""

    first_departure_hour = 12

    def __init__(self, number_of_afternoon_vehicles, vehicle_id):
        super().__init__(number_of_afternoon_vehicles, vehicle_id)





## II. Creating the set of possible passengers and public transport vehicles
* Students and workers commute early. Therefore, there are a lot of passengers in the morning.
* It is hot at noon. There is a reduction in passenger volume.
* Students and workers return to their homes and other people go out in the afternoon. This results in the busiest time of the day.
* The remaining people outside start going home in the evening.
* There are 32 public transport vehicles in the morning to start with
* There are 38 public transport vehicles in the afternoon

In [132]:
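# Optional reset: uncomment to empty the passenger pool before re-running the generation cells below
# possible_passengers = []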

In [3]:
# Morning passengers, generated cluster by cluster.
# Each pair is (number of passengers, mean boarding stop of the cluster).
for head_count, mean_stop in [(1000, 2), (900, 12), (800, 6), (600, 20)]:
    for _ in range(head_count):
        # Instantiating a passenger registers it in `possible_passengers`
        person = MorningPassenger(mean_stop)

In [4]:
# Noon passengers, generated cluster by cluster.
# Each pair is (number of passengers, mean boarding stop of the cluster).
for head_count, mean_stop in [(600, 3), (600, 13), (400, 7), (200, 19)]:
    for _ in range(head_count):
        # Instantiating a passenger registers it in `possible_passengers`
        person = NoonPassenger(mean_stop)

In [5]:
# Afternoon passengers, generated cluster by cluster.
# Each pair is (number of passengers, mean boarding stop of the cluster).
for head_count, mean_stop in [(1100, 4), (1100, 14), (1000, 8), (1000, 20)]:
    for _ in range(head_count):
        # Instantiating a passenger registers it in `possible_passengers`
        person = AfternoonPassenger(mean_stop)

In [6]:
# Evening passengers, generated cluster by cluster.
# Each pair is (number of passengers, mean boarding stop of the cluster).
for head_count, mean_stop in [(300, 2), (600, 12), (700, 6), (400, 18)]:
    for _ in range(head_count):
        # Instantiating a passenger registers it in `possible_passengers`
        person = EveningPassenger(mean_stop)

In [7]:
# Order the waiting pool by arrival time (rows compare element-wise, so ties on
# the time fall back to the remaining fields). The list object itself is kept.
possible_passengers[:] = sorted(possible_passengers)

In [9]:
# Start from an empty fleet before the vehicles are created in the next cell
list_of_vehicles = []
# Optional resets of the other shared containers (left disabled):
# company_raw_data = {'vehicle_id': [], 'boarding_time': [], 'boarding_place': [], 'disembarking_time': [],'disembarking_place': [], 'payment': []}
# possible_passengers = []

In [10]:
# Fleet sizes, named once so the loop bounds, the stagger denominators and the
# id offset cannot drift apart
number_of_morning_vehicles = 32
number_of_afternoon_vehicles = 38

# Public transport for morning to noon (vehicle ids 0 .. number_of_morning_vehicles - 1)
for i in range(number_of_morning_vehicles):
    bus = MorningPublicTransportVehicle(number_of_morning_vehicles, i)
    list_of_vehicles.append(bus)

# Public transport for afternoon to evening; ids continue after the morning
# fleet so that every vehicle_id stays unique
for i in range(number_of_afternoon_vehicles):
    bus = AfternoonPublicTransportVehicle(number_of_afternoon_vehicles, number_of_morning_vehicles + i)
    list_of_vehicles.append(bus)

## III. Simulating the public transport vehicles boarding and disembarking passengers

In [11]:
# Run the simulation for the 05:00:00 - 21:59:59 window.
# `stop()` does nothing unless the time is one of the vehicle's scheduled
# arrivals, so instead of probing every vehicle on every second of the day we
# visit only the scheduled (arrival time, vehicle) pairs. Sorting the pairs keeps
# the original processing order: chronological, and in `list_of_vehicles` order
# for vehicles that arrive at the same second.
simulation_start_hour = 5
simulation_end_hour = 22  # exclusive

scheduled_stops = sorted(
    {
        (arrival_time, position)
        for position, vehicle in enumerate(list_of_vehicles)
        for arrival_time in vehicle.arrival_time_per_stop
        if simulation_start_hour <= arrival_time.hour < simulation_end_hour
    }
)
for arrival_time, position in scheduled_stops:
    list_of_vehicles[position].stop(arrival_time)

## IV. Displaying the simulated data we will be analyzing
### Because every trip comes out of the simulation, this sample data is guaranteed to be feasible: unlike simple randomly generated sample data, no public transport vehicle can ever exceed its maximum capacity.

In [12]:
# One row per completed trip, ordered by boarding time and then by vehicle
trips_unsorted = pd.DataFrame(company_raw_data)
df = trips_unsorted.sort_values(by=['boarding_time', 'vehicle_id'])
df

Unnamed: 0,vehicle_id,boarding_time,boarding_place,disembarking_time,disembarking_place,payment
0,0,05:00:00,0,05:15:00,3,13.0
1,0,05:00:00,0,05:25:00,5,14.5
4,0,05:00:00,0,05:35:00,7,17.5
15,0,05:00:00,0,05:45:00,9,20.5
23,0,05:00:00,0,05:50:00,10,22.0
...,...,...,...,...,...,...
10410,56,21:51:50,11,21:56:50,12,13.0
10415,42,21:52:37,20,21:57:37,21,13.0
10416,42,21:52:37,20,21:57:37,21,13.0
10419,50,21:52:53,15,21:57:53,16,13.0


## V. Inferring the needed information from the sample data

In [13]:
# How many people board which vehicle, which place, and what time.
# The counts are named explicitly: the column produced by `value_counts()` is
# unnamed (`0`) before pandas 2.0 and called `count` from 2.0 on, so relying on
# `board[0]` breaks on current pandas versions.
board = (
    df[['vehicle_id', 'boarding_time', 'boarding_place']]
    .value_counts()
    .rename('board_count')
    .reset_index()
    .sort_values('boarding_time', ignore_index=True)
)
board

Unnamed: 0,vehicle_id,boarding_time,boarding_place,board_count
0,0,05:00:00,0,12
1,0,05:05:00,1,8
2,1,05:08:45,1,12
3,2,05:12:30,1,3
4,1,05:13:45,2,8
...,...,...,...,...
4384,48,21:51:34,16,1
4385,56,21:51:50,11,1
4386,42,21:52:37,20,2
4387,50,21:52:53,15,1


In [14]:
# How many people get off which vehicle, which place, and what time.
# The counts are named explicitly: the column produced by `value_counts()` is
# unnamed (`0`) before pandas 2.0 and called `count` from 2.0 on, so relying on
# `disembark[0]` breaks on current pandas versions.
disembark = (
    df[['vehicle_id', 'disembarking_time', 'disembarking_place']]
    .value_counts()
    .rename('disembark_count')
    .reset_index()
    .sort_values('disembarking_time', ignore_index=True)
)
disembark

Unnamed: 0,vehicle_id,disembarking_time,disembarking_place,disembark_count
0,0,05:15:00,3,1
1,0,05:25:00,5,1
2,2,05:32:30,5,1
3,1,05:33:45,6,1
4,0,05:35:00,7,2
...,...,...,...,...
3900,47,21:58:25,18,2
3901,55,21:58:41,13,2
3902,60,21:59:28,10,4
3903,41,21:59:28,22,2


In [15]:
# Line up boarding and disembarking events that happen on the same vehicle at the
# same time and place; the outer join keeps events that have no counterpart.
derivative = pd.merge(
    board,
    disembark,
    left_on=['vehicle_id', 'boarding_time', 'boarding_place'],
    right_on=['vehicle_id', 'disembarking_time', 'disembarking_place'],
    how='outer',
)

# Turns null board_count and disembark_count to zero.
# Assigning the result back (instead of `column.fillna(..., inplace=True)`) keeps
# working under pandas Copy-on-Write, where the chained in-place call is a no-op.
derivative['board_count'] = derivative['board_count'].fillna(0).astype(int)
derivative['disembark_count'] = derivative['disembark_count'].fillna(0).astype(int)

# Combines boarding and disembarking place (whichever side of the join is present)
derivative['place'] = derivative['disembarking_place'].fillna(derivative['boarding_place']).astype(int)

# Combines boarding and disembarking time (whichever side of the join is present)
derivative['time'] = derivative['disembarking_time'].fillna(derivative['boarding_time'])

derivative = derivative.drop(
    columns=['boarding_place', 'disembarking_place', 'boarding_time', 'disembarking_time']
).sort_values('time')

# Add a column for the time's hour part
# (named `boarding_hour`, although it is the hour of the combined event time)
derivative['boarding_hour'] = derivative['time'].apply(lambda x: x.hour)

# Calculates the change in the number of passenger; the two counts are kept for reference
derivative['change_in_count'] = derivative['board_count'] - derivative['disembark_count']

derivative

Unnamed: 0,vehicle_id,board_count,disembark_count,place,time,boarding_hour,change_in_count
0,0,12,0,0,05:00:00,5,12
1,0,8,0,1,05:05:00,5,8
2,1,12,0,1,05:08:45,5,12
3,2,3,0,1,05:12:30,5,3
4,1,8,0,2,05:13:45,5,8
...,...,...,...,...,...,...,...
5528,47,0,2,18,21:58:25,21,-2
5529,55,0,2,13,21:58:41,21,-2
5531,41,0,2,22,21:59:28,21,-2
5530,60,0,4,10,21:59:28,21,-4


In [16]:
# Split the event log per vehicle (in order of first appearance) and add, for
# every stop a vehicle makes, its running passenger count and that count as a
# fraction of the 20-seat capacity.
record_by_vehicle = {}
for vehicle_id, vehicle_records in derivative.groupby('vehicle_id', sort=False):
    running_load = vehicle_records['change_in_count'].cumsum()
    record_by_vehicle[vehicle_id] = vehicle_records.assign(
        current_passengers=running_load,
        current_capacity_percentage=running_load / 20,
    )

In [17]:
# Stop-by-stop passenger load of vehicle 0
record_by_vehicle[0]

Unnamed: 0,vehicle_id,board_count,disembark_count,place,time,boarding_hour,change_in_count,current_passengers,current_capacity_percentage
0,0,12,0,0,05:00:00,5,12,12,0.60
1,0,8,0,1,05:05:00,5,8,20,1.00
5,0,1,1,3,05:15:00,5,0,20,1.00
15,0,1,1,5,05:25:00,5,0,20,1.00
28,0,2,2,7,05:35:00,5,0,20,1.00
...,...,...,...,...,...,...,...,...,...
1709,0,1,5,18,12:30:00,12,-4,9,0.45
4895,0,0,4,19,12:35:00,12,-4,5,0.25
1751,0,1,2,20,12:40:00,12,-1,4,0.20
4914,0,0,2,22,12:50:00,12,-2,2,0.10
