In [1]:
# Importing Libraries
import ast
import pandas as pd
from datasets import load_dataset
import matplotlib.pyplot as plt
import seaborn as sns

## I. World Building
### Here we define the route, the fare, and the different kinds of commuters and public transportation

In [76]:
import numpy as np
import random
from datetime import time

route_length = 24
minimum_fare = 13
minimum_distance = 4
additional_charge_per_unit = 1.50
unit = 'kilometer'
hours_to_complete_route = 2

possible_passengers = []
list_of_vehicles = []
company_raw_data = {'vehicle_id': [], 'boarding_time': [], 'boarding_place': [], 'disembarking_time': [], 'disembarking_place': [], 'payment': []}


class MorningPassenger:
    def __init__(self, mean_start_place):
        # Defining when the passenger starts waiting for public transport
        self.mean_start_hour = 7
        self.sigma_start_hour = 1.8
        self.start_hour = -1
        while self.start_hour < 0 or self.start_hour >= 24:
            self.start_hour = int(round(np.random.normal(self.mean_start_hour, self.sigma_start_hour, 1)[0], 0))
        self.start_minute = random.randint(0, 59)
        self.start_second = random.randint(0, 59)
        self.start_time = time(self.start_hour, self.start_minute, self.start_second)

        # Defining where the passenger waits for public transport
        self.mean_start_place = mean_start_place
        self.sigma_start_place = 1.5
        self.start_place = -1
        while self.start_place < 0 or self.start_place >= route_length:
            self.start_place = int(round(np.random.normal(self.mean_start_place, self.sigma_start_place, 1)[0], 0))

        # Defining where the passenger wants to go to
        self.mean_end_place = (route_length + self.start_place) / 2
        self.sigma_end_place = 4
        self.end_place = self.start_place - 1
        while self.end_place <= self.start_place or self.end_place >= route_length:
            self.end_place = int(round(np.random.normal(self.mean_end_place, self.sigma_end_place, 1)[0], 0))

        # Defining the fare the passenger will pay
        if self.end_place - self.start_place <= 4:
            self.payment = minimum_fare
        else:
            self.payment = minimum_fare + (self.end_place - self.start_place - minimum_distance) * additional_charge_per_unit

        global possible_passengers
        possible_passengers.append([self.start_time, self.start_place, self.end_place, self.payment])


class NoonPassenger:
    def __init__(self, mean_start_place):
        # Defining when the passenger starts waiting for public transport
        self.mean_start_hour = 12
        self.sigma_start_hour = 1.2
        self.start_hour = -1
        while self.start_hour < 0 or self.start_hour >= 24:
            self.start_hour = int(round(np.random.normal(self.mean_start_hour, self.sigma_start_hour, 1)[0], 0))
        self.start_minute = random.randint(0, 59)
        self.start_second = random.randint(0, 59)
        self.start_time = time(self.start_hour, self.start_minute, self.start_second)
        
        # Defining where the passenger waits for public transport
        self.mean_start_place = mean_start_place
        self.sigma_start_place = 2.5
        self.start_place = -1
        while self.start_place < 0 or self.start_place >= route_length:
            self.start_place = int(round(np.random.normal(self.mean_start_place, self.sigma_start_place, 1)[0], 0))

        # Defining where the passenger wants to go to
        self.mean_end_place = (route_length + self.start_place) / 2
        self.sigma_end_place = 4
        self.end_place = self.start_place - 1
        while self.end_place <= self.start_place or self.end_place >= route_length:
            self.end_place = int(round(np.random.normal(self.mean_end_place, self.sigma_end_place, 1)[0], 0))

        # Defining the fare the passenger will pay
        if self.end_place - self.start_place <= 4:
            self.payment = minimum_fare
        else:
            self.payment = minimum_fare + (self.end_place - self.start_place - minimum_distance) * additional_charge_per_unit

        global possible_passengers
        possible_passengers.append([self.start_time, self.start_place, self.end_place, self.payment])


class AfternoonPassenger:
    def __init__(self, mean_start_place):
        # Defining when the passenger starts waiting for public transport
        self.mean_start_hour = 17
        self.sigma_start_hour = 1.8
        self.start_hour = -1
        while self.start_hour < 0 or self.start_hour >= 24:
            self.start_hour = int(round(np.random.normal(self.mean_start_hour, self.sigma_start_hour, 1)[0], 0))
        self.start_minute = random.randint(0, 59)
        self.start_second = random.randint(0, 59)
        self.start_time = time(self.start_hour, self.start_minute, self.start_second)
        
        # Defining where the passenger waits for public transport
        self.mean_start_place = mean_start_place
        self.sigma_start_place = 1.8
        self.start_place = -1
        while self.start_place < 0 or self.start_place >= route_length:
            self.start_place = int(round(np.random.normal(self.mean_start_place, self.sigma_start_place, 1)[0], 0))

        # Defining where the passenger wants to go to
        self.mean_end_place = (route_length + self.start_place) / 2
        self.sigma_end_place = 4
        self.end_place = self.start_place - 1
        while self.end_place <= self.start_place or self.end_place >= route_length:
            self.end_place = int(round(np.random.normal(self.mean_end_place, self.sigma_end_place, 1)[0], 0))

        # Defining the fare the passenger will pay
        if self.end_place - self.start_place <= 4:
            self.payment = minimum_fare
        else:
            self.payment = minimum_fare + (self.end_place - self.start_place - minimum_distance) * additional_charge_per_unit

        global possible_passengers
        possible_passengers.append([self.start_time, self.start_place, self.end_place, self.payment])

        
class EveningPassenger:
    def __init__(self, mean_start_place):
        # Defining when the passenger starts waiting for public transport
        self.mean_start_hour = 20
        self.sigma_start_hour = 1.8
        self.start_hour = -1
        while self.start_hour < 0 or self.start_hour >= 24:
            self.start_hour = int(round(np.random.normal(self.mean_start_hour, self.sigma_start_hour, 1)[0], 0))
        self.start_minute = random.randint(0, 59)
        self.start_second = random.randint(0, 59)
        self.start_time = time(self.start_hour, self.start_minute, self.start_second)
        
        # Defining where the passenger waits for public transport
        self.mean_start_place = mean_start_place
        self.sigma_start_place = 2.5
        self.start_place = -1
        while self.start_place < 0 or self.start_place >= route_length:
            self.start_place = int(round(np.random.normal(self.mean_start_place, self.sigma_start_place, 1)[0], 0))

        # Defining where the passenger wants to go to
        self.mean_end_place = (route_length + self.start_place) / 2
        self.sigma_end_place = 4
        self.end_place = self.start_place - 1
        while self.end_place < self.start_place or self.end_place > route_length:
            self.end_place = int(round(np.random.normal(self.mean_end_place, self.sigma_end_place, 1)[0], 0))

        # Defining the fare the passenger will pay
        if self.end_place - self.start_place <= 4:
            self.payment = minimum_fare
        else:
            self.payment = minimum_fare + (self.end_place - self.start_place - minimum_distance) * additional_charge_per_unit

        global possible_passengers
        possible_passengers.append([self.start_time, self.start_place, self.end_place, self.payment])


class MorningPublicTransportVehicle:
    def __init__(self, number_of_morning_vehicles, vehicle_id):
        # The part about the vehicle's information
        self.vehicle_id = vehicle_id
        self.max_capacity = 20
        self.current_capacity = 0
        self.passengers = []
        self.start_hour = int(5 + ((vehicle_id * (hours_to_complete_route * 3600) / number_of_morning_vehicles) // 3600))
        self.start_minute = int(((vehicle_id * (hours_to_complete_route * 3600) / number_of_morning_vehicles) % 3600) // 60)
        self.start_second = int((vehicle_id * (hours_to_complete_route * 3600) / number_of_morning_vehicles) % 60)
        self.start_time = time(self.start_hour, self.start_minute, self.start_second)

        # The part about the vehicle's schedule
        self.travel_time_per_hour = int(round(hours_to_complete_route * 3600 / route_length, 0))
        self.arrival_time_per_stop = []
        for i in range(route_length):
            self.arrival_time_per_stop.append(
                time(self.start_hour + (self.start_minute + (self.start_second + i * self.travel_time_per_hour) // 60) // 60, 
                (self.start_minute + (self.start_second + i * self.travel_time_per_hour) // 60) % 60, 
                (self.start_second + i * self.travel_time_per_hour) % 60))
        for i in range(1, 4):
            self.extra_arrival_times = []
            for j in self.arrival_time_per_stop:
                self.extra_arrival_times.append(time(j.hour + i * hours_to_complete_route, j.minute, j.second))
        self.arrival_time_per_stop += self.extra_arrival_times
    
    # The part about how the vehicle loads and unloads passengers
    def stop(self, time):
        global possible_passengers
        if time in self.arrival_time_per_stop:
            # The part where passengers disembark
            disembarking_passengers_index = []
            for passenger in self.passengers:
                if passenger[2] == (self.arrival_time_per_stop.index(time) % route_length):
                    disembarking_passengers_index.append(self.passengers.index(passenger))
                    # We record the passenger's final information as they leave
                    company_raw_data['vehicle_id'].append(self.vehicle_id)
                    company_raw_data['boarding_time'].append(passenger[0])
                    company_raw_data['boarding_place'].append(passenger[1])
                    company_raw_data['disembarking_time'].append(time)
                    company_raw_data['disembarking_place'].append(passenger[2])
                    company_raw_data['payment'].append(passenger[3])
            for index in sorted(disembarking_passengers_index, reverse = True):
                del self.passengers[index]
            self.current_capacity -= len(disembarking_passengers_index)

            # The part where passengers board
            i = 0
            boarding_passengers_index = []
            while self.current_capacity < self.max_capacity and i != len(possible_passengers):
                if possible_passengers[i][0] <= time and possible_passengers[i][1] == self.arrival_time_per_stop.index(time) % route_length:
                    boarding_passengers_index.append(i)
                    # Passenger's initial record as they board (boarding_time, boarding_place, disembarking_place, payment)
                    self.passengers.append((time, possible_passengers[i][1], possible_passengers[i][2], possible_passengers[i][3]))
                    self.current_capacity += 1
                i += 1
            for index in sorted(boarding_passengers_index, reverse = True):
                del possible_passengers[index]


class AfternoonPublicTransportVehicle:
    # The part about the vehicle's information
    def __init__(self, number_of_afternoon_vehicles, vehicle_id):
        self.vehicle_id = vehicle_id
        self.max_capacity = 20
        self.current_capacity = 0
        self.passengers = []
        self.start_hour = int(12 + ((vehicle_id * (hours_to_complete_route * 3600) / number_of_afternoon_vehicles) // 3600))
        self.start_minute = int(((vehicle_id * (hours_to_complete_route * 3600) / number_of_afternoon_vehicles) % 3600) // 60)
        self.start_second = int((vehicle_id * (hours_to_complete_route * 3600) / number_of_afternoon_vehicles) % 60)
        self.start_time = time(self.start_hour, self.start_minute, self.start_second)

        # The part about the vehicle's schedule
        self.travel_time_per_hour = int(round(hours_to_complete_route * 3600 / route_length, 0))
        self.arrival_time_per_stop = []
        for i in range(route_length):
            self.arrival_time_per_stop.append(
                time(self.start_hour + (self.start_minute + (self.start_second + i * self.travel_time_per_hour) // 60) // 60, 
                (self.start_minute + (self.start_second + i * self.travel_time_per_hour) // 60) % 60, 
                (self.start_second + i * self.travel_time_per_hour) % 60))
        for i in range(1, 4):
            self.extra_arrival_times = []
            for j in self.arrival_time_per_stop:
                self.extra_arrival_times.append(time(j.hour + i * hours_to_complete_route, j.minute, j.second))
        self.arrival_time_per_stop += self.extra_arrival_times

    # The part about how the vehicle loads and unloads passengers
    def stop(self, time):
        global possible_passengers
        if time in self.arrival_time_per_stop:
            # The part where passengers disembark
            disembarking_passengers_index = []
            for passenger in self.passengers:
                print(f'I want to go down at {passenger[2]} and I am currently in {self.arrival_time_per_stop.index(time) % route_length}')
                if passenger[2] == (self.arrival_time_per_stop.index(time) % route_length):
                    print('hit')
                    disembarking_passengers_index.append(self.passengers.index(passenger))
                    # We record the passenger's final information as they leave
                    company_raw_data['vehicle_id'].append(self.vehicle_id)
                    company_raw_data['boarding_time'].append(passenger[0])
                    company_raw_data['boarding_place'].append(passenger[1])
                    company_raw_data['disembarking_time'].append(time)
                    company_raw_data['disembarking_place'].append(passenger[2])
                    company_raw_data['payment'].append(passenger[3])
            for index in sorted(disembarking_passengers_index, reverse = True):
                del self.passengers[index]
            self.current_capacity -= len(disembarking_passengers_index)

            # The part where passengers board
            i = 0
            boarding_passengers_index = []
            while self.current_capacity < self.max_capacity and i != len(possible_passengers):
                if possible_passengers[i][0] <= time and possible_passengers[i][1] == self.arrival_time_per_stop.index(time) % route_length:
                    boarding_passengers_index.append(i)
                    # Passenger's initial record as they board (boarding_time, boarding_place, disembarking_place, payment)
                    self.passengers.append((time, possible_passengers[i][1], possible_passengers[i][2], possible_passengers[i][3]))
                    self.current_capacity += 1
                i += 1
            for index in sorted(boarding_passengers_index, reverse = True):
                del possible_passengers[index]





## II. Creating the set of possible passengers and public transport vehicles
* Students and workers commute early. Therefore, there are a lot of passengers in the morning.
* It is hot at noon. There is a reduction in passenger volume.
* Students and workers return to their homes and other people go out to in the afternoon. This results in the busiest time of the day.
* The remaining people outside start going home in the evening.
* There are 32 public transport vehicles in the morning to start with
* There are 38 public transport vehicles in the afternoon

In [28]:
# possible_passengers = []

In [None]:
# Morning passengers
for i in range(531):
    person = MorningPassenger(2)
for i in range(448):
    person = MorningPassenger(12)
for i in range(151):
    person = MorningPassenger(6)
for i in range(339):
    person = MorningPassenger(18)

In [None]:
# Noon passengers
for i in range(150):
    person = NoonPassenger(3)
for i in range(131):
    person = NoonPassenger(7)
for i in range(106):
    person = NoonPassenger(13)
for i in range(154):
    person = NoonPassenger(17)

In [None]:
# Afternoon passengers
for i in range(687):
    person = AfternoonPassenger(3)
for i in range(505):
    person = AfternoonPassenger(11)
for i in range(193):
    person = AfternoonPassenger(5)
for i in range(450):
    person = AfternoonPassenger(19)

In [88]:
# Evening passengers
for i in range(136):
    person = EveningPassenger(3)
for i in range(159):
    person = EveningPassenger(11)
for i in range(92):
    person = EveningPassenger(5)
for i in range(121):
    person = EveningPassenger(19)

In [89]:
# Sorting the passengers by the time they arrived
possible_passengers.sort()

In [90]:
len(possible_passengers)

11054

In [38]:
# list_of_vehicles = []
# company_raw_data = {'vehicle_id': [], 'boarding_time': [], 'boarding_place': [], 'disembarking_time': [],'disembarking_place': [], 'payment': []}
# possible_passengers = []

In [91]:
# Public transport for morning to noon
for i in range(32):
    bus = MorningPublicTransportVehicle(32, i)
    list_of_vehicles.append(bus)

# Public transport for afternoon to evening
for i in range(38):
    bus = MorningPublicTransportVehicle(38, 32 + i)
    list_of_vehicles.append(bus)

## III. Simulating the public transport vehicles boarding and unboarding passengers

In [93]:
for hour in range(5, 22):
    for minute in range(60):
        for second in range(60):
            for vehicle in list_of_vehicles:
                vehicle.stop(time(hour, minute, second))

## IV. Displaying the simulated data we will be analyzing
### This sample data is sure to be possible. Here, there is no chance of public transport vehicles exceeding their max capacity unlike in simple randomly generated sample data.

In [94]:
df = pd.DataFrame(company_raw_data).sort_values(['boarding_time', 'vehicle_id'])
df

Unnamed: 0,vehicle_id,boarding_time,boarding_place,disembarking_time,disembarking_place,payment
0,0,05:00:00,0,05:30:00,6,16.0
1,0,05:00:00,0,05:35:00,7,17.5
7,0,05:00:00,0,05:40:00,8,19.0
10,0,05:00:00,0,05:45:00,9,20.5
18,0,05:00:00,0,05:50:00,10,22.0
...,...,...,...,...,...,...
5159,69,16:07:53,18,16:27:53,22,13.0
5144,66,16:08:25,20,16:13:25,21,13.0
5163,68,16:14:44,20,16:29:44,23,13.0
5165,69,16:22:53,21,16:32:53,23,13.0


## V. Infering the needed information from the sample data

In [110]:
# How many people board which vehicle, which place, and what time
board = pd.DataFrame(df[['vehicle_id', 'boarding_time', 'boarding_place']].value_counts()).sort_values('boarding_time')
board.reset_index(inplace = True)
board['board_count'] = board[0]
board.drop(0, axis = 1, inplace = True)
board

Unnamed: 0,vehicle_id,boarding_time,boarding_place,board_count
0,0,05:00:00,0,15
1,0,05:05:00,1,5
2,2,05:07:30,0,1
3,1,05:08:45,1,20
4,3,05:11:15,0,2
...,...,...,...,...
1909,69,16:07:53,18,1
1910,66,16:08:25,20,1
1911,68,16:14:44,20,1
1912,69,16:22:53,21,1


In [111]:
# How many people get off which vehicle, which place, and what time
disembark = pd.DataFrame(df[['vehicle_id', 'disembarking_time', 'disembarking_place']].value_counts()).sort_values('disembarking_time')
disembark.reset_index(inplace = True)
disembark['disembark_count'] = disembark[0]
disembark.drop(0, axis = 1, inplace = True)
disembark

Unnamed: 0,vehicle_id,disembarking_time,disembarking_place,disembark_count
0,0,05:30:00,6,1
1,0,05:35:00,7,1
2,3,05:36:15,5,2
3,2,05:37:30,6,2
4,1,05:38:45,7,1
...,...,...,...,...
2066,69,16:22:53,21,2
2067,68,16:24:44,22,1
2068,69,16:27:53,22,3
2069,68,16:29:44,23,4


In [138]:
derivative = pd.merge(board, disembark, left_on = ['vehicle_id', 'boarding_time', 'boarding_place'], right_on = ['vehicle_id', 'disembarking_time', 'disembarking_place'], how = 'outer')
# Turns null disembark_count to zero
derivative['board_count'].fillna(0, inplace = True)
derivative['disembark_count'].fillna(0, inplace = True)
# Combines boarding and disembarking time
derivative.loc[pd.isna(derivative['disembarking_time']), 'disembarking_time'] = derivative['boarding_time']
derivative['time'] = derivative['disembarking_time']
derivative.drop(columns = ['boarding_time', 'disembarking_time'], inplace = True)
# Calculates the change in the number of passenger
derivative['change_in_count'] = derivative['board_count'] - derivative['disembark_count']
derivative.drop(columns = ['board_count', 'disembark_count'], inplace = True)
# derivative = derivative[['vehicle_id', ]]
derivative

Unnamed: 0,vehicle_id,boarding_place,disembarking_place,time,change_in_count
0,0,0.0,,05:00:00,15.0
1,0,1.0,,05:05:00,5.0
2,2,0.0,,05:07:30,1.0
3,1,1.0,,05:08:45,20.0
4,3,0.0,,05:11:15,2.0
...,...,...,...,...,...
2670,68,,21.0,16:19:44,-3.0
2671,67,,22.0,16:21:34,-1.0
2672,68,,22.0,16:24:44,-1.0
2673,68,,23.0,16:29:44,-4.0


In [141]:
derivative['boarding_place'].isna()

0       False
1       False
2       False
3       False
4       False
        ...  
2670     True
2671     True
2672     True
2673     True
2674     True
Name: boarding_place, Length: 2675, dtype: bool

In [None]:
differential['disembarking_time'].apply()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  differential['disembarking_time'][key] = differential['boarding_time'][key]
