In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Load dataset
# Location of the simulated bus-pricing CSV, kept in one named place so it is easy to repoint.
DATA_PATH = "/content/bus_pricing_simulated.csv"
df = pd.read_csv(DATA_PATH)


In [14]:
# Preview the first rows of the loaded frame to sanity-check the columns and values.
df.head()

Unnamed: 0,Date,DayOfWeek,Source,Destination,DepartureTime,SeatsAvailable,Bookings,Price,Holiday,Weather,EventNearby
0,2025-06-13,Friday,Bangalore,Chennai,21:00,40,15,626,No,Clear,Yes
1,2025-04-19,Saturday,Bangalore,Chennai,06:00,40,26,713,No,Rainy,No
2,2025-06-16,Monday,Pune,Mumbai,09:00,40,10,644,No,Cloudy,No
3,2025-02-10,Monday,Mumbai,Hyderabad,12:00,40,28,628,Yes,Cloudy,Yes
4,2025-06-04,Wednesday,Chennai,Bangalore,21:00,40,22,530,No,Clear,No


In [15]:

# Preprocess
# Columns that are expanded into one indicator column per distinct value.
CATEGORICAL_COLUMNS = ['Source', 'Destination', 'DepartureTime', 'Weather', 'Holiday', 'EventNearby']

# Parse the date strings once, then derive the numeric weekday from the parsed dates
# (this replaces the textual DayOfWeek column; Monday maps to 0).
parsed_dates = pd.to_datetime(df['Date'])
df = df.assign(Date=parsed_dates, DayOfWeek=parsed_dates.dt.dayofweek)

# One-hot encode the categorical columns.
df = pd.get_dummies(df, columns=CATEGORICAL_COLUMNS)

In [23]:
# NOTE(review): this cell's execution count (23) is out of sequence with its neighbours (15 and 16),
# and the frame it displays already contains PredictedBookings, which is only assigned in a later cell.
# Evaluating bare `df` renders the whole frame in pandas' truncated form (first and last rows).
df

Unnamed: 0,Date,DayOfWeek,SeatsAvailable,Bookings,Price,Source_Bangalore,Source_Chennai,Source_Hyderabad,Source_Mumbai,Source_Pune,...,DepartureTime_18:00,DepartureTime_21:00,Weather_Clear,Weather_Cloudy,Weather_Rainy,Holiday_No,Holiday_Yes,EventNearby_No,EventNearby_Yes,PredictedBookings
0,2025-06-13,4,40,15,626,True,False,False,False,False,...,False,True,True,False,False,True,False,False,True,20.265000
1,2025-04-19,5,40,26,713,True,False,False,False,False,...,False,False,False,False,True,True,False,True,False,21.688833
2,2025-06-16,0,40,10,644,False,False,False,False,True,...,False,False,False,True,False,True,False,True,False,18.085833
3,2025-02-10,0,40,28,628,False,False,False,True,False,...,False,False,False,True,False,False,True,False,True,28.320000
4,2025-06-04,2,40,22,530,False,True,False,False,False,...,False,True,True,False,False,True,False,True,False,21.609167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,2025-01-29,2,40,15,604,False,False,False,False,True,...,False,False,False,True,False,True,False,True,False,20.643357
4996,2025-03-14,4,40,23,526,False,True,False,False,False,...,False,False,False,False,True,True,False,True,False,22.404000
4997,2025-04-21,0,40,34,711,False,False,False,True,False,...,False,False,False,False,True,True,False,True,False,25.914000
4998,2025-03-01,5,40,13,557,False,True,False,False,False,...,False,False,False,False,True,True,False,True,False,13.926667


In [16]:
# Features & target
# Columns appended to `df` by later cells. They are dropped here as well (tolerantly, since
# they do not exist on a first run) so that re-running this cell after the prediction/pricing
# cells cannot feed the model's own outputs back in as features.
DERIVED_COLUMNS = ['PredictedBookings', 'BasePrice', 'DynamicPrice']

X = (
    df.drop(columns=['Bookings', 'Price', 'Date'])       # target and excluded columns; must exist
      .drop(columns=DERIVED_COLUMNS, errors='ignore')     # only present after later cells have run
)
y = df['Bookings']


In [17]:
# Inspect the feature matrix that will be split and passed to the model.
X

Unnamed: 0,DayOfWeek,SeatsAvailable,Source_Bangalore,Source_Chennai,Source_Hyderabad,Source_Mumbai,Source_Pune,Destination_Bangalore,Destination_Chennai,Destination_Hyderabad,...,DepartureTime_15:00,DepartureTime_18:00,DepartureTime_21:00,Weather_Clear,Weather_Cloudy,Weather_Rainy,Holiday_No,Holiday_Yes,EventNearby_No,EventNearby_Yes
0,4,40,True,False,False,False,False,False,True,False,...,False,False,True,True,False,False,True,False,False,True
1,5,40,True,False,False,False,False,False,True,False,...,False,False,False,False,False,True,True,False,True,False
2,0,40,False,False,False,False,True,False,False,False,...,False,False,False,False,True,False,True,False,True,False
3,0,40,False,False,False,True,False,False,False,True,...,False,False,False,False,True,False,False,True,False,True
4,2,40,False,True,False,False,False,True,False,False,...,False,False,True,True,False,False,True,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,2,40,False,False,False,False,True,False,True,False,...,False,False,False,False,True,False,True,False,True,False
4996,4,40,False,True,False,False,False,False,False,False,...,True,False,False,False,False,True,True,False,True,False
4997,0,40,False,False,False,True,False,False,False,False,...,False,False,False,False,False,True,True,False,True,False
4998,5,40,False,True,False,False,False,True,False,False,...,False,False,False,False,False,True,True,False,True,False


In [18]:
# Inspect the target series (the Bookings column).
y

Unnamed: 0,Bookings
0,15
1,26
2,10
3,28
4,22
...,...
4995,15
4996,23
4997,34
4998,13


In [19]:
# Train-test split
# One seed shared by the split and the forest so the fitted model, and therefore the
# reported MAE, is the same on every Restart & Run All.
RANDOM_STATE = 42
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE)

# Model
# Without random_state the forest's bootstrap sampling and feature selection differ on every run.
model = RandomForestRegressor(random_state=RANDOM_STATE)
model.fit(X_train, y_train)


In [20]:
# Predict and evaluate
# Score the held-out split and report the mean absolute error against the true bookings.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print("MAE:", mae)


MAE: 7.290771661255412


In [21]:
# Export predictions
# Predict for every row of the feature matrix (not only the held-out split) and store the
# result as a new column on df.
# NOTE(review): X includes the rows the model was fitted on (X_train is split from X), so the
# predictions for those rows are in-sample — confirm this is acceptable for the pricing step.
df['PredictedBookings'] = model.predict(X)

In [22]:
# Inspect the predicted-bookings column that was just added.
df['PredictedBookings']

Unnamed: 0,PredictedBookings
0,20.265000
1,21.688833
2,18.085833
3,28.320000
4,21.609167
...,...
4995,20.643357
4996,22.404000
4997,25.914000
4998,13.926667


In [24]:
# Flat base price assigned to every row; the dynamic-pricing rule below scales this value.
df['BasePrice'] = 500

# Demand tiers as (inclusive upper bound on predicted bookings, price multiplier),
# listed in ascending order of the bound.
DEMAND_PRICE_TIERS = ((10, 0.8), (25, 1.0), (35, 1.15))
# Multiplier applied when demand is above every tier bound.
PEAK_PRICE_MULTIPLIER = 1.25


def apply_dynamic_pricing(row, tiers=DEMAND_PRICE_TIERS, peak_multiplier=PEAK_PRICE_MULTIPLIER):
    """Return the demand-adjusted price for one row.

    Parameters
    ----------
    row : mapping-like
        Must provide ``row['PredictedBookings']`` (the demand estimate) and
        ``row['BasePrice']`` (the price to scale). A DataFrame row passed by
        ``df.apply(..., axis=1)`` or a plain dict both work.
    tiers : sequence of (upper_bound, multiplier), optional
        Checked in order; the first tier whose ``upper_bound`` is >= demand wins.
        Defaults to the notebook's tiers: <=10 -> 0.8, <=25 -> 1.0, <=35 -> 1.15.
    peak_multiplier : float, optional
        Multiplier used when demand exceeds every tier bound (default 1.25).

    Returns
    -------
    float
        ``BasePrice`` times the selected multiplier. Every tier multiplies by a
        float, so the result type does not depend on which tier matched.

    Notes
    -----
    A NaN demand fails every ``<=`` comparison and therefore falls through to
    ``peak_multiplier``.
    """
    demand = row['PredictedBookings']
    base = row['BasePrice']
    for upper_bound, multiplier in tiers:
        if demand <= upper_bound:
            return base * multiplier
    return base * peak_multiplier

# Apply the tier rule to each row (axis=1 passes one row at a time) and store the resulting price.
df['DynamicPrice'] = df.apply(apply_dynamic_pricing, axis=1)

In [25]:
# Inspect the resulting price column.
df['DynamicPrice']

Unnamed: 0,DynamicPrice
0,500.0
1,500.0
2,500.0
3,575.0
4,500.0
...,...
4995,500.0
4996,500.0
4997,575.0
4998,500.0


# New Section