In [6]:
## Setup and Data Loading
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

# Assuming "Railway Final Dataset.xlsx" is uploaded to the Colab environment
ticket_df = pd.read_excel("Railway Final Dataset.xlsx", sheet_name="TicketInfo")

In [7]:
## Data Preparation: Monthly Aggregation
# Convert date and extract month/year for ticket data
ticket_df["Date_of_Purchase"] = pd.to_datetime(ticket_df["Date_of_Purchase"])
ticket_df["Month"] = ticket_df["Date_of_Purchase"].dt.month
ticket_df["Year"] = ticket_df["Date_of_Purchase"].dt.year

# Calculate Average Ticket Price per month
avg_price = ticket_df.groupby(["Year", "Month"])["Price"].mean().reset_index()
avg_price.columns = ["Year", "Month", "Avg_Ticket_Price"]

# Prepare training data (Months 1-4)
train_df = avg_price[avg_price["Month"] <= 4]

In [8]:
## Model Training
# Define features (X) and target (y) for ticket price prediction
X = train_df[["Month"]]  # Feature: Month number
y_price = train_df["Avg_Ticket_Price"] # Target: Average Ticket Price

# Initialize and train the Random Forest Regressor model for price
model_price = RandomForestRegressor(random_state=42) # Added random_state for reproducibility
model_price.fit(X, y_price)

# Calculate Model Accuracy (R-squared on training data)
pred_price_train = model_price.predict(X)
acc_price = r2_score(y_price, pred_price_train) * 100

print(f"Average Ticket Price Model Accuracy (R² on training data): {acc_price:.2f}%")

Average Ticket Price Model Accuracy (R² on training data): 63.19%


In [9]:
## Prediction for May (Month 5)
# Create the input data point for May (Month 5)
may = pd.DataFrame({"Month": [5]})

# Predict the Average Ticket Price for May
predicted_price = model_price.predict(may)[0]

print("\nPREDICTED AVERAGE TICKET PRICE FOR MAY")
print("---------------------------------------")
print(f"Predicted Average Ticket Price: £{predicted_price:.2f}")


PREDICTED AVERAGE TICKET PRICE FOR MAY
---------------------------------------
Predicted Average Ticket Price: £23.92
