<a href="https://colab.research.google.com/github/BlacknWhite-cyber/Customer_Retention/blob/main/Railway_Delay_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Railway Delay Prediction Notebook

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import joblib

# Load the raw delay dataset from a fixed path.
# NOTE(review): "/content/..." looks like the Colab upload directory, so the
# CSV presumably has to be uploaded there before this cell runs - confirm.
file_path = "/content/train delay data.csv"
df = pd.read_csv(file_path)

# Separate the regression target (the delay column) from the features
# (every other column of the frame).
X = df.drop(columns=["Historical Delay (min)"])
y = df["Historical Delay (min)"]

# Feature columns consumed by the preprocessor, grouped by how they are handled.
categorical_cols = ["Weather Conditions", "Day of the Week", "Time of Day", "Train Type", "Route Congestion"]
numeric_cols = ["Distance Between Stations (km)"]

# One-hot encode the categorical columns and pass the numeric column through
# untouched. With handle_unknown="ignore", a category value that was not seen
# during fit does not raise at predict time (per the scikit-learn docs it is
# encoded as all zeros), so misspelled or differently-cased inputs go unnoticed.
# No `remainder` is given, so columns of X that are not listed above are not
# forwarded to the regressor (ColumnTransformer's documented default).
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
        ("num", "passthrough", numeric_cols)
    ]
)

# Full pipeline: preprocessing followed by a random forest regressor.
# random_state is fixed so repeated runs build the same forest.
model = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(n_estimators=200, random_state=42))
])

# Hold out 20% of the rows for evaluation; the split is seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the preprocessor and the regressor on the training portion only.
model.fit(X_train, y_train)


# Evaluate the fitted pipeline on the held-out rows.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
# NOTE(review): this import sits in the middle of the cell; it would normally
# live with the other sklearn imports at the top of the file.
from sklearn.metrics import r2_score
r2 = r2_score(y_test, y_pred)
print("Mean Absolute Error (minutes):", mae)
# NOTE(review): r2_score is the coefficient of determination, not a
# classification accuracy; the "%" label below may mislead readers.
print("R^2 Score (model accuracy):", r2 * 100, "%")

# Persist the fitted pipeline (preprocessor + regressor) to the current
# working directory.
joblib.dump(model, "railway_delay_model.pkl")

def predict_single_station(input_data: dict):
    """Predict the delay for one station.

    Args:
        input_data: Mapping of feature column name to value for a single
            station. It is wrapped in a one-row DataFrame before being
            handed to the module-level ``model``.

    Returns:
        The first (and only) element of the model's prediction array.
    """
    single_row = pd.DataFrame([input_data])
    return model.predict(single_row)[0]

def predict_multiple_stations(station_inputs: list, estimator=None):
    """Predict the delay for each station in an ordered list of stops.

    Each station is predicted independently from its own feature values. The
    delay predicted for the previous station (0 for the first) is recorded
    under ``"Historical Delay (min)"`` in the returned station record, but it
    is removed from the frame before prediction, so it never reaches the
    model.

    The dicts in ``station_inputs`` are not modified; every returned
    ``"station"`` entry is a fresh copy.

    Args:
        station_inputs: List of dicts mapping feature column names to values,
            one dict per station, in travel order.
        estimator: Optional object exposing ``predict(DataFrame)``. When
            omitted, the module-level ``model`` is used.

    Returns:
        A list with one dict per station, each holding ``"station"`` (the
        station record including ``"Historical Delay (min)"``) and
        ``"predicted_delay"`` (the first element of the prediction).
    """
    if estimator is None:
        # Resolved at call time so the module-level pipeline stays the default.
        estimator = model

    delay_key = "Historical Delay (min)"
    previous_delay = 0
    results = []
    for station in station_inputs:
        # Build a copy carrying the previous prediction instead of writing
        # into the caller's dict; an existing delay key keeps its position.
        record = {**station, delay_key: previous_delay}
        # The delay column is bookkeeping only and is not a model feature.
        features = pd.DataFrame([record]).drop(columns=[delay_key])
        predicted = estimator.predict(features)[0]
        previous_delay = predicted
        results.append({"station": record, "predicted_delay": predicted})
    return results

# Function to get user input for a single station prediction
def get_single_station_input():
    """Interactively collect the feature values for one station.

    Prompts on stdin for the distance (parsed with ``float``) followed by the
    five categorical fields, which are stored exactly as typed.

    Returns:
        A dict keyed by feature column name.

    Raises:
        ValueError: If the distance entry cannot be parsed as a float.
    """
    print("Enter details for single station prediction:")

    # The distance is asked first and is the only numeric field.
    answers = {
        "Distance Between Stations (km)": float(input("Distance Between Stations (km): ")),
    }

    # Remaining fields are free text, asked in this order.
    text_prompts = (
        ("Weather Conditions", "Weather Conditions (e.g., Clear, Rainy, Foggy): "),
        ("Day of the Week", "Day of the Week (e.g., Monday, Tuesday): "),
        ("Time of Day", "Time of Day (e.g., Morning, Afternoon, Evening, Night): "),
        ("Train Type", "Train Type (e.g., Express, Superfast, Local): "),
        ("Route Congestion", "Route Congestion (e.g., Low, Medium, High): "),
    )
    for column, prompt in text_prompts:
        answers[column] = input(prompt)

    return answers

# Function to get user input for multiple station predictions
def get_multiple_station_inputs():
    """Interactively collect feature values for several stations.

    Asks for the number of stations (parsed with ``int``), then prompts for
    each station's distance (parsed with ``float``) and five categorical
    fields, which are stored exactly as typed.

    Returns:
        A list with one dict per station, keyed by feature column name, each
        also carrying ``"Historical Delay (min)"`` set to 0.

    Raises:
        ValueError: If the station count or a distance cannot be parsed.
    """
    num_stations = int(input("Enter the number of stations for prediction: "))

    # Free-text fields, asked after the distance in this order.
    text_prompts = (
        ("Weather Conditions", "Weather Conditions (e.g., Clear, Rainy, Foggy): "),
        ("Day of the Week", "Day of the Week (e.g., Monday, Tuesday): "),
        ("Time of Day", "Time of Day (e.g., Morning, Afternoon, Evening, Night): "),
        ("Train Type", "Train Type (e.g., Express, Superfast, Local): "),
        ("Route Congestion", "Route Congestion (e.g., Low, Medium, High): "),
    )

    collected = []
    for index in range(num_stations):
        print(f"\nEnter details for Station {index + 1}:")
        entry = {
            "Distance Between Stations (km)": float(input("Distance Between Stations (km): ")),
        }
        for column, prompt in text_prompts:
            entry[column] = input(prompt)
        # Every station starts with a placeholder delay of 0.
        entry["Historical Delay (min)"] = 0
        collected.append(entry)
    return collected


# Example usage of the interactive input functions defined earlier in this cell.
# Everything below runs at cell level and blocks on input() until answered.

# --- Single Station Prediction ---
# Prompt for one station's features, predict, and print the raw prediction.
print("\n--- Single Station Prediction ---")
single_input = get_single_station_input()
single_prediction = predict_single_station(single_input)
print("Single Station Prediction (min delay):", single_prediction)

# --- Multiple Stations Prediction ---
# Prompt for a list of stations, predict each one, and print the predicted
# delay per station (1-based numbering, two decimal places).
print("\n--- Multiple Stations Prediction ---")
multi_inputs = get_multiple_station_inputs()
multi_station_predictions = predict_multiple_stations(multi_inputs)
for i, res in enumerate(multi_station_predictions):
    print(f"Station {i+1} predicted delay: {res['predicted_delay']:.2f} min")

Mean Absolute Error (minutes): 30.755865850030563
R^2 Score (model accuracy): 93.17906382833499 %

--- Single Station Prediction ---
Enter details for single station prediction:
Distance Between Stations (km): 75
Weather Conditions (e.g., Clear, Rainy, Foggy): clear
Day of the Week (e.g., Monday, Tuesday): wednesday
Time of Day (e.g., Morning, Afternoon, Evening, Night): evening
Train Type (e.g., Express, Superfast, Local): local
Route Congestion (e.g., Low, Medium, High): low
Single Station Prediction (min delay): 28.55104166666667

--- Multiple Stations Prediction ---
Enter the number of stations for prediction: 2

Enter details for Station 1:
Distance Between Stations (km): 75
Weather Conditions (e.g., Clear, Rainy, Foggy): clear
Day of the Week (e.g., Monday, Tuesday): wednesday
Time of Day (e.g., Morning, Afternoon, Evening, Night): evening
Train Type (e.g., Express, Superfast, Local): local
Route Congestion (e.g., Low, Medium, High): low

Enter details for Station 2:
Distance Bet