In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Read the cleaned bus listings from disk
df = pd.read_csv("cleaned_bus.csv.csv")

# 1. Discard the leftover index column when the CSV carries one;
#    errors="ignore" turns the drop into a no-op if the column is absent
df = df.drop(columns=["Unnamed: 0"], errors="ignore")

# 2. Derive the hour component of the departure and arrival times.
#    Both new columns are appended after the existing ones, in this order.
df = df.assign(**{
    "Departure Hour": lambda frame: pd.to_datetime(frame["Departure Time"]).dt.hour,
    "Arrival Hour": lambda frame: pd.to_datetime(frame["Arrival Time"]).dt.hour,
})

# 3. Convert Travel Duration to total minutes
def duration_to_minutes(duration):
    """Convert a textual travel duration into a total number of minutes.

    The value is parsed by unit rather than by position, so a string that
    carries only minutes (e.g. ``"45mins"``) is not mistaken for hours, and
    strings without a space between the parts (e.g. ``"10hrs30mins"``) are
    handled. Unit matching is case-insensitive and accepts any unit spelling
    that starts with ``h`` (hours) or ``m`` (minutes).

    When the string contains no recognisable unit at all, the original
    positional rule is applied: the first whitespace-separated number is
    hours and the second, if present, is minutes.

    Parameters
    ----------
    duration : str
        Duration text such as ``"10hrs 30mins"``, ``"2hrs"`` or ``"45mins"``.

    Returns
    -------
    int
        ``hours * 60 + minutes``; ``0`` for an empty string.

    Raises
    ------
    AttributeError
        If ``duration`` is not a string (for example a missing value).
    ValueError
        If no unit is found and the positional parts are not integers.
    """
    import re  # local import so the function does not rely on other cells

    text = duration.strip().lower()

    # The lookbehind stops a match from starting in the middle of a number
    # such as "1.5hrs", which must keep failing loudly instead of being
    # read as 5 hours.
    hour_match = re.search(r"(?<![\d.])(\d+)\s*h", text)
    minute_match = re.search(r"(?<![\d.])(\d+)\s*m", text)

    if hour_match or minute_match:
        hours = int(hour_match.group(1)) if hour_match else 0
        mins = int(minute_match.group(1)) if minute_match else 0
        return hours * 60 + mins

    # No unit present: fall back to the positional interpretation.
    parts = text.replace("hrs", "").replace("mins", "").split()
    hours = int(parts[0]) if len(parts) > 0 else 0
    mins = int(parts[1]) if len(parts) > 1 else 0
    return hours * 60 + mins

# Append the travel duration expressed in minutes as a new trailing column
df = df.assign(**{"Travel Duration (min)": df["Travel Duration"].apply(duration_to_minutes)})

# 4. Integer-encode the categorical columns, one dedicated encoder per column
#    so each fitted mapping stays available under its own name
le_operator, le_bus_type, le_source, le_destination = (LabelEncoder() for _ in range(4))

for column, encoder in (
    ("Operator", le_operator),
    ("Bus Type", le_bus_type),
    ("Source", le_source),
    ("Destination", le_destination),
):
    df[column] = encoder.fit_transform(df[column])

# 5. The raw text columns are no longer needed once their numeric
#    counterparts exist
df = df.drop(columns=["Departure Time", "Arrival Time", "Travel Duration"])

# 6. Write the processed frame to disk without the index
df.to_csv("encoded_bus_data.csv", index=False)

print("✅ Encoding completed. Encoded data saved as encoded_bus_data.csv")
print(df.head())


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error
import joblib

# Read the encoded dataset from disk
df = pd.read_csv("encoded_bus_data.csv")

# Target is the price column; every remaining column is used as a feature
y = df["price"]
X = df.drop(columns=["price"])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Fit a 200-tree random forest (fit() returns the estimator itself)
model = RandomForestRegressor(random_state=42, n_estimators=200).fit(X_train, y_train)

# Score the fitted model on the held-out rows
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print("✅ R² Score:", r2)
print("✅ MAE:", mae)

# Persist the fitted model to disk
joblib.dump(model, "travel_model.pkl")
print("✅ Model saved as travel_model.pkl")


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
import joblib

# Load the encoded dataset from disk
df = pd.read_csv("encoded_bus_data.csv")

# Features and targets: six input columns are used to predict six output columns
X = df[["Operator", "Bus Type", "Source", "Destination", "distance", "rating"]]
y = df[["price", "Seats Left", "Window Seats", "Departure Hour", "Arrival Hour", "Travel Duration (min)"]]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train one random forest per target column via the multi-output wrapper
model = MultiOutputRegressor(RandomForestRegressor(n_estimators=200, random_state=42))
model.fit(X_train, y_train)

# Evaluate on the held-out rows so the saved model has a recorded quality
# baseline (the split above was previously created but never used).
y_pred = model.predict(X_test)
# score() reports R² averaged uniformly over all targets
print("✅ R² Score (averaged over targets):", model.score(X_test, y_test))
# Mean absolute error for each target column, computed column-wise
print("✅ MAE per target:")
print((y_test - y_pred).abs().mean())

# Persist the fitted model to disk
joblib.dump(model, "travel_multi_model.pkl")
print("✅ Multi-output model trained & saved.")
