In [6]:


import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Seed for reproducibility.
# The record generator draws from BOTH NumPy's global RNG (passenger demand)
# and the stdlib `random` module (place / route / time slot / peak type /
# traffic choices), so both generators must be seeded; seeding only NumPy
# leaves most columns different on every run.
np.random.seed(42)
random.seed(42)

# Constants
city = 'Chennai'
routes = ['Route_1', 'Route_2', 'Route_3', 'Route_4', 'Route_5', 'Route_6']
# Label attached to a record whose time slot is a peak slot.
peak_types = ['School', 'College', 'Office']
# Time slots treated as peak (higher passenger demand).
peak_times = ['Morning_School', 'Morning_Office', 'Afternoon_School', 'Evening_Office']
# Time slots treated as non-peak (lower passenger demand).
non_peak_times = ['Late_Morning', 'Early_Afternoon', 'Night']
# Every slot a record may be assigned: peak slots first, then non-peak.
time_of_day = peak_times + non_peak_times
places = ['T. Nagar', 'Adyar', 'Velachery', 'Guindy', 'Anna Nagar', 'Tambaram', 'Mylapore', 'Vadapalani', 'Kodambakkam', 'Porur']

# Define function to simulate traffic conditions
def traffic_conditions():
    """Return one traffic level picked at random: 'Low', 'Moderate' or 'High'."""
    levels = ['Low', 'Moderate', 'High']
    return random.choice(levels)

# Define function to simulate passenger demand
def passenger_demand(time):
    """Return a random passenger count for the given time slot.

    Slots listed in ``peak_times`` draw from 80..199; every other slot
    draws from 20..79 (the upper bound passed to ``randint`` is exclusive).
    """
    low, high = (80, 200) if time in peak_times else (20, 80)
    return np.random.randint(low, high)

# Define function to simulate bus availability (utilization)
def bus_utilization(demand, traffic):
    """Return the number of buses to schedule for a demand and traffic level.

    The demand is floor-divided by 10 for 'Low' traffic, by 8 for
    'Moderate' traffic, and by 5 for any other traffic value.
    """
    divisor = 10 if traffic == 'Low' else 8 if traffic == 'Moderate' else 5
    return demand // divisor

# Generate 1000 records with peak hour and non-peak hour data
def _make_record():
    """Build one synthetic schedule row as a dict keyed by column name.

    The random draws happen in the same order as the columns they feed:
    place, route, time slot, peak type (peak slots only), demand, traffic.
    """
    place = random.choice(places)
    route = random.choice(routes)
    slot = random.choice(time_of_day)
    # Only peak slots get a School/College/Office label.
    if slot in peak_times:
        peak_type = random.choice(peak_types)
    else:
        peak_type = 'Non_Peak'
    demand = passenger_demand(slot)
    traffic = traffic_conditions()
    return {
        'Place': place,
        'Route': route,
        'Time_of_Day': slot,
        'Peak_Type': peak_type,
        'Passenger_Demand': demand,
        'Traffic_Conditions': traffic,
        'Scheduled_Buses': bus_utilization(demand, traffic),
    }


# 1000 records mixing peak and non-peak time slots
data = [_make_record() for _ in range(1000)]

# Tabular view of the generated records
df = pd.DataFrame(data)

# Persist the dataset as CSV (no index column).
# NOTE(review): the destination is an absolute, machine-specific path.
df.to_csv(r'C:\Users\SEC\Desktop\abc\chennai_bus_scheduling_dataset_1000 (1).csv', index=False)

# Preview the first 5 rows
print(df.head())

# Step 2: Function to search the dataset by place name
def search_by_place(place_name):
    """Return the rows of ``df`` whose 'Place' contains ``place_name``.

    The match is a case-insensitive, literal substring test. The query is
    deliberately NOT interpreted as a regular expression: place names such
    as "T. Nagar" contain regex metacharacters, and free-form user input
    like "(" would otherwise raise ``re.error``.

    Args:
        place_name: Text to look for inside the 'Place' column.

    Returns:
        A DataFrame restricted to the Place, Time_of_Day, Peak_Type,
        Passenger_Demand, Traffic_Conditions and Scheduled_Buses columns
        when at least one row matches; otherwise a "No data found ..."
        message string.
    """
    # regex=False -> literal matching; na=False -> rows with a missing
    # place count as non-matching instead of breaking the boolean mask.
    mask = df['Place'].str.contains(place_name, case=False, regex=False, na=False)
    result = df[mask]

    if not result.empty:
        return result[['Place', 'Time_of_Day', 'Peak_Type', 'Passenger_Demand', 'Traffic_Conditions', 'Scheduled_Buses']]
    else:
        return f"No data found for the place: {place_name}"

# Step 3: Search based on user input
# Strip surrounding whitespace so a stray leading/trailing space typed at the
# prompt does not turn a valid place name into a "no data found" result.
user_input = input("Enter the place name to search (e.g., Velachery, T. Nagar): ").strip()
search_result = search_by_place(user_input)
print(search_result)


         Place    Route      Time_of_Day Peak_Type  Passenger_Demand  \
0    Velachery  Route_4  Early_Afternoon  Non_Peak                58   
1  Kodambakkam  Route_3   Morning_School    School               131   
2   Vadapalani  Route_3   Evening_Office    Office               172   
3     Mylapore  Route_6   Evening_Office    Office                94   
4     Tambaram  Route_2            Night  Non_Peak                62   

  Traffic_Conditions  Scheduled_Buses  
0           Moderate                7  
1               High               26  
2               High               34  
3           Moderate               11  
4           Moderate                7  
No data found for the place: Enter
