# Generation of data

In [None]:
# Specifying various imports
import requests
import pandas as pd
import random
from statsmodels.tsa.api import SimpleExpSmoothing

In [None]:
# Expanded list of Mumbai-specific areas
areas = {
    "Urban": ["Bandra", "Dadar", "Andheri", "Churchgate", "Colaba"],
    "Suburban": ["Powai", "Thane", "Navi Mumbai", "Goregaon", "Mulund"],
    "Rural": ["Kalyan", "Lonavala", "Malad", "Virar", "Panvel"]
}

# Function to map the location category
def map_location(category):
    return random.choice(areas[category])

# Load the dataset from CSV
file_path = 'dynamic_pricing.csv'  # Replace with your file's path
df = pd.read_csv(file_path)

# Update the Location_Category column
df["Location_Category"] = df["Location_Category"].map(map_location)

# Save the updated dataset back to a CSV file
output_file_path = 'past_rides.csv'  # Replace with your desired output file path
df.to_csv(output_file_path, index=False)

print("Updated dataset saved to:", output_file_path)

### Past data analysis:
On the Analysis of previous data, We predicted how many drivers should assigned in each locality at the start of the day

In [18]:
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import SimpleExpSmoothing

# Load the dataset
file_path = 'past_rides.csv'
df = pd.read_csv(file_path)

# Preprocess data: Aggregate drivers by Location and Time of Booking
df_grouped = df.groupby(['Location_Category', 'Time_of_Booking'])['Number_of_Drivers'].sum().reset_index()

# Create a complete cross-join of all locations and times to ensure no missing combinations
locations = df['Location_Category'].unique()
times = ['Morning', 'Afternoon', 'Evening', 'Night']
full_index = pd.MultiIndex.from_product([locations, times], names=['Location_Category', 'Time_of_Booking'])
df_grouped = df_grouped.set_index(['Location_Category', 'Time_of_Booking']).reindex(full_index, fill_value=0).reset_index()

# Pivot the data for forecasting
forecast_data = df_grouped.pivot(index='Time_of_Booking', columns='Location_Category', values='Number_of_Drivers')

# Replace NaN with 0
forecast_data = forecast_data.fillna(0)

# Forecast using Simple Exponential Smoothing for each locality
predictions = {}
for locality in forecast_data.columns:
    series = forecast_data[locality]
    
    # Ensure series is numeric and has at least some non-zero values
    series = pd.to_numeric(series, errors='coerce').fillna(0)
    
    try:
        # If all zeros, use average from other times/locations
        if series.sum() == 0:
            fallback_value = int(df_grouped['Number_of_Drivers'].mean())
            predictions[locality] = [fallback_value] * 4
        else:
            model = SimpleExpSmoothing(series).fit()
            predictions[locality] = [max(0, int(x)) for x in model.forecast(4)]
    except Exception as e:
        print(f"Forecasting error for {locality}: {e}")
        # Absolute fallback to global average
        fallback_value = int(df_grouped['Number_of_Drivers'].mean())
        predictions[locality] = [fallback_value] * 4

# Combine predictions into a DataFrame
forecast_df = pd.DataFrame(predictions, index=['Morning', 'Afternoon', 'Evening', 'Night'])

# Save predictions to a CSV file
output_file_path = 'driver_forecast_by_time_of_day.csv'
forecast_df.to_csv(output_file_path)

print("Forecast saved to:", output_file_path)
print("\nPredicted Drivers per Location and Time:")
print(forecast_df)

Forecast saved to: driver_forecast_by_time_of_day.csv

Predicted Drivers per Location and Time:
           Andheri  Bandra  Churchgate  Colaba  Dadar  Goregaon  Kalyan  \
Morning        441     460         322     523    716       524     269   
Afternoon      441     460         322     523    716       524     269   
Evening        441     460         322     523    716       524     269   
Night          441     460         322     523    716       524     269   

           Lonavala  Malad  Mulund  Navi Mumbai  Panvel  Powai  Thane  Virar  
Morning         547    380     581          378     180    588    208    523  
Afternoon       547    380     581          378     180    588    208    523  
Evening         547    380     581          378     180    588    208    523  
Night           547    380     581          378     180    588    208    523  


  _index = to_datetime(index)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  _index = to_datetime(index)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  _index = to_datetime(index)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  _index = to_datetime(index)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  _index = to_datetime(index)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  _index = to_datetime(index)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  _index = to_datetime(index)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  _index = to_datetime(index)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  _index

## Generation of data of driver and passengers into individual files
### Saving "Driver_ID, Driver_status, vehicle_type, driver_location, Driver_rating" into "Driver.csv" file

In [None]:
# Load the CSV files
csv1 = pd.read_csv('csv1.csv')  # contains ride and vehicle details
csv2 = pd.read_csv('csv2.csv')  # contains driver ride data

# Prepare a mapping of Vehicle_Type to Expected_Ride_Duration and Average_Ratings
vehicle_info = csv1[['Vehicle_Type', 'Expected_Ride_Duration', 'Average_Ratings']]

# Merge driver details with the vehicle info based on Vehicle_Type (assumed match)
# Here you may need to refine how to link the driver ride data with the ride info
merged_df = pd.merge(csv2, vehicle_info, left_on='fare_amount', right_on='Expected_Ride_Duration', how='left')

# Extract relevant columns
driver_data = merged_df[['Driver_ID', 'fare_amount', 'pickup_datetime', 'pickup_latitude', 'pickup_longitude', 'Vehicle_Type', 'Average_Ratings']]

# Generate new columns
driver_data['Driver_status'] = driver_data['fare_amount'].apply(lambda x: 'Active' if x > 0 else 'Inactive')  # Example rule for status
driver_data['Driver_time'] = driver_data['fare_amount']  # Assuming time is correlated with fare for simplicity
driver_data['driver_location'] = driver_data.apply(lambda row: (row['pickup_latitude'], row['pickup_longitude']), axis=1)
driver_data['Driver_rating'] = driver_data['Average_Ratings']

# Save the final DataFrame into "Driver.csv"
driver_data = driver_data[['Driver_ID', 'Driver_status', 'Driver_time', 'Vehicle_Type', 'driver_location', 'Driver_rating']]
driver_data.to_csv('Driver.csv', index=False)

### The following code is of finding the shortest distance over road between two points on the map
Here we just give 2 coordinates to the function and then findout the shortest distance between them.
We get the travel time between the two points along with the distance between. This travel time calculation takes the current traffic in the way into account.

In [None]:
def get_osrm_route(start_coords, end_coords):
    """
    Fetches distance and travel time using OSRM routing service.

    Parameters:
        start_coords (tuple): (latitude, longitude) of the start point.
        end_coords (tuple): (latitude, longitude) of the end point.

    Returns:
        dict: Distance in kilometers, travel time in minutes.
    """
    # OSRM public API endpoint
    base_url = "https://router.project-osrm.org/route/v1/driving"
    
    # Format coordinates for the API
    coordinates = f"{start_coords[1]},{start_coords[0]};{end_coords[1]},{end_coords[0]}"
    
    # API request
    response = requests.get(f"{base_url}/{coordinates}", params={"overview": "false"})
    data = response.json()
    
    print(data)
    
    if response.status_code == 200 and data.get("routes"):
        # Extract distance (meters) and duration (seconds)
        distance_m = data["routes"][0]["distance"]
        duration_sec = data["routes"][0]["duration"]
        
        # Convert to desired units
        distance_km = distance_m / 1000  # meters to kilometers
        duration_min = duration_sec / 60  # seconds to minutes
        
        return {
            "distance_km": distance_km,
            "travel_time_minutes": duration_min
        }
    else:
        raise ValueError(f"Error fetching route: {data.get('message', 'Unknown error')}")

# Example usage
if __name__ == "__main__":
    start = (19.133511, 72.91227)  # Hostel-15, IIT Bombay
    end = (19.132336, 72.918861)    # IIT Bombay Main Gate
    
    try:
        route_info = get_osrm_route(start, end)
        print(f"Distance: {route_info['distance_km']:.2f} km")
        print(f"Travel Time: {route_info['travel_time_minutes']:.2f} minutes")
    except ValueError as e:
        print(e)
