In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import NearestNeighbors
import numpy as np

In [2]:
# Read the cab-fare CSV from disk into the working DataFrame `df`.
# NOTE(review): the path is absolute and machine-specific, so this cell only
# runs on a machine where that exact file exists.
file_path = r"C:\Users\lenovo\OneDrive\Desktop\github\cab fare\cab_fares_hyderabad.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Preview the first five rows to sanity-check the columns that were loaded.
df.head(n=5)

Unnamed: 0,Pickup_Location,Dropoff_Location,distance_km,duration_min,Car_Fare_inr,Auto_Fare_inr,Bike_Fare_inr
0,Dilsukhnagar,KPHB,22,50,420,359.5,199.0
1,Rajendranagar,Shamshabad,12,25,245,210.25,109.0
2,Charminar,Begumpet,11,35,240,168.25,110.5
3,Secunderabad,LB Nagar,20,45,385,283.75,162.5
4,Secunderabad,Charminar,10,30,220,137.5,105.0


In [4]:
# Add a lowercase copy of each location column ('<name>_Lower') so that later
# lookups can compare names without regard to letter case.
for _source_col in ('Pickup_Location', 'Dropoff_Location'):
    df[f'{_source_col}_Lower'] = df[_source_col].str.lower()

In [5]:
# Encode categorical data (pickup and dropoff locations) as integer labels.
# The encoder is fitted on the union of both columns: fitting on pickups alone
# makes transform() raise ValueError for any place that only ever appears as a
# dropoff. When every dropoff also occurs as a pickup, the set of classes (and
# therefore every label) is the same as with a pickup-only fit.
label_encoder = LabelEncoder()
label_encoder.fit(
    pd.concat(
        [df['Pickup_Location_Lower'], df['Dropoff_Location_Lower']],
        ignore_index=True,
    )
)
df['Pickup_Location_Encoded'] = label_encoder.transform(df['Pickup_Location_Lower'])
df['Dropoff_Location_Encoded'] = label_encoder.transform(df['Dropoff_Location_Lower'])

In [7]:
# Treat A -> B and B -> A as one trip: key every row by the ordered pair of its
# two location codes, then keep only the first row seen for each pair.
df['Route'] = [
    tuple(sorted(code_pair))
    for code_pair in zip(df['Pickup_Location_Encoded'], df['Dropoff_Location_Encoded'])
]
df = df.drop_duplicates(subset=['Route'], keep='first')

In [8]:
# Define features and target variables.
# The features are the two location codes in canonical (sorted) order, taken
# from 'Route'. interpolate_fare() always queries with the sorted pair, so
# fitting on the raw pickup/dropoff orientation would train the estimators on
# points laid out differently from the ones they are later asked about.
X = pd.DataFrame(
    df['Route'].tolist(),
    columns=['Pickup_Location_Encoded', 'Dropoff_Location_Encoded'],
    index=df.index,
)
# Targets: trip distance plus the fare for each of the three vehicle types.
y = df[['distance_km', 'Car_Fare_inr', 'Auto_Fare_inr', 'Bike_Fare_inr']]

In [13]:
# Train a single linear regression that maps the two location codes in X to
# all four target columns in y at once.
model = LinearRegression().fit(X, y)
model  # keep the fitted estimator as the cell's displayed value

In [14]:
# Index the known routes so the single closest one (n_neighbors=1) can be
# looked up for a route that is not in the data.
knn = NearestNeighbors(n_neighbors=1).fit(X)
knn  # keep the fitted estimator as the cell's displayed value

In [18]:
def find_exact_match(pickup_location, dropoff_location):
    """Look up a recorded trip between two named locations, in either direction.

    Both names are lowercased and encoded with the notebook-level
    ``label_encoder``; the sorted pair of codes is then compared against the
    ``Route`` column of the notebook-level ``df``.

    Args:
        pickup_location: Name of the starting location (any letter case).
        dropoff_location: Name of the destination (any letter case).

    Returns:
        ``(distance, car_fare, auto_fare, bike_fare)`` read from the first
        matching row, or ``None`` when no row has that route.

    Errors raised by ``label_encoder.transform`` are not caught here and
    propagate to the caller.
    """
    names = [pickup_location.lower(), dropoff_location.lower()]
    codes = [label_encoder.transform([name])[0] for name in names]

    # Sorting makes the key identical for A -> B and B -> A.
    route_key = tuple(sorted(codes))

    hits = df[df['Route'] == route_key]
    if hits.empty:
        return None

    fields = ('distance_km', 'Car_Fare_inr', 'Auto_Fare_inr', 'Bike_Fare_inr')
    return tuple(hits[field].values[0] for field in fields)


In [19]:
def interpolate_fare(pickup_location, dropoff_location):
    """Estimate distance and fares for a route with no exact match in ``df``.

    Both names are lowercased and encoded with the notebook-level
    ``label_encoder``, and the two codes are put in sorted order so that
    A -> B and B -> A produce the same query.

    Args:
        pickup_location: Name of the starting location (any letter case).
        dropoff_location: Name of the destination (any letter case).

    Returns:
        ``(nearest_distance, predicted_row)``. ``nearest_distance`` is the
        ``distance_km`` of the row that ``knn`` reports as closest to the
        query. ``predicted_row`` is ``model``'s output for the query, with one
        value per target column the model was fitted on, in that order (the
        notebook fits on distance, car, auto, bike, so indices 1..3 are the
        fares).

    Errors raised by ``label_encoder.transform`` are not caught here and
    propagate to the caller.
    """
    pickup_location_lower = pickup_location.lower()
    dropoff_location_lower = dropoff_location.lower()

    pickup_encoded = label_encoder.transform([pickup_location_lower])[0]
    dropoff_encoded = label_encoder.transform([dropoff_location_lower])[0]

    # Ensure the route is considered bidirectional
    route = tuple(sorted([pickup_encoded, dropoff_encoded]))

    # One single-row frame feeds both estimators. Both were fitted on a
    # DataFrame with these column names; querying knn with a bare ndarray (as
    # before) carries the same numbers but makes scikit-learn warn that the
    # feature names are missing.
    new_data = pd.DataFrame({
        'Pickup_Location_Encoded': [route[0]],
        'Dropoff_Location_Encoded': [route[1]]
    })

    # kneighbors returns (distances, indices); take the position of the single
    # nearest fitted row.
    nearest_pos = knn.kneighbors(new_data)[1][0][0]

    # Positional lookup: knn was fitted on rows in df's current order.
    nearest_distance = df['distance_km'].iloc[nearest_pos]

    # Predict all targets for the query and return the single result row.
    predicted_fares = model.predict(new_data)

    return nearest_distance, predicted_fares[0]

In [23]:
# Display unique locations sorted alphabetically and combined
all_unique_locations = sorted(set(df['Pickup_Location']) | set(df['Dropoff_Location']))
# Lowercase copies for validating user input; the lookup functions lowercase
# names before encoding them, so validation is case-insensitive too.
known_locations_lower = {loc.lower() for loc in all_unique_locations}

print("Available Locations:")
for loc in all_unique_locations:
    print(f"- {loc}")

# Input for pickup and dropoff locations
pickup_location = input("\nEnter pickup location: ").strip()
dropoff_location = input("Enter dropoff location: ").strip()

# Reject names that are not in the list before calling the lookup functions,
# which would otherwise fail inside the label encoder with a raw ValueError.
unknown_locations = [
    name
    for name in (pickup_location, dropoff_location)
    if name.lower() not in known_locations_lower
]

if unknown_locations:
    print(f"Unknown location(s): {', '.join(unknown_locations)}. "
          "Please choose names from the list above.")
else:
    # Check for exact match first
    exact_match = find_exact_match(pickup_location, dropoff_location)
    if exact_match is not None:
        distance, car_fare, auto_fare, bike_fare = exact_match
        # The stray bare "Distance:" heading that used to precede this line
        # duplicated the label and has been dropped.
        print(f"Distance: {distance:.2f} km")
        print(f"Car Fare: ₹{car_fare:.2f}")
        print(f"Auto Fare: ₹{auto_fare:.2f}")
        print(f"Bike Fare: ₹{bike_fare:.2f}")
    else:
        # Predict and display fares and distance if no exact match.
        # predicted_fares[0] is the model's own distance estimate; the
        # displayed distance comes from the nearest known route instead.
        nearest_distance, predicted_fares = interpolate_fare(pickup_location, dropoff_location)
        print(f"Estimated Distance: {nearest_distance:.2f} km")
        print("Estimated Fares:")
        print(f"Car Fare: ₹{predicted_fares[1]:.2f}")
        print(f"Auto Fare: ₹{predicted_fares[2]:.2f}")
        print(f"Bike Fare: ₹{predicted_fares[3]:.2f}")

Available Locations:
- Banjara Hills
- Begumpet
- Charminar
- Dilsukhnagar
- Gachibowli
- Hitech City
- Hyderguda
- Jubilee Hills
- KPHB
- Kachiguda
- Koti
- Kukatpally
- LB Nagar
- Madhapur
- Mehdipatnam
- Moosapet
- Rajendranagar
- Secunderabad
- Shamshabad
- Somajiguda



Enter pickup location:  Banjara Hills
Enter dropoff location:  Kachiguda


Distance:
Distance: 9.00 km
Car Fare: ₹285.00
Auto Fare: ₹150.00
Bike Fare: ₹105.00
