In [None]:
import pandas as pd
import requests
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import numpy as np
import random  # For generating simulated traffic conditions

# Load the dataset from Kaggle
def load_dataset(path='/Users/kavyasrialladi/Downloads/taxi_train.csv'):
    """Load the taxi-fare training data from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to the path this script
            originally hard-coded, so calling ``load_dataset()`` with no
            arguments behaves as before.

    Returns:
        The loaded ``pandas.DataFrame``.

    Raises:
        ValueError: If the file has no ``fare_amount`` column.
    """
    df = pd.read_csv(path)

    # Validate first so the success message is only printed for a dataset
    # that is actually usable by the rest of the script.
    if 'fare_amount' not in df.columns:
        raise ValueError("Error: 'fare_amount' column not found in the dataset.")

    print("Dataset loaded successfully")
    return df

# Get coordinates from the address
def get_coordinates(address):
    """Geocode a free-text address with Nominatim.

    Args:
        address: The address string to look up.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when the address is
        blank, no match is found, or the geocoding service fails.
    """
    # Nothing to geocode: skip the network round-trip entirely.
    if not address or not address.strip():
        return None

    # Function-scope import so the file's top-level import block is untouched.
    # GeocoderServiceError is the base class of GeocoderTimedOut and of the
    # other service-side failures (unavailable, quota exceeded, ...).
    from geopy.exc import GeocoderServiceError

    geolocator = Nominatim(user_agent="taxi_fare_pred")
    try:
        location = geolocator.geocode(address, timeout=10)
    except GeocoderTimedOut:
        print(f"Error: Geocoding service timed out for address: {address}")
        return None
    except GeocoderServiceError as exc:
        # Previously any non-timeout service error crashed the whole script.
        print(f"Error: Geocoding service failed for address: {address} ({exc})")
        return None

    return (location.latitude, location.longitude) if location else None

# Estimate travel time and distance
def estimate_time_and_distance(start_coords, end_coords, avg_speed_kmh=40):
    """Estimate straight-line distance and travel time between two points.

    The distance is the great-circle (haversine) distance. The previous
    planar formula treated one degree of longitude as 111 km at every
    latitude, which overestimates east-west distances away from the equator.

    Args:
        start_coords: ``(latitude, longitude)`` of the start, in degrees.
        end_coords: ``(latitude, longitude)`` of the end, in degrees.
        avg_speed_kmh: Assumed average speed in km/h (default 40).

    Returns:
        ``(time_minutes, distance_km)``, each rounded to 2 decimal places.

    Raises:
        ValueError: If ``avg_speed_kmh`` is not positive.
    """
    if avg_speed_kmh <= 0:
        raise ValueError("avg_speed_kmh must be positive")

    earth_radius_km = 6371.0  # mean Earth radius

    lat1, lon1 = np.radians(start_coords[0]), np.radians(start_coords[1])
    lat2, lon2 = np.radians(end_coords[0]), np.radians(end_coords[1])

    # Haversine formula.
    hav = (
        np.sin((lat2 - lat1) / 2) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2
    )
    # Clamp: rounding error can push the value marginally above 1 for
    # near-antipodal points, which would make arcsin return NaN.
    central_angle = 2 * np.arcsin(np.sqrt(min(1.0, hav)))

    distance = earth_radius_km * central_angle
    time = distance / avg_speed_kmh * 60  # hours -> minutes
    return round(time, 2), round(distance, 2)  # Rounded to 2 decimal places

# Traffic analysis with random conditions (as a placeholder)
def traffic_analysis():
    """Return a simulated traffic label.

    Placeholder for real traffic data: one of three fixed labels is picked
    uniformly at random on every call.
    """
    return random.choice(["Light Traffic", "Moderate Traffic", "Heavy Traffic"])

# Get weather data using OpenWeatherMap API
def get_weather_data(coords):
    """Fetch the current weather description from OpenWeatherMap.

    Args:
        coords: ``(latitude, longitude)`` pair.

    Returns:
        The description string of the first weather entry in the response,
        or ``"Unavailable"`` if the request fails, returns a non-200 status,
        or the payload does not have the expected shape.
    """
    import os  # function-scope so the file's import block is untouched

    # NOTE(security): an API key is embedded in source below. It is kept only
    # as a fallback so existing runs keep working; prefer setting the
    # OPENWEATHERMAP_API_KEY environment variable and rotating this key.
    api_key = os.environ.get(
        "OPENWEATHERMAP_API_KEY", "49c3189452981761f7944f4bdc3a24e7"
    )
    lat, lon = coords

    try:
        # HTTPS keeps the key out of clear text; `params` handles URL
        # encoding; `timeout` prevents an indefinite hang (requests has no
        # default timeout).
        response = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params={"lat": lat, "lon": lon, "appid": api_key},
            timeout=10,
        )
    except requests.RequestException as exc:
        # Report only the exception type: the full message can contain the
        # request URL, which includes the API key.
        print(f"Error: Unable to fetch weather data ({type(exc).__name__})")
        return "Unavailable"

    if response.status_code != 200:
        print(f"Error: Unable to fetch weather data (Status code: {response.status_code})")
        return "Unavailable"

    try:
        return response.json()['weather'][0]['description']
    except (ValueError, KeyError, IndexError, TypeError):
        # Body was not JSON or lacked the expected weather[0].description.
        print("Error: Unexpected weather data format")
        return "Unavailable"

# Predict taxi fare using a simple model
def predict_fare(df, distance, duration):
    """Predict a fare from distance and duration with linear regression.

    A ``LinearRegression`` is fitted on the ``distance`` and ``duration``
    columns of ``df`` against ``fare_amount`` (using an 80% training split
    with a fixed seed) and evaluated at the given trip.

    If ``df`` lacks a feature column, that column is filled with uniform
    random placeholder values (distance in [1, 10], duration in [5, 30]).
    Those values are unrelated to the fares, so predictions made from them
    are placeholders too.

    Args:
        df: Training data; must contain ``fare_amount``. It is not modified.
        distance: Trip distance for the prediction.
        duration: Trip duration for the prediction.

    Returns:
        The predicted fare as a float rounded to 2 decimal places.
    """
    feature_columns = ['distance', 'duration']
    placeholder_ranges = {'distance': (1, 10), 'duration': (5, 30)}

    # Copy only the columns we need so the caller's DataFrame is never
    # mutated and a wide dataset is not duplicated in full.
    present = [col for col in feature_columns if col in df.columns]
    data = df[present + ['fare_amount']].copy()

    # Fill in only the columns that are actually missing; a real column that
    # happens to be present is no longer overwritten with random values.
    for col in feature_columns:
        if col not in data.columns:
            low, high = placeholder_ranges[col]
            data[col] = np.random.uniform(low, high, size=len(data))

    # LinearRegression rejects NaN, so drop incomplete rows before fitting.
    data = data.dropna(subset=feature_columns + ['fare_amount'])

    X = data[feature_columns]
    y = data['fare_amount']

    # The held-out 20% is unused here; the split is kept so the model is
    # fitted on the same fraction of the data as before.
    X_train, _X_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = LinearRegression()
    model.fit(X_train, y_train)

    # A DataFrame with matching column names avoids sklearn's
    # feature-name warning at predict time.
    input_data = pd.DataFrame([[distance, duration]], columns=feature_columns)

    predicted_fare = model.predict(input_data)
    return round(float(predicted_fare[0]), 2)  # Rounded to 2 decimal places

# Select the best taxi service based on fare and traffic
def recommend_taxi_service(fare, traffic_condition):
    """Pick a taxi service name from the fare and the traffic label.

    Heavy traffic takes precedence over the fare: it always yields
    "Uber Pool". Otherwise fares under 10 yield "Lyft" and everything else
    yields "UberX".
    """
    if traffic_condition == "Heavy Traffic":
        return "Uber Pool"
    return "Lyft" if fare < 10 else "UberX"

# Main function
def main():
    """Run the interactive fare-estimation flow.

    Loads the dataset, prompts for pickup and dropoff addresses, geocodes
    both, and then prints the estimated time and distance, the weather at
    the pickup point, a simulated traffic condition, the predicted fare and
    a recommended taxi service. If either address cannot be geocoded, an
    error message is printed instead.
    """
    dataset = load_dataset()

    pickup_address = input("Enter pickup location: ")
    dropoff_address = input("Enter dropoff location: ")

    # Both addresses are geocoded before either result is checked.
    pickup_coords = get_coordinates(pickup_address)
    dropoff_coords = get_coordinates(dropoff_address)

    if not (pickup_coords and dropoff_coords):
        print("Invalid address. Please try again.")
        return

    time, distance = estimate_time_and_distance(pickup_coords, dropoff_coords)
    print(f"Estimated time: {time:.2f} minutes, Distance: {distance:.2f} km")

    # Weather is looked up at the pickup location only.
    weather = get_weather_data(pickup_coords)
    print(f"Current Weather Condition: {weather}")

    traffic = traffic_analysis()
    print(f"Traffic Condition: {traffic}")

    fare = predict_fare(dataset, distance, time)
    print(f"Estimated Fare : ${fare:.2f}")

    taxi_service = recommend_taxi_service(fare, traffic)
    print(f"Recommended Taxi Service: {taxi_service}")

# Start the interactive flow only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Dataset loaded successfully
