# In [4]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

# Step 1: Data Preprocessing
# Load the raw trip records from the CSV file.
data = pd.read_csv('uber.csv')

# Discard, in place, every row that contains at least one missing value.
data.dropna(axis=0, how='any', inplace=True)

# Replace the pickup timestamp strings with parsed datetime values.
pickup_strings = data['pickup_datetime']
data['pickup_datetime'] = pd.to_datetime(pickup_strings)

# Calculate distance between pickup and dropoff
def haversine_distance(lat1, lon1, lat2, lon2, radius=3958.8):
    """Return the great-circle distance between two points on a sphere.

    Uses the haversine formula. With the default ``radius`` (the Earth's
    mean radius in miles) the result is in miles, which matches the unit
    the user is asked for later in this script.

    Args:
        lat1: Latitude of the first point, in decimal degrees.
        lon1: Longitude of the first point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.
        radius: Sphere radius; the result is expressed in the same unit.
            Pass 6371.0 to get kilometres instead of miles.

    Returns:
        The distance as a float for scalar inputs, or element-wise for
        NumPy arrays / pandas Series (inputs are broadcast by NumPy).
    """
    # The trigonometric functions work in radians.
    lat1, lon1, lat2, lon2 = (
        np.radians(value) for value in (lat1, lon1, lat2, lon2)
    )

    half_dlat = (lat2 - lat1) / 2.0
    half_dlon = (lon2 - lon1) / 2.0

    # "a" is the square of half the chord length between the two points.
    a = (
        np.sin(half_dlat) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin(half_dlon) ** 2
    )
    # Rounding can push "a" marginally outside [0, 1] for identical or
    # antipodal points, which would make sqrt/arcsin return NaN.
    a = np.clip(a, 0.0, 1.0)

    return radius * 2.0 * np.arcsin(np.sqrt(a))

# Compute the pickup-to-dropoff distance for every trip, one row at a time
def _trip_distance(row):
    """Return haversine_distance for one row's pickup/dropoff coordinates."""
    return haversine_distance(
        row['pickup_latitude'],
        row['pickup_longitude'],
        row['dropoff_latitude'],
        row['dropoff_longitude'],
    )


data['distance'] = data.apply(_trip_distance, axis=1)

# Force the new column to a float dtype
data['distance'] = data['distance'].astype(float)


# Step 2: Feature Selection
# Column to predict, and the columns used as model inputs.
target = 'fare_amount'
features = ['passenger_count', 'distance']

# Step 3: Split Data
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X, y = data[features], data[target]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Steps 4-5: Model Building and Training
# fit() returns the estimator itself, so construction and training are chained.
model = xgb.XGBRegressor().fit(X_train, y_train)

# Step 6: Prediction
# Predict fares for the held-out rows.
y_pred = model.predict(X_test)

# Step 7: Evaluation
# Report the mean absolute difference between actual and predicted fares.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Absolute Error: {mae}")

# Step 8: User Input
# Keep prompting until both values parse and are usable as model inputs.
while True:
    try:
        passenger_count = int(input("Enter the number of passengers: "))
        distance = float(input("Enter the distance of travel in miles: "))
        # int() accepts negative numbers and float() additionally accepts
        # "nan" and "inf"; none of these describe a real trip, so treat them
        # like any other invalid entry and ask again.
        if passenger_count < 0 or distance < 0 or not np.isfinite(distance):
            raise ValueError(
                "passenger count and distance must be finite and non-negative"
            )
        break
    except ValueError:
        print("Invalid input. Please enter a valid number.")

# Create a single-row DataFrame whose column names match the training features
user_input = pd.DataFrame({'passenger_count': [passenger_count], 'distance': [distance]})

# Step 9: Make Predictions for User Input
fare_prediction = model.predict(user_input)

# Step 10: Display the Prediction
# predict() returns one value per input row; there is exactly one row here.
print(f"Estimated fare amount: ${fare_prediction[0]:.2f}")


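# Output when this cell was run:
#   ModuleNotFoundError: No module named 'xgboost'
# The xgboost package must be installed in the environment before running.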