In [None]:
# STEP 1: Import Libraries
import pandas as pd

# STEP 2: Load the dataset (from uploaded file)
# Expects 'uber.csv' to be present in the current working directory.
df = pd.read_csv('uber.csv')

# STEP 3: Preview the data
print("First 5 rows:\n", df.head())
print("\nInfo:\n")
# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() would emit a stray "None" after the summary.
df.info()

# STEP 4: Handle missing values
# Report the per-column null counts first, then drop every row that has
# at least one missing value.
print("\nMissing values:\n", df.isnull().sum())
df = df.dropna()

# STEP 5: Convert datetime column
# errors='coerce' turns unparseable timestamps into NaT instead of raising;
# the rows that ended up as NaT are dropped right after.
df['pickup_datetime'] = pd.to_datetime(df['pickup_datetime'], errors='coerce')
df = df.dropna(subset=['pickup_datetime'])

# STEP 6: Feature Engineering
# Derive calendar features from the parsed pickup timestamp.
df['hour'] = df['pickup_datetime'].dt.hour
df['day'] = df['pickup_datetime'].dt.day
df['month'] = df['pickup_datetime'].dt.month
df['weekday'] = df['pickup_datetime'].dt.day_name()
df['year'] = df['pickup_datetime'].dt.year
df['day_of_year'] = df['pickup_datetime'].dt.dayofyear

# Hours of the day that are labelled 'Peak' (07-09 and 17-19); every other
# hour is 'Off-Peak'. isin() + map() does the labelling in one vectorized
# pass instead of calling a Python lambda once per row.
PEAK_HOURS = [7, 8, 9, 17, 18, 19]
df['peak_hour'] = df['hour'].isin(PEAK_HOURS).map({True: 'Peak', False: 'Off-Peak'})

# STEP 7: Remove invalid rows
# Keep only rows with a strictly positive fare and at least one passenger.
df = df[(df['fare_amount'] > 0) & (df['passenger_count'] > 0)]

# STEP 8: Export cleaned CSV
# index=False keeps the DataFrame index out of the output file.
df.to_csv('uber_cleaned.csv', index=False)
print("\n✅ Cleaned dataset saved as 'uber_cleaned.csv'")


In [None]:
# STEP 9: Download the cleaned CSV file
# Colab-only step: `google.colab` is importable only inside a Google Colab
# runtime. Hands the file written in STEP 8 to Colab's download helper.
from google.colab import files

cleaned_csv_path = 'uber_cleaned.csv'
files.download(cleaned_csv_path)
