In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# ========== DIMENSION TABLES (STAR SCHEMA) ==========

# Customer Dimension
# Five sample customers, written one record per row; the surrogate key
# customer_id runs 1..5.
dim_customer = pd.DataFrame(
    [
        (1, 'Alice', 'alice@mail.com', 'F', 1985),
        (2, 'Bob', 'bob@mail.com', 'M', 1990),
        (3, 'Charlie', 'charlie@mail.com', 'M', 1988),
        (4, 'Diana', 'diana@mail.com', 'F', 1995),
        (5, 'Ethan', 'ethan@mail.com', 'M', 2000),
    ],
    columns=['customer_id', 'name', 'email', 'gender', 'birth_year'],
)

# Driver Dimension
# Three sample drivers, one record per row: key, display name, licence
# number and rating.
dim_driver = pd.DataFrame(
    [
        (1, 'Rick', 'LIC001', 4.5),
        (2, 'Morty', 'LIC002', 4.8),
        (3, 'Summer', 'LIC003', 4.2),
    ],
    columns=['driver_id', 'name', 'license_number', 'rating'],
)

# Location Dimension
# Four pickup/drop-off areas keyed 1..4. Every area shares the same city,
# so that column is filled by broadcasting a single value.
dim_location = pd.DataFrame({
    'location_id': range(1, 5),
    'area': ['Indiranagar', 'Koramangala', 'MG Road', 'Whitefield'],
})
dim_location['city'] = 'Bangalore'

# Time Dimension
# One row per calendar day, starting 2025-04-01. The date range is built
# exactly once; the key and the calendar attributes are derived from the
# resulting frame, so the start date and row count live in a single place
# and the derived columns cannot drift out of sync with 'date'.
dim_time = pd.DataFrame({
    'date': pd.date_range(start='2025-04-01', periods=10),
})
# Surrogate key 1..N, placed first to keep the original column order.
dim_time.insert(0, 'time_id', range(1, len(dim_time) + 1))
# Random hour of day in [0, 24). NOTE: np.random is not seeded in this
# script, so these values differ from run to run.
dim_time['hour'] = np.random.randint(0, 24, size=len(dim_time))
dim_time['day_of_week'] = dim_time['date'].dt.day_name()
dim_time['month'] = dim_time['date'].dt.month

# Payment Dimension
# Five sample payment records, one per row: key, payment method and the
# status recorded for it.
dim_payment = pd.DataFrame(
    [
        (1, 'Credit Card', 'Completed'),
        (2, 'UPI', 'Failed'),
        (3, 'Wallet', 'Completed'),
        (4, 'Cash', 'Pending'),
        (5, 'Net Banking', 'Completed'),
    ],
    columns=['payment_id', 'method', 'status'],
)

# ========== FACT TABLE ==========

# Ten synthetic trips. Columns are attached one at a time, in the same order
# as the random draws they consume, so the sequence of np.random calls is
# fixed by the statement order below.
fact_trip = pd.DataFrame({'trip_id': range(1, 11)})

# Foreign keys: each is sampled (with replacement, numpy's default) from the
# key column of the matching dimension table.
fact_trip['customer_id'] = np.random.choice(dim_customer['customer_id'], len(fact_trip))
fact_trip['driver_id'] = np.random.choice(dim_driver['driver_id'], len(fact_trip))
fact_trip['start_location_id'] = np.random.choice(dim_location['location_id'], len(fact_trip))
fact_trip['end_location_id'] = np.random.choice(dim_location['location_id'], len(fact_trip))
fact_trip['time_id'] = np.random.choice(dim_time['time_id'], len(fact_trip))
fact_trip['payment_id'] = np.random.choice(dim_payment['payment_id'], len(fact_trip))

# Measures: fare drawn uniformly from [50, 500) and rounded to 2 decimals;
# rating is an integer from 1 to 5 inclusive.
fact_trip['trip_fare'] = np.random.uniform(50, 500, len(fact_trip)).round(2)
fact_trip['trip_rating'] = np.random.randint(1, 6, len(fact_trip))

# Preview the head of the fact table and two of the dimension tables.
# Each call prints the label on its own line and the frame below it; the
# first two finish with a trailing space, a newline and one blank line.
print("📌 FACT TABLE (fact_trip):", fact_trip.head(), sep="\n", end=" \n\n")

print("📌 DIM TABLE (dim_customer):", dim_customer.head(), sep="\n", end=" \n\n")

print("📌 DIM TABLE (dim_driver):", dim_driver.head(), sep="\n")

📌 FACT TABLE (fact_trip):
   trip_id  customer_id  driver_id  start_location_id  end_location_id  \
0        1            1          2                  2                2   
1        2            3          3                  3                3   
2        3            1          2                  2                1   
3        4            1          3                  4                2   
4        5            2          1                  3                4   

   time_id  payment_id  trip_fare  trip_rating  
0        6           5      62.46            1  
1        4           5      92.91            5  
2        1           2     198.50            1  
3        8           2     301.20            5  
4        7           3     160.86            5   

📌 DIM TABLE (dim_customer):
   customer_id     name             email gender  birth_year
0            1    Alice    alice@mail.com      F        1985
1            2      Bob      bob@mail.com      M        1990
2            3  Charli