In [1]:
import pandas as pd
import ipywidgets

# Load the data
file_path = 'data/cab_rides.csv'
cab_rides_df = pd.read_csv(file_path)

# Display the first few rows of the dataframe
cab_rides_df.head()


Unnamed: 0,distance,cab_type,time_stamp,destination,source,price,surge_multiplier,id,product_id,name
0,0.44,Lyft,1544952607890,North Station,Haymarket Square,5.0,1.0,424553bb-7174-41ea-aeb4-fe06d4f4b9d7,lyft_line,Shared
1,0.44,Lyft,1543284023677,North Station,Haymarket Square,11.0,1.0,4bd23055-6827-41c6-b23b-3c491f24e74d,lyft_premier,Lux
2,0.44,Lyft,1543366822198,North Station,Haymarket Square,7.0,1.0,981a3613-77af-4620-a42a-0c0866077d1e,lyft,Lyft
3,0.44,Lyft,1543553582749,North Station,Haymarket Square,26.0,1.0,c2d88af2-d278-4bfd-a8d0-29ca77cc5512,lyft_luxsuv,Lux Black XL
4,0.44,Lyft,1543463360223,North Station,Haymarket Square,9.0,1.0,e0126e1f-8ca9-4f2e-82b3-50505a09db9a,lyft_plus,Lyft XL


In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from ipywidgets import interact, IntSlider

class DynamicPricing:
    def __init__(self, initial_eta_mean, initial_eta_std, a):
        self.eta_mean = initial_eta_mean
        self.eta_std = initial_eta_std
        self.a = a
        self.price_points = []
        self.eta_points = []
        self.sampled_etas = []
        self.actual_prices = []
        self.revenue_history = []
        self.cumulative_revenue = 0  # Initialize cumulative revenue

    def update_posterior(self, price, actual_price, demand):
        # Ensure standard deviation is never zero
        if self.eta_std < 1e-6:
            self.eta_std = 1e-6

        # Calculate the sample eta based on actual price and demand
        eta_sample = -np.log(demand / self.a) / np.log(price)
        
        # Use a fixed value for likelihood_std
        likelihood_std = 0.1  # This can be adjusted as needed for the model

        # Bayesian update
        new_mean_numerator = self.eta_mean / self.eta_std**2 + eta_sample / likelihood_std**2
        new_mean_denominator = 1 / self.eta_std**2 + 1 / likelihood_std**2
        new_mean = new_mean_numerator / new_mean_denominator if new_mean_denominator != 0 else self.eta_mean
        
        new_std = (1 / self.eta_std**2 + 1 / likelihood_std**2)**(-0.5) if new_mean_denominator != 0 else self.eta_std

        # Logging detailed updates
        old_eta_mean, old_eta_std = self.eta_mean, self.eta_std
        self.eta_mean = new_mean
        self.eta_std = max(new_std, 1e-6)
        # print(f"Old eta_mean: {old_eta_mean:.6f}, New eta_mean: {self.eta_mean:.6f}, "
            #   f"Eta_sample: {eta_sample:.6f}, Price: {price}, Demand: {demand}, "
            #   f"Likelihood STD: {likelihood_std:.6f}")



    def thompson_sampling(self):
        sampled_eta = np.random.normal(self.eta_mean, self.eta_std)
        self.sampled_etas.append(sampled_eta)
        return sampled_eta

    def get_demand(self, price, eta):
        return self.a * price ** (-np.abs(eta))

    def calculate_rmse(self):
        estimated_prices = [self.get_price(d, e) for d, e in zip(self.actual_prices, self.sampled_etas)]
        return np.sqrt(mean_squared_error(self.actual_prices, estimated_prices))

    def get_price(self, demand, eta):
        price = (demand / self.a) ** (-1 / np.abs(eta))
        self.price_points.append(price)
        return price

    def calculate_revenue(self, price, demand):
        return price * demand

    def simulate_pricing_with_revenue_tracking(self, data):
        estimated_etas = []
        estimated_demands = []

        for index, row in data.iterrows():
            sampled_eta = self.thompson_sampling()
            demand = self.get_demand(row['price'], sampled_eta)
            estimated_price = self.get_price(demand, sampled_eta)
            revenue = self.calculate_revenue(row['price'], demand)  # Use actual price for revenue calculation
            self.cumulative_revenue += revenue
            self.revenue_history.append(self.cumulative_revenue)
            
            self.actual_prices.append(row['price'])
            self.eta_points.append(sampled_eta)
            
            # Update the posterior with the actual demand observed
            self.update_posterior(row['price'], estimated_price, demand)
            
            # Append the estimated values to the lists
            estimated_etas.append(sampled_eta)
            estimated_demands.append(demand)
            
            if index % 100 == 0 and index > 0:
                print(f'Index: {index}, RMSE: {self.calculate_rmse()}, Cumulative Revenue: {self.cumulative_revenue}, Eta: {self.eta_mean:.4f}')
        
        # Add the estimated values as new columns to the original dataframe
        data['estimated_eta'] = estimated_etas
        data['estimated_demand'] = estimated_demands
        
        return data  # Return the updated DataFrame
    

    def plot_cumulative_revenue_over_time(self):
        plt.figure(figsize=(10, 6))
        plt.plot(self.revenue_history, label='Cumulative Revenue over time')
        plt.xlabel('Time')
        plt.ylabel('Cumulative Revenue')
        plt.legend()
        plt.grid(True)
        plt.show()
        
    def plot_demand_curve(self, index):
        if index < 100 or index >= len(self.sampled_etas):
            print("Index out of range for plotting.")
            return
        plt.figure(figsize=(10, 6))
        plt.scatter(self.price_points[:index], [self.get_demand(p, e) for p, e in zip(self.price_points[:index], self.eta_points[:index])], color='gray', s=10)
        price_range = np.linspace(0.5 * min(self.price_points), 1.5 * max(self.price_points), 100)
        demand_curve = self.get_demand(price_range, self.sampled_etas[index-1])
        plt.plot(price_range, demand_curve, label=f'η: {self.sampled_etas[index-1]:.4f}')
        plt.xlabel('Price')
        plt.ylabel('Demand')
        # 고정된 x축과 y축 범위 설정
        plt.xlim([0, max(self.price_points)])
        plt.ylim([0, max([self.get_demand(p, self.eta_mean) for p in price_range])])
        plt.legend()
        plt.grid(True)
        plt.show()


In [3]:
nan_product_id = '8cf7e821-f0d3-49c6-8eba-e679c0ebcf6a'

# Drop rows where product_id is '8cf7e821-f0d3-49c6-8eba-e679c0ebcf6a'
cleaned_cab_rides_df = cab_rides_df[cab_rides_df['product_id'] != nan_product_id]

# Confirm that rows with the specified product_id have been removed
remaining_missing_values = cleaned_cab_rides_df.isnull().sum()
remaining_missing_values[remaining_missing_values > 0]


Series([], dtype: int64)

In [4]:
# Function to apply DynamicPricing to a single group
def apply_dynamic_pricing(group, initial_eta_mean, initial_eta_std, a_value):
    # Initialize the DynamicPricing class with given parameters
    dp = DynamicPricing(initial_eta_mean, initial_eta_std, a_value)
    
    # Apply the simulate_pricing_with_revenue_tracking method
    # Assuming that 'price' column exists in the group DataFrame
    group = dp.simulate_pricing_with_revenue_tracking(group)
    
    return group

# Placeholder values for initial parameters (these could be customized)
initial_eta_mean = 0.5  # Example mean value for eta
initial_eta_std = 0.1   # Example standard deviation for eta
a_value = 13            # Example 'a' value for the demand function, this can be adjusted

# Reapply the DynamicPricing to the cleaned data
# Group the cleaned data by 'product_id' and 'source'
grouped_cleaned = cleaned_cab_rides_df.groupby(['product_id', 'source'])

# Apply the DynamicPricing to each group in the cleaned dataset
# Note: This operation could take a long time depending on the size of the dataset
result_dfs_cleaned = [apply_dynamic_pricing(group, initial_eta_mean, initial_eta_std, a_value) for _, group in grouped_cleaned]

# Concatenate the resulting dataframes
result_df_cleaned = pd.concat(result_dfs_cleaned, ignore_index=True)

# Check the first few rows of the result
result_df_cleaned.head()



Index: 41000, RMSE: 8.136547088291934, Cumulative Revenue: 10095.60357742042, Eta: 0.5484
Index: 43000, RMSE: 8.143776939807427, Cumulative Revenue: 10422.446513626264, Eta: 0.5485
Index: 74400, RMSE: 8.151710109573377, Cumulative Revenue: 18278.491482095877, Eta: 0.5482
Index: 78500, RMSE: 8.196096311548034, Cumulative Revenue: 19315.78290324772, Eta: 0.5482
Index: 114900, RMSE: 8.21485007085064, Cumulative Revenue: 28438.604520346278, Eta: 0.5482
Index: 128600, RMSE: 8.310228173724202, Cumulative Revenue: 32243.614778553518, Eta: 0.5482
Index: 140400, RMSE: 8.28398039171998, Cumulative Revenue: 35170.16253622966, Eta: 0.5482
Index: 157500, RMSE: 8.30639388610194, Cumulative Revenue: 39893.8247617998, Eta: 0.5482
Index: 169800, RMSE: 8.325659869510293, Cumulative Revenue: 42613.37992932039, Eta: 0.5482
Index: 194600, RMSE: 8.28557984769669, Cumulative Revenue: 48592.51686535283, Eta: 0.5482
Index: 222800, RMSE: 8.363643986866531, Cumulative Revenue: 55554.87553898228, Eta: 0.5481
Inde

Unnamed: 0,distance,cab_type,time_stamp,destination,source,price,surge_multiplier,id,product_id,name,estimated_eta,estimated_demand
0,2.32,Uber,1545072610662,North End,Back Bay,9.5,1.0,88c309eb-4f98-4410-bf60-2d2f1113abe2,55c66225-fbe7-4fd5-9072-eab1ece5e23e,UberX,0.585568,3.478725
1,1.4,Uber,1545098110455,Boston University,Back Bay,7.5,1.0,0b50bd4f-188d-430a-b934-cfad0bc27199,55c66225-fbe7-4fd5-9072-eab1ece5e23e,UberX,0.570984,4.114316
2,3.45,Uber,1543618688417,South Station,Back Bay,10.5,1.0,3c9ac81a-90ee-4841-a0b6-f35cef745853,55c66225-fbe7-4fd5-9072-eab1ece5e23e,UberX,0.470986,4.295146
3,1.08,Uber,1543622889918,Northeastern University,Back Bay,12.5,1.0,5a61910c-638e-471a-8c5f-a7b377207c6d,55c66225-fbe7-4fd5-9072-eab1ece5e23e,UberX,0.577312,3.024709
4,1.34,Uber,1543308501921,Fenway,Back Bay,7.5,1.0,4066a6b5-53bd-4e79-a8e0-703140f0d74e,55c66225-fbe7-4fd5-9072-eab1ece5e23e,UberX,0.581716,4.026298


In [5]:
result_df_cleaned.head()

Unnamed: 0,distance,cab_type,time_stamp,destination,source,price,surge_multiplier,id,product_id,name,estimated_eta,estimated_demand
0,2.32,Uber,1545072610662,North End,Back Bay,9.5,1.0,88c309eb-4f98-4410-bf60-2d2f1113abe2,55c66225-fbe7-4fd5-9072-eab1ece5e23e,UberX,0.585568,3.478725
1,1.4,Uber,1545098110455,Boston University,Back Bay,7.5,1.0,0b50bd4f-188d-430a-b934-cfad0bc27199,55c66225-fbe7-4fd5-9072-eab1ece5e23e,UberX,0.570984,4.114316
2,3.45,Uber,1543618688417,South Station,Back Bay,10.5,1.0,3c9ac81a-90ee-4841-a0b6-f35cef745853,55c66225-fbe7-4fd5-9072-eab1ece5e23e,UberX,0.470986,4.295146
3,1.08,Uber,1543622889918,Northeastern University,Back Bay,12.5,1.0,5a61910c-638e-471a-8c5f-a7b377207c6d,55c66225-fbe7-4fd5-9072-eab1ece5e23e,UberX,0.577312,3.024709
4,1.34,Uber,1543308501921,Fenway,Back Bay,7.5,1.0,4066a6b5-53bd-4e79-a8e0-703140f0d74e,55c66225-fbe7-4fd5-9072-eab1ece5e23e,UberX,0.581716,4.026298


In [6]:
result_df_cleaned.to_csv('data/cab_rides_with_eta_and_demand.csv')

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=31c7db23-e6da-4465-a558-ea9a7638bbc9' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>