In [138]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from skopt import gp_minimize
from skopt.space import Categorical
from skopt.utils import use_named_args

df = pd.read_csv('../results/result.csv')

In [140]:
# Standardize specified columns across all data
columns_to_standardize = ['mean_earnings', 'passengerless_rate', 'todest_time_rate', 'earning_per_time']
scaler = StandardScaler()
df[columns_to_standardize] = scaler.fit_transform(df[columns_to_standardize])
df = df[['weekend', 'x_1', 'x_2', 'x_3', 'mean_earnings', 'passengerless_rate', 'todest_time_rate', 'earning_per_time']]

# Filter for weekday data (weekend == 0)
weekday_df = df[df['weekend'] == 0]

# Generate all valid combinations of x_1, x_2, x_3
valid_combinations = []
for x1 in np.arange(0, 1.1, 0.1):
    for x2 in np.arange(0, 1.1-x1, 0.1):
        x3 = round(1.0 - x1 - x2, 1)  # Round to avoid floating point issues
        if 0 <= x3 <= 1:
            valid_combinations.append((round(x1, 1), round(x2, 1), x3))

def objective(x):
    x1, x2, x3 = x
    
    # Find the row that matches these x values
    row = weekday_df[(weekday_df['x_1'] == x1) & 
                     (weekday_df['x_2'] == x2) & 
                     (weekday_df['x_3'] == x3)]
    
    if len(row) == 0:
        return -np.inf  # Return a large negative value if no matching row is found
    
    # Compute a combined score (you can adjust this based on your priorities)
    score = row['mean_earnings'].sum() - row['passengerless_rate'].sum() + row['earning_per_time'].sum()
    return score  # We want to maximize this score

# Perform grid search
best_score = -np.inf
best_x = None

for x in valid_combinations:
    score = objective(x)
    if score > best_score:
        best_score = score
        best_x = x

# Get the best parameters
best_x1, best_x2, best_x3 = best_x

# Print the results
print("Best parameters:")
print(f"x_1: {best_x1:.1f}")
print(f"x_2: {best_x2:.1f}")
print(f"x_3: {best_x3:.1f}")
print(f"Best score: {best_score:.3f}")

# Find the corresponding row in the dataframe
best_row = weekday_df[(weekday_df['x_1'] == best_x1) & 
                      (weekday_df['x_2'] == best_x2) & 
                      (weekday_df['x_3'] == best_x3)]

if len(best_row) > 0:
    print("\nCorresponding standardized metrics:")
    print(f"mean_earnings: {best_row['mean_earnings'].iloc[0]:.3f}")
    print(f"passengerless_rate: {best_row['passengerless_rate'].iloc[0]:.3f}")
    print(f"todest_time_rate: {best_row['todest_time_rate'].iloc[0]:.3f}")
    print(f"earning_per_time: {best_row['earning_per_time'].iloc[0]:.3f}")

    # Reverse the standardization to get original values
    original_values = scaler.inverse_transform(best_row[columns_to_standardize])
    
    print("\nCorresponding original metrics:")
    for i, col in enumerate(columns_to_standardize):
        print(f"{col}: {original_values[0][i]:.3f}")
else:
    print("\nError: No matching row found for the optimized parameters in the dataset.")
    print("This should not happen with the discrete optimization approach.")

Best parameters:
x_1: 0.8
x_2: 0.2
x_3: -0.0
Best score: 1.073

Corresponding standardized metrics:
mean_earnings: -1.555
passengerless_rate: -0.031
todest_time_rate: 0.031
earning_per_time: -1.735

Corresponding original metrics:
mean_earnings: -1.555
passengerless_rate: -0.031
todest_time_rate: 0.031
earning_per_time: -1.735
