In [9]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from skopt import gp_minimize
from skopt.space import Categorical
from skopt.utils import use_named_args
from sklearn.neighbors import NearestNeighbors
from skopt.space import Real
from skopt.utils import use_named_args

df = pd.read_csv('../results/result.csv')

In [14]:
columns_to_standardize = ['mean_earnings', 'passengerless_rate', 'todest_time_rate', 'earning_per_time']
scaler = StandardScaler()
df[columns_to_standardize] = scaler.fit_transform(df[columns_to_standardize])

# Filter for weekday data (weekend == 0)
weekday_df = df[df['weekend'] == 0]
# Groupby x_1, x_2, x_3 and take the mean of the standardized metrics
weekday_df = weekday_df.groupby(['x_1', 'x_2', 'x_3'])[columns_to_standardize].mean().reset_index()

# Prepare data for nearest neighbor search
X = weekday_df[['x_1', 'x_2', 'x_3']].values
y = weekday_df[columns_to_standardize].values

# Initialize NearestNeighbors
nn = NearestNeighbors(n_neighbors=1, metric='euclidean')
nn.fit(X)
cnt = 0
# Define the objective function for optimization
@use_named_args(dimensions=[Real(0.0, 1.0, name='x_1'),
                            Real(0.0, 1.0, name='x_2'),
                            Real(0.0, 1.0, name='x_3')])


def objective(x_1, x_2, x_3):
    print(x_1, x_2, x_3)
    nearest_row = y[indices[0][0]]
    # Compute a combined score (you can adjust this based on your priorities)
    score = nearest_row[0] # mean_earnings 
    return -score  # We want to maximize this score, so return negative

# Set up the optimization
dimensions = [Real(0.0, 1.0, name='x_1'),
              Real(0.0, 1.0, name='x_2'),
              Real(0.0, 1.0, name='x_3')]

# Run the optimization
result = gp_minimize(objective, dimensions, n_calls=50, random_state=42)

# Print the results
print("Best parameters:")
print(f"x_1: {result.x[0]:.3f}")
print(f"x_2: {result.x[1]:.3f}")
print(f"x_3: {result.x[2]:.3f}")
print(f"Best score: {-result.fun:.3f}")

# Find the actual nearest data point to the optimized result
distances, indices = nn.kneighbors([result.x])
nearest_point = X[indices[0][0]]

print("\nNearest actual data point:")
print(f"x_1: {nearest_point[0]:.3f}")
print(f"x_2: {nearest_point[1]:.3f}")
print(f"x_3: {nearest_point[2]:.3f}")

# Print the corresponding metrics
nearest_metrics = y[indices[0][0]]
print("\nCorresponding standardized metrics:")
print(f"mean_earnings: {nearest_metrics[0]:.3f}")
print(f"passengerless_rate: {nearest_metrics[1]:.3f}")
print(f"todest_time_rate: {nearest_metrics[2]:.3f}")
print(f"earning_per_time: {nearest_metrics[3]:.3f}")

0.7965429868602331 0.18343478986616382 0.7796910002727695
0.5968501579464871 0.44583275285359125 0.09997491581800291
0.45924889196586727 0.3337086111390219 0.1428668179219408
0.650888472948853 0.05641157902710027 0.7219987722668249
0.9385527090157504 0.0007787658410143285 0.9922115592912177
0.6174815096277166 0.611653160488281 0.007066305219717408
0.02306242504141576 0.5247746602583893 0.3998609717152556
0.046665663213615434 0.9737555188414594 0.23277134043030429
0.09060643453282081 0.6183860093330874 0.38246199126716285
0.9832308858067884 0.46676289324798004 0.8599404067363208
0.0 1.0 1.0
0.8889632380589146 0.07966487765544795 0.222429261717161
0.63595024487893 0.18134430067452503 0.955200115172112
0.5828774055858909 0.24011363955390447 0.36659552112122046
0.6287144482720189 0.9812444849617242 0.26616364886403016
0.1760796951841113 0.5195021575709593 0.09410167398101912
0.9843313963376389 0.9717615903205736 0.9736601250030511
0.01604555268474484 0.0033827981670873095 0.973824818147243