Optimization Model

Import Libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


Load Dataset

In [None]:
# Location of the preprocessed trip data; read through the variable so the
# path only has to be changed in one place.
file_path = 'Preprocessed.csv'
data = pd.read_csv(file_path)

In [None]:
# Binary target: 1 when the recorded tip_amount is positive, 0 otherwise.
data['tip_paid'] = data['tip_amount'].gt(0).astype(int)

# All three predictors are one-hot encoded; drop_first=True drops the first
# level of each encoded column.
feature_columns = ['PULocationID', 'DOLocationID', 'trip_time_category']
X = pd.get_dummies(data[feature_columns], columns=feature_columns, drop_first=True)
y = data['tip_paid']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Random forest with 50 trees, seeded for reproducibility.
model = RandomForestClassifier(n_estimators=50, random_state=42)
model.fit(X_train, y_train)


# Location IDs for which tip predictions are generated.
important_locations = [265,  55, 117,  86, 197,  76, 130, 244,  14,  75, 132,  39, 213,  93, 216,
                       205,  89,  51,  95,  61, 108, 254,  74, 215,  35]

# Every ordered pickup/drop-off pair of distinct important locations, crossed
# with each time category, in the order pickup -> drop-off -> time category.
candidate_trips = [
    (pu, do, time_category)
    for pu in important_locations
    for do in important_locations
    if pu != do
    for time_category in ['Low', 'Normal', 'Peak']
]

# Encode and score all candidates in one batch instead of calling
# get_dummies/reindex/predict once per single-row frame.
candidates = pd.DataFrame(
    candidate_trips,
    columns=['PULocationID', 'DOLocationID', 'trip_time_category'],
)
candidates_encoded = pd.get_dummies(
    candidates,
    columns=['PULocationID', 'DOLocationID', 'trip_time_category'],
)
# Align with the training layout: dummy columns the model never saw are
# dropped, and training columns absent from the candidates are filled with 0.
candidates_encoded = candidates_encoded.reindex(columns=X.columns, fill_value=0)
predicted_tips = model.predict(candidates_encoded)

# One record per candidate trip, labelled 'Yes' when the classifier predicts 1.
results = [
    {
        'PULocationID': pu,
        'DOLocationID': do,
        'trip_time_category': time_category,
        'tip_paid': 'Yes' if predicted_tip == 1 else 'No',
    }
    for (pu, do, time_category), predicted_tip in zip(candidate_trips, predicted_tips)
]


# Write the per-trip tip predictions to disk without the positional index.
results_df = pd.DataFrame(results)
results_df.to_csv('Important_Locations_Tip_Predictions.csv', index=False)

# Accuracy of the classifier on the held-out test split.
y_pred = model.predict(X_test)
model_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)


def _read_square_matrix(path):
    """Read a location-by-location CSV matrix with matching row/column labels.

    ``read_csv`` type-infers the index column but leaves header labels as
    strings, so numeric location IDs would come back as integers on the rows
    and strings on the columns, and ``matrix.loc[i, j]`` with both labels
    taken from the index would raise ``KeyError``. Casting the column labels
    to the index dtype makes both axes addressable with the same keys; when
    the labels already match this is a no-op.
    """
    matrix = pd.read_csv(path, index_col=0)
    try:
        matrix.columns = matrix.columns.astype(matrix.index.dtype)
    except (TypeError, ValueError):
        # Column labels cannot be represented in the index dtype; keep them
        # exactly as read.
        pass
    return matrix


# Matrices whose first CSV column holds the row labels.
demand_matrix = _read_square_matrix('Main_OD.csv')
low_cost_matrix = _read_square_matrix('cost_matrix_low.csv')
normal_cost_matrix = _read_square_matrix('cost_matrix_normal.csv')
peak_cost_matrix = _read_square_matrix('cost_matrix_peak.csv')
distance_matrix = _read_square_matrix('distance_matrix.csv')

# Tip predictions, keyed by (pickup, drop-off, time category) for direct lookup.
Important_Locations = pd.read_csv('Important_Locations_Tip_Predictions.csv')
Important_Locations.set_index(['PULocationID', 'DOLocationID', 'trip_time_category'], inplace=True)


from pulp import LpMaximize, LpProblem, LpVariable, lpSum

locations = demand_matrix.index.tolist()
times = ['Low', 'Normal', 'Peak']

# NOTE: this rebinds `model`, which held the trained classifier up to here;
# `model_accuracy` must already have been computed before this point.
model = LpProblem("Maximize_Profit", LpMaximize)

# One non-negative continuous variable per (pickup, drop-off, time category).
x = LpVariable.dicts("X", [(i, j, t) for i in locations for j in locations for t in times],
                     lowBound=0, cat="Continuous")

# NOTE(review): presumably the flat revenue per trip and the tip value per
# tipped trip -- confirm the meaning and units of these constants.
REVENUE_PER_TRIP = 40
TIP_PER_TRIP = 5

cost_by_time = {
    'Low': low_cost_matrix,
    'Normal': normal_cost_matrix,
    'Peak': peak_cost_matrix,
}

# Collect the routes predicted to tip once, so the objective does a set lookup
# per term instead of a MultiIndex membership test plus a .loc lookup.
tipped_mask = (Important_Locations['tip_paid'] == 'Yes').to_numpy()
tipped_routes = set(Important_Locations.index[tipped_mask])

# The tip value is scaled by the classifier's test accuracy.
expected_tip = TIP_PER_TRIP * model_accuracy

# Margin on every off-diagonal trip, plus the scaled tip on routes predicted
# to tip.
objective = lpSum(
    (REVENUE_PER_TRIP - cost_by_time[t].loc[i, j]) * x[i, j, t]
    for i in locations for j in locations if i != j for t in times
) + lpSum(
    expected_tip * x[i, j, t]
    for i in locations for j in locations if i != j for t in times
    if (i, j, t) in tipped_routes
)
model += objective


# Distance-weighted sum of all off-diagonal trip variables, capped at 1000.
distance_terms = []
for origin in locations:
    for destination in locations:
        if origin == destination:
            continue
        leg_distance = distance_matrix.loc[origin, destination]
        distance_terms.extend(
            leg_distance * x[origin, destination, period] for period in times
        )

model += (lpSum(distance_terms) <= 1000, "Total_Distance")

# Each trip variable is bounded above by the demand between its pickup and
# drop-off locations; the same bound is applied to every time category.
for origin in locations:
    for destination in locations:
        if origin == destination:
            continue
        route_demand = demand_matrix.loc[origin, destination]
        for period in times:
            constraint_name = f"Demand_Constraint_{origin}_{destination}_{period}"
            model += (x[origin, destination, period] <= route_demand, constraint_name)


# Solve the LP with PuLP's default solver.
model.solve()

# Report the solver status code and the objective value.
print("Status:", model.status)
print("Objective Value (Profit):", model.objective.value())

# One row per off-diagonal (pickup, drop-off, time category) with the solved
# value of its decision variable.
decision_rows = []
for origin in locations:
    for destination in locations:
        if origin == destination:
            continue
        for period in times:
            decision_rows.append({
                "PULocationID": origin,
                "DOLocationID": destination,
                "Time": period,
                "Trips": x[origin, destination, period].varValue,
            })
decision_values = pd.DataFrame(decision_rows)

print(decision_values)
decision_values.to_csv('Optimal_Decisions.csv', index=False)