In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split , GridSearchCV
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [3]:
data =pd.read_csv("CargoPricesDatabase.csv")

In [5]:
param_grid_svm = {'C' : [0.1, 1, 10], 'kernel': ['linear', 'rbf']}
param_grid_dt = {'max_depth': [5, 10, 15], 'min_samples_split': [2, 5, 10]}
param_grid_rf = {'n_estimators': [100, 200], 'max_depth': [5, 10, 15]}
models = {'SVR': (SVR(), param_grid_svm),
    'DecisionTree': (DecisionTreeRegressor(), param_grid_dt),
    'RandomForest': (RandomForestRegressor(), param_grid_rf)
}

In [7]:
def find_best_model(x_train, y_train, x_test, y_test, target_name):
    best_model = None
    best_score = -float('inf')
    for model_name, (model, param_grid) in models.items():
        grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
        grid_search.fit(x_train, y_train)
        score = grid_search.best_estimator_.score(x_test, y_test)
        print(f"Model: {model_name}, Target: {target_name}, Best Params: {grid_search.best_params_}, Score: {score}")
        if score > best_score:
            best_score = score
            best_model = grid_search.best_estimator_
    print(f"\nBest model for {target_name}: {type(best_model).__name__} with score: {best_score}\n")
    return best_model

In [9]:
x = data.drop(columns=["Source" , "Destination" , "Price"])

In [11]:
y = data["Price"]

In [13]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [15]:
best_model = find_best_model(x_train, y_train, x_test, y_test, "Price")

Model: SVR, Target: Price, Best Params: {'C': 10, 'kernel': 'linear'}, Score: 0.8949517601181413
Model: DecisionTree, Target: Price, Best Params: {'max_depth': 10, 'min_samples_split': 5}, Score: 0.9680855569871002
Model: RandomForest, Target: Price, Best Params: {'max_depth': 15, 'n_estimators': 200}, Score: 0.9797884279587312

Best model for Price: RandomForestRegressor with score: 0.9797884279587312



In [17]:
y_pred = best_model.predict(x_test)
accuracy_cargo = best_model.score(x_test, y_test)

In [19]:
y_pred

array([2367.11955   , 2002.74775125, 1646.08695   , 3546.89445   ,
       1401.21014167, 2986.85576155, 2696.1620375 , 4115.6706    ,
       1374.2332    , 2811.2464381 , 2165.6075    , 4515.1108    ,
       2676.6045    , 3152.25958488, 1409.2236006 , 2249.85785   ,
        231.83475   ,  315.34315   , 4954.2334    , 3684.78165   ,
       5025.90725   , 1760.2308    , 1595.82860833, 3917.57375   ,
       1186.49485   , 1204.27555   , 3161.39379792,  254.4228    ,
       3456.2525    , 2517.05824435, 5100.863     , 2497.94685   ,
       1040.51654821, 2507.57905   , 2403.57483542, 2829.47531931,
       2209.40003125, 4298.1093    , 3862.2288    , 2212.6049    ,
       1361.53624167, 1490.5862531 , 2336.65663929, 3354.59717708,
       2599.2174    , 2993.53371202, 3918.43265   , 2833.92721857,
       3235.99355   , 1389.43891667, 3800.5623    ,  370.09805   ,
       2670.13625   , 4377.0742    , 4391.43545   ,  208.45235   ,
       1290.28946333, 2134.32915   , 1191.2888    , 4935.3621 

In [21]:
accuracy_cargo

0.9797884279587312

In [23]:
def predict_new_values():
    print("\nEnter the following values for prediction:")
    features = []
    for col in x.columns:
        value = float(input(f"Enter {col}: "))
        features.append(value)
    input_data = pd.DataFrame([features], columns=x.columns)
    cargo_pred = best_model.predict(input_data)
    print("calculating........")
    print(f"Predicted Price: {cargo_pred[0]}")

In [25]:
predict_new_values()


Enter the following values for prediction:


Enter Distance:  350
Enter Cargo:  3
Enter Urgency:  1
Enter Fragile:  0
Enter Weight:  50


calculating........
Predicted Price: 2505.481672916668


In [27]:
import joblib
joblib.dump(best_model, 'DeliveryPrice.pkl')

['DeliveryPrice.pkl']