In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
# Full dataset as read from disk; used later for customer -> cluster lookups.
data = pd.read_csv("Combined_Dataset.csv")
# Training slice: only the first 3000 rows are used to fit the per-cluster models.
data2 = data.head(3000)

In [2]:
# Columns kept for modelling. 'Truck_Number' is the prediction target; every
# other entry is an input feature.
relevant_columns = [
    # Fuel requirement columns
    'HSD_Requirement',
    'LDO_Requirement',
    'FO_Requirement',
    'LSHS_Requirement',
    'SKO_Requirement',
    'MS_Requirement',
    # Truck / trip condition columns
    'Truck_Capacity',
    'Weather',
    'Traffic',
    'Local_Infrastructure',
    'Night_Driving',
    'Customer_Satisfaction',
    # Target
    'Truck_Number',
]

In [3]:
# Columns that are integer-encoded with a LabelEncoder before training.
categorical_columns = ['Weather', 'Traffic', 'Local_Infrastructure',
                       'Night_Driving', 'Customer_Satisfaction']

# Fit ONE encoder per categorical column on the whole training slice, before
# the per-cluster loop. Re-fitting inside the loop would give every cluster its
# own category -> integer mapping and leave only the last cluster's encoders
# bound to `label_encoders`, so inputs for any other cluster would be encoded
# with a mapping its model was never trained on.
label_encoders = {}
for column in categorical_columns:
    label_encoders[column] = LabelEncoder().fit(data2[column])

clusters = data2['Cluster_ID'].unique()
models = {}            # cluster id -> fitted RandomForestClassifier
cluster_accuracy = {}  # cluster id -> accuracy on that cluster's held-out 20%
for cluster in clusters:
    # .copy() so the encoded columns are written to an independent frame rather
    # than a slice of data2 (avoids SettingWithCopyWarning).
    cluster_data = data2.loc[data2['Cluster_ID'] == cluster, relevant_columns].copy()
    for column in categorical_columns:
        cluster_data[column] = label_encoders[column].transform(cluster_data[column])

    X = cluster_data.drop(columns=['Truck_Number'])
    y = cluster_data['Truck_Number']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Seeded so that Restart & Run All reproduces the same forests.
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)
    models[cluster] = model

    # Score the held-out rows so the split is actually used.
    cluster_accuracy[cluster] = accuracy_score(y_test, model.predict(X_test))

In [4]:
# Step 3: Predict Truck Allocation based on cluster
def predict_truck_allocation(input_data, cluster_id):
    """Predict the truck for one request using the model of the given cluster.

    Reads the module-level ``models`` (cluster id -> fitted classifier) and
    ``label_encoders`` (column name -> fitted encoder).

    NOTE(review): ``label_encoders`` must hold the same category -> integer
    mapping that the selected cluster's model was trained with; confirm this
    against the training cell.

    Parameters
    ----------
    input_data : dict
        Maps column names to values for a single request. Must contain the
        five encoded columns listed below plus every feature the model was
        fitted on. Extra keys (e.g. 'Customer_ID') are ignored when the model
        exposes ``feature_names_in_``.
    cluster_id
        Key into ``models``.

    Returns
    -------
    The result of ``model.predict`` for the single input row.

    Raises
    ------
    KeyError
        If ``cluster_id`` has no trained model, or if a required column is
        missing from ``input_data``.
    """
    if cluster_id not in models:
        raise KeyError(f"No trained model for cluster {cluster_id!r}")
    model = models[cluster_id]

    input_df = pd.DataFrame([input_data])
    for column in ['Weather', 'Traffic', 'Local_Infrastructure', 'Night_Driving', 'Customer_Satisfaction']:
        input_df[column] = label_encoders[column].transform([input_data[column]])

    # Select exactly the columns the model was fitted on, in fitting order, so
    # the prediction does not depend on the dict's key order or on extra keys.
    feature_names = getattr(model, 'feature_names_in_', None)
    if feature_names is not None:
        features = input_df[list(feature_names)]
    else:
        # Model was fitted without column names: fall back to dropping the
        # identifier column if it is present.
        features = input_df.drop(columns=['Customer_ID'], errors='ignore')

    predicted_truck = model.predict(features)
    return predicted_truck

In [5]:
def find_cluster_id(input_data):
    """Return the cluster of the customer named in ``input_data``.

    Looks up ``input_data['Customer_ID']`` in the module-level ``data`` frame
    and returns the 'Cluster_ID' of the first matching row. If no row matches,
    prints a message and returns None.
    """
    wanted_id = input_data['Customer_ID']
    matches = data.loc[data['Customer_ID'] == wanted_id]
    if matches.empty:
        print("Customer ID not found in the dataset")
        return None
    first_match = matches.iloc[0]
    return first_match['Cluster_ID']

In [6]:
# Example request. The cluster is not supplied here; it is looked up from
# 'Customer_ID' via find_cluster_id().
input_data = {
    'HSD_Requirement': 30,
    'LDO_Requirement': 20,
    'FO_Requirement': 25,
    'LSHS_Requirement': 15,
    'SKO_Requirement': 10,
    'MS_Requirement': 35,
    'Truck_Capacity': 450,
    'Weather': 'Cloudy',
    'Traffic': 2,
    'Local_Infrastructure': 'Good',
    'Night_Driving': 'Yes',
    'Customer_Satisfaction': 'Good',
    'Customer_ID': 'Customer758',
}
cluster_id = find_cluster_id(input_data)
if cluster_id is not None:
    print(f"Cluster ID for Customer ID '{input_data['Customer_ID']}' is '{cluster_id}'")
    # Predict only when the customer was found; find_cluster_id() has already
    # reported the failure otherwise, and there is no model to index with None.
    predicted_truck = predict_truck_allocation(input_data, cluster_id)
    print(predicted_truck)
else:
    predicted_truck = None

Cluster ID for Customer ID 'Customer758' is 'Cluster58'
['Cluster58_Truck1152']


In [7]:
# TODO (Step 4): Get priority list - not implemented yet; define according to project requirements.
# TODO (Step 5): Check truck availability - not implemented yet; needs a mechanism based on real-time truck coordinates.