In [9]:
import pandas as pd

# Load the dataset
container_dataset = pd.read_csv("ContainerData.csv")
print("------- Displayed few of the rows -------")
print(container_dataset.head())

print("------- Missing Values -------")
print(container_dataset.isnull().sum())

print("------- Data Types -------")
print(container_dataset.info())


------- Displayed few of the rows -------
      Height      Width       Hue  Times moved  Hours Priority
0   7.550614  20.814593  0.294862            0     12      low
1   7.705458  18.818473 -0.922008            2     18     high
2   7.684569  15.294488 -0.016228            1      4     high
3   8.649290  25.282276  0.905113            2      5      low
4  11.254993  11.440159  0.482241            1     10      low
------- Missing Values -------
Height         0
Width          0
Hue            0
Times moved    0
Hours          0
Priority       0
dtype: int64
------- Data Types -------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4000 entries, 0 to 3999
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Height       4000 non-null   float64
 1   Width        4000 non-null   float64
 2   Hue          4000 non-null   float64
 3   Times moved  4000 non-null   int64  
 4   Hours        4000 non-null   int64  
 5   Pr

In [10]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
container_dataset["Priority"] = label_encoder.fit_transform(container_dataset["Priority"])

container_dataset = pd.get_dummies(container_dataset, columns=["Hue"])

print(container_dataset.head())

      Height      Width  Times moved  Hours  Priority  Hue_-0.999928443  \
0   7.550614  20.814593            0     12         1             False   
1   7.705458  18.818473            2     18         0             False   
2   7.684569  15.294488            1      4         0             False   
3   8.649290  25.282276            2      5         1             False   
4  11.254993  11.440159            1     10         1             False   

   Hue_-0.999834185  Hue_-0.999809045  Hue_-0.999296776  Hue_-0.999134059  \
0             False             False             False             False   
1             False             False             False             False   
2             False             False             False             False   
3             False             False             False             False   
4             False             False             False             False   

   ...  Hue_0.996052246  Hue_0.996239313  Hue_0.996265631  Hue_0.996554528  \
0  ...  

In [11]:
from sklearn.model_selection import train_test_split

x = container_dataset.drop("Priority", axis=1) # All columns except priority
y = container_dataset["Priority"]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

print(f"Training samples: {len(x_train)}, Testing samples: {len(x_test)}")


Training samples: 3200, Testing samples: 800


In [12]:
# Decision Tree is a simple and effective for structuring our data 

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

ML_Model = DecisionTreeClassifier(max_depth=5, random_state=42)

ML_Model.fit(x_train, y_train)

y_prediction = ML_Model.predict(x_test)

Accuracy = accuracy_score(y_test, y_prediction) 
print(f"Accuracy: {Accuracy:.2f}")
print(classification_report(y_test, y_prediction))

Accuracy: 0.95
              precision    recall  f1-score   support

           0       0.99      0.90      0.95       413
           1       0.91      0.99      0.95       387

    accuracy                           0.95       800
   macro avg       0.95      0.95      0.95       800
weighted avg       0.95      0.95      0.95       800



In [13]:
# [height, width, Hue, times_moved, hours_in_dock=10]
# Store feature names
feature_names = x.columns

# Create a new container matching the transformed features
new_container = pd.DataFrame([[2.5, 1.8, 3, 10, 0]], columns=['Height', 'Width', 'Hue', 'Times moved', 'Hours'])

# Convert 'Hue' into one-hot encoding like training
new_container = pd.get_dummies(new_container, columns=['Hue'])

# Ensure columns match by reindexing
new_container = new_container.reindex(columns=feature_names, fill_value=0)

# Now predict
predicted_priority = ML_Model.predict(new_container)[0]
priority_label = "High" if predicted_priority == 1 else "Low"

print(f"Predicted priority: {priority_label}")



Predicted priority: Low


In [14]:
from joblib import dump

# Save the trained ML model
dump(ML_Model, 'container_priority_model.joblib')

print("Model saved as 'container_priority_model.joblib'")


Model saved as 'container_priority_model.joblib'


# Approach

- Using Decision Tree Classifer to predict whether a container has high or low priority 
- The dataset contained 5 features about each container 