In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score

In [7]:
# Load the shipment records from the "Sheet1" worksheet of the Excel workbook.
# NOTE(review): the path is absolute and machine-specific, so this cell only
# runs where the workbook lives at exactly this location.
data_file = "C:\\Users\\raghu\\OneDrive\\Desktop\\python\\air cargo supplychain.xlsx"
data_frame = pd.read_excel(io=data_file, sheet_name="Sheet1")

In [8]:
# Count missing values per column, then print a header line followed by the
# counts (joined by a newline so the output is header-then-table).
null_counts = data_frame.isnull().sum()
print("Null values in dataset:", null_counts, sep="\n")

Null values in dataset:
Shipment ID                       0
Origin Airport                    0
Destination Airport               0
Shipment Weight (kg)              0
Shipment Volume (m³)              0
Freight Cost (USD)                0
Mode of Transport                 0
Carrier Name                      0
Transit Time (hours)              0
Temperature-Sensitive             0
Perishable Goods                  0
Customs Clearance Time (hours)    0
Weather Conditions                0
Day of Week                       0
Delay Status                      0
dtype: int64


In [9]:
# Discard every row that has at least one missing value (row-wise, "any" rule).
data_frame = data_frame.dropna(axis=0, how="any")

In [10]:
# Columns to expand into indicator (dummy) columns.
cat_columns = [
    'Origin Airport',
    'Destination Airport',
    'Mode of Transport',
    'Carrier Name',
    'Temperature-Sensitive',
    'Perishable Goods',
    'Weather Conditions',
    'Day of Week',
]

# One-hot encode the listed columns; drop_first=True omits the first level of
# each column so the remaining indicators are not perfectly collinear.
data_frame = pd.get_dummies(
    data=data_frame,
    columns=cat_columns,
    drop_first=True,
)


In [14]:
# Encode the label column as integers: 'Delayed' -> 1, 'On-Time' -> 0.
delay_codes = {'Delayed': 1, 'On-Time': 0}
delay_status_raw = data_frame['Delay Status']
delay_status_encoded = delay_status_raw.map(delay_codes)

# Series.map yields NaN for any value that is not a key of the mapping. Fail
# loudly here instead of letting NaN labels flow into model fitting. This also
# catches an accidental re-run of the cell on an already-encoded column.
unmapped_mask = delay_status_encoded.isna()
if unmapped_mask.any():
    unexpected_labels = delay_status_raw[unmapped_mask].unique().tolist()
    raise ValueError(
        f"Unexpected 'Delay Status' values (expected {list(delay_codes)}): {unexpected_labels}"
    )

data_frame['Delay Status'] = delay_status_encoded.astype('int64')

In [15]:
# Predictor matrix: every column except 'Shipment ID' and the label column.
features = data_frame.drop(['Shipment ID', 'Delay Status'], axis=1)

# Label vector: the 'Delay Status' column on its own.
target = data_frame.loc[:, 'Delay Status']

In [16]:
# Split BEFORE scaling. Fitting the scaler on the full dataset lets the test
# rows influence the mean/std used to transform the training data (data
# leakage), which makes held-out scores optimistic. The split arguments are
# unchanged (25% test, random_state=50).
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.25, random_state=50)

# Learn the scaling statistics from the training partition only, then apply
# those same statistics to the test partition. `features` itself is left
# unscaled; the models below consume X_train / X_test.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [18]:
# Fit logistic regression (at most 1200 solver iterations) on the training
# split, predict the held-out split, and print the resulting accuracy.
logistic_model = LogisticRegression(max_iter=1200).fit(X_train, y_train)
pred_logistic = logistic_model.predict(X_test)
logistic_accuracy = accuracy_score(y_test, pred_logistic)
print("Logistic Regression Accuracy:", logistic_accuracy)

Logistic Regression Accuracy: 0.4992


In [19]:
# Single decision tree with depth and leaf-size limits; seeded so repeated
# runs build the same tree.
decision_tree = DecisionTreeClassifier(
    max_depth=12,
    min_samples_split=6,
    min_samples_leaf=3,
    random_state=50,
).fit(X_train, y_train)

# Score the tree on the held-out split.
pred_tree = decision_tree.predict(X_test)
tree_accuracy = accuracy_score(y_true=y_test, y_pred=pred_tree)
print("Decision Tree Accuracy:", tree_accuracy)

Decision Tree Accuracy: 0.4944


In [20]:
# Random forest of 150 trees with depth and leaf-size limits; seeded for
# repeatable results.
random_forest = RandomForestClassifier(
    n_estimators=150,
    max_depth=18,
    min_samples_split=6,
    min_samples_leaf=3,
    random_state=50,
).fit(X_train, y_train)

# Score the forest on the held-out split.
pred_forest = random_forest.predict(X_test)
forest_accuracy = accuracy_score(y_true=y_test, y_pred=pred_forest)
print("Random Forest Accuracy:", forest_accuracy)

Random Forest Accuracy: 0.5042


In [21]:
# AdaBoost ensemble with 250 boosting rounds and learning rate 1.0; seeded for
# repeatable results.
ada_boost = AdaBoostClassifier(
    n_estimators=250,
    learning_rate=1.0,
    random_state=50,
).fit(X_train, y_train)

# Score the ensemble on the held-out split.
pred_ada = ada_boost.predict(X_test)
ada_accuracy = accuracy_score(y_true=y_test, y_pred=pred_ada)
print("AdaBoost Classifier Accuracy:", ada_accuracy)

AdaBoost Classifier Accuracy: 0.4996
