In [1]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report

# Load your dataset
df = pd.read_csv('/kaggle/input/logistics-vehicle-maintenance-history-dataset/logistics_dataset_with_maintenance_required.csv')

# 1. Data Preprocessing

# Convert Last_Maintenance_Date to datetime format
df['Last_Maintenance_Date'] = pd.to_datetime(df['Last_Maintenance_Date'], errors='coerce')

# Extract year, month, and day into separate columns
df['Last_Maintenance_Year'] = df['Last_Maintenance_Date'].dt.year
df['Last_Maintenance_Month'] = df['Last_Maintenance_Date'].dt.month
df['Last_Maintenance_Day'] = df['Last_Maintenance_Date'].dt.day

# Drop the original date column
df.drop(columns=['Last_Maintenance_Date'], inplace=True)

# Fill missing values if any
df.fillna(method='ffill', inplace=True)

# Encode all categorical columns
label_encoders = {}
for column in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le  # Save encoders for future use if needed

# Separate features and target
X = df.drop(columns=['Maintenance_Required'])
y = df['Maintenance_Required']

# Scale numerical features
scaler = StandardScaler()
X[['Usage_Hours', 'Load_Capacity', 'Actual_Load', 'Predictive_Score', 'Delivery_Times', 'Downtime_Maintenance', 'Impact_on_Efficiency']] = scaler.fit_transform(
    X[['Usage_Hours', 'Load_Capacity', 'Actual_Load', 'Predictive_Score', 'Delivery_Times', 'Downtime_Maintenance', 'Impact_on_Efficiency']]
)

# 2. Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 3. Model Training
model = LogisticRegression(random_state=42, max_iter=1000)
model.fit(X_train, y_train)

# 4. Model Evaluation
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

  df.fillna(method='ffill', inplace=True)


              precision    recall  f1-score   support

           0       0.98      0.99      0.99      4311
           1       1.00      0.99      1.00     14089

    accuracy                           0.99     18400
   macro avg       0.99      0.99      0.99     18400
weighted avg       0.99      0.99      0.99     18400



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [2]:
# Ensure dummy_data has all necessary columns as in the original training data
# Start by creating a DataFrame with the correct columns
all_features = X.columns  # X is the original feature DataFrame from training
dummy_data_full = pd.DataFrame(columns=all_features)

# Define your dummy data, filling in only the columns you know
dummy_data = pd.DataFrame([{
    'Vehicle_ID': 999,
    'Make_and_Model': label_encoders['Make_and_Model'].transform(['Ford F-150'])[0],
    'Year_of_Manufacture': 2020,
    'Vehicle_Type': label_encoders['Vehicle_Type'].transform(['Truck'])[0],
    'Usage_Hours': 4500,
    'Route_Info': label_encoders['Route_Info'].transform(['Rural'])[0],
    'Load_Capacity': 8.5,
    'Actual_Load': 7.8,
    'Last_Maintenance_Year': 2023,
    'Last_Maintenance_Month': 6,
    'Last_Maintenance_Day': 1,
    'Maintenance_Type': label_encoders['Maintenance_Type'].transform(['Oil Change'])[0],
    'Predictive_Score': 0.2,
    'Weather_Conditions': label_encoders['Weather_Conditions'].transform(['Clear'])[0],
    'Road_Conditions': label_encoders['Road_Conditions'].transform(['Highway'])[0],
    'Delivery_Times': 40,
    'Downtime_Maintenance': 0.1,
    'Impact_on_Efficiency': 0.15
}])

# Add missing columns with default values (e.g., zero or NaN)
for col in all_features:
    if col not in dummy_data.columns:
        dummy_data[col] = 0  # Use an appropriate placeholder, like 0 or np.nan

print(dummy_data.columns)

# Select columns to scale
columns_to_scale = ['Usage_Hours', 'Load_Capacity', 'Actual_Load', 'Predictive_Score', 
                    'Delivery_Times', 'Downtime_Maintenance', 'Impact_on_Efficiency']

# Apply scaler on the selected columns
dummy_data[columns_to_scale] = scaler.transform(dummy_data[columns_to_scale])

# Predict maintenance requirement
maintenance_prediction = model.predict(dummy_data[all_features])
print("Maintenance Required (1 means Yes, 0 means No):", maintenance_prediction[0])

Index(['Vehicle_ID', 'Make_and_Model', 'Year_of_Manufacture', 'Vehicle_Type',
       'Usage_Hours', 'Route_Info', 'Load_Capacity', 'Actual_Load',
       'Last_Maintenance_Year', 'Last_Maintenance_Month',
       'Last_Maintenance_Day', 'Maintenance_Type', 'Predictive_Score',
       'Weather_Conditions', 'Road_Conditions', 'Delivery_Times',
       'Downtime_Maintenance', 'Impact_on_Efficiency', 'Maintenance_Cost',
       'Engine_Temperature', 'Tire_Pressure', 'Fuel_Consumption',
       'Battery_Status', 'Vibration_Levels', 'Oil_Quality', 'Brake_Condition',
       'Failure_History', 'Anomalies_Detected'],
      dtype='object')
Maintenance Required (1 means Yes, 0 means No): 0


In [3]:
# Saving the model using pickle
import pickle

# Save the model to a file
with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)

print("Model saved successfully!")

Model saved successfully!


In [4]:
import joblib
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Assuming you have already defined and fitted label_encoders and scaler
# Save the label encoders
for name, encoder in label_encoders.items():
    joblib.dump(encoder, f'{name}_encoder.pkl')  # Saves each encoder

# Save the scaler
joblib.dump(scaler, 'scaler.pkl')  # Saves the scaler

# Later, when you need to load them for predictions:

# Load the label encoders
loaded_label_encoders = {}
for name in label_encoders.keys():
    loaded_label_encoders[name] = joblib.load(f'{name}_encoder.pkl')

# Load the scaler
loaded_scaler = joblib.load('scaler.pkl')


# Test

In [5]:
df.head()

Unnamed: 0,Vehicle_ID,Make_and_Model,Year_of_Manufacture,Vehicle_Type,Usage_Hours,Route_Info,Load_Capacity,Actual_Load,Maintenance_Type,Maintenance_Cost,...,Predictive_Score,Maintenance_Required,Weather_Conditions,Road_Conditions,Delivery_Times,Downtime_Maintenance,Impact_on_Efficiency,Last_Maintenance_Year,Last_Maintenance_Month,Last_Maintenance_Day
0,1,1,2022,0,530,1,7.534549,9.004247,1,110.165442,...,0.171873,1,0,0,30.0,0.093585,0.150063,2023,4,9
1,2,3,2015,1,10679,1,7.671728,6.111785,2,265.898087,...,0.24667,1,0,1,30.0,3.361201,0.343017,2023,7,20
2,3,0,2022,1,4181,1,2.901159,3.006055,1,412.48347,...,0.455236,1,0,0,48.627823,1.3653,0.1,2023,3,17
3,4,0,2011,0,2974,2,15.893347,18.82529,2,444.110857,...,0.060208,1,0,0,30.0,0.0,0.135749,2024,5,1
4,5,1,2014,1,2539,1,60.66832,65.605463,2,478.841922,...,0.264929,1,1,2,300.0,6.608704,0.395193,2023,11,15


In [6]:
newdf = pd.read_csv('/kaggle/input/logistics-vehicle-maintenance-history-dataset/logistics_dataset_with_maintenance_required.csv')
newdf.head()

Unnamed: 0,Vehicle_ID,Make_and_Model,Year_of_Manufacture,Vehicle_Type,Usage_Hours,Route_Info,Load_Capacity,Actual_Load,Last_Maintenance_Date,Maintenance_Type,...,Brake_Condition,Failure_History,Anomalies_Detected,Predictive_Score,Maintenance_Required,Weather_Conditions,Road_Conditions,Delivery_Times,Downtime_Maintenance,Impact_on_Efficiency
0,1,Ford F-150,2022,Truck,530,Rural,7.534549,9.004247,2023-04-09,Oil Change,...,Good,1,0,0.171873,1,Clear,Highway,30.0,0.093585,0.150063
1,2,Volvo FH,2015,Van,10679,Rural,7.671728,6.111785,2023-07-20,Tire Rotation,...,Fair,1,0,0.24667,1,Clear,Rural,30.0,3.361201,0.343017
2,3,Chevy Silverado,2022,Van,4181,Rural,2.901159,3.006055,2023-03-17,Oil Change,...,Good,1,1,0.455236,1,Clear,Highway,48.627823,1.3653,0.1
3,4,Chevy Silverado,2011,Truck,2974,Urban,15.893347,18.82529,2024-05-01,Tire Rotation,...,Good,0,1,0.060208,1,Clear,Highway,30.0,0.0,0.135749
4,5,Ford F-150,2014,Van,2539,Rural,60.66832,65.605463,2023-11-15,Tire Rotation,...,Good,1,1,0.264929,1,Rainy,Urban,300.0,6.608704,0.395193
