In [34]:
import pandas as pd

# Data-cleaning script for the smart-logistics dataset.
# Steps: load the CSV, report its initial state, impute missing values,
# drop duplicate rows, parse the Timestamp column, and save the result.

# Load dataset
df = pd.read_csv("smart_logistics_dataset.csv")

# Quick check
print("First 5 rows of data:")
print(df.head())
print("\nInitial dataset info:")
df.info()
print(f"\nInitial shape: {df.shape}")

# Check missing values before cleaning
print("\nMissing values before cleaning:")
print(df.isnull().sum())

# Handle missing values: Temperature is filled with its mean, Humidity with
# its median, and the categorical columns with their most frequent value.
df['Temperature'] = df['Temperature'].fillna(df['Temperature'].mean())
df['Humidity'] = df['Humidity'].fillna(df['Humidity'].median())
for column in ('Shipment_Status', 'Logistics_Delay_Reason', 'Traffic_Status'):
    modes = df[column].mode()
    # mode() is empty when the column has no non-null values; indexing it
    # would raise, so such a column is left untouched.
    if not modes.empty:
        df[column] = df[column].fillna(modes.iloc[0])
print(f"After handling missing values: {df.shape}")

# Remove duplicates
df = df.drop_duplicates()
print(f"After removing duplicates: {df.shape}")

# Convert Timestamp column to datetime, parsing ambiguous dates day-first.
# errors='coerce' turns unparseable values into NaT, so count how many values
# were lost in the conversion instead of letting them disappear silently.
missing_before = df['Timestamp'].isna().sum()
df['Timestamp'] = pd.to_datetime(df['Timestamp'], errors='coerce', dayfirst=True)
unparsed = df['Timestamp'].isna().sum() - missing_before
if unparsed:
    print(f"Warning: {unparsed} Timestamp value(s) could not be parsed (set to NaT)")

# Print first 5 values to check
print(df['Timestamp'].head())

# Check the datatype after conversion
print(df['Timestamp'].dtype)

# Save cleaned dataset
df.to_csv("cleaned_data.csv", index=False)
print("\n✅ Cleaned dataset saved as cleaned_data.csv")

# Save a second copy of the same cleaned data.
# NOTE(review): contents are identical to cleaned_data.csv — confirm that
# both files are actually needed.
df.to_csv("cleaned_data_v2.csv", index=False)

# Print info. df.info() writes its report to stdout itself and returns None,
# so it must not be wrapped in print() (that would emit a stray "None").
print("Info of the cleaned data:")
df.info()

# Print final shape
print("Final shape of the cleaned data:", df.shape)






 

#

First 5 rows of data:
          Timestamp  Asset_ID  Latitude  Longitude  Inventory_Level  \
0  20/03/2024 00:11   Truck_7  -65.7383    11.2497              390   
1  30/10/2024 07:53   Truck_6   22.2748  -131.7086              491   
2  29/07/2024 18:42  Truck_10   54.9232    79.5455              190   
3  28/10/2024 00:50   Truck_9   42.3900    -1.4788              330   
4  27/09/2024 15:52   Truck_7  -65.8477    47.9468              480   

  Shipment_Status  Temperature  Humidity Traffic_Status  Waiting_Time  \
0         Delayed         27.0      67.8         Detour            38   
1      In Transit         22.5      54.3          Heavy            16   
2      In Transit         25.2      62.2         Detour            34   
3       Delivered         25.4      52.3          Heavy            37   
4         Delayed         20.5      57.2          Clear            56   

   User_Transaction_Amount  User_Purchase_Frequency Logistics_Delay_Reason  \
0                      320        