In [2]:
# Basic EDA of two datasets
# Dataset 1: California Housing Prices

import kagglehub
import pandas as pd

# 1. Dataset overview
# Fetch the latest published version of the housing dataset through kagglehub
# and keep the returned location of the downloaded files in `path`.
housing_dataset_handle = "camnugent/california-housing-prices"
path = kagglehub.dataset_download(housing_dataset_handle)

print("Path to dataset files:", path)




Path to dataset files: C:\Users\sassa\.cache\kagglehub\datasets\camnugent\california-housing-prices\versions\1


In [3]:
# Read housing.csv from the downloaded dataset location into a DataFrame
housing_csv = f"{path}/housing.csv"
housing_df = pd.read_csv(housing_csv)

In [4]:
# Dataset dimensions as a (number of rows, number of columns) tuple
housing_df.shape

(20640, 10)

In [5]:
# List the column labels of the housing dataset
housing_df.columns


Index(['longitude', 'latitude', 'housing_median_age', 'total_rooms',
       'total_bedrooms', 'population', 'households', 'median_income',
       'median_house_value', 'ocean_proximity'],
      dtype='object')

In [6]:
# Show the pandas dtype of each column
housing_df.dtypes

longitude             float64
latitude              float64
housing_median_age    float64
total_rooms           float64
total_bedrooms        float64
population            float64
households            float64
median_income         float64
median_house_value    float64
ocean_proximity        object
dtype: object

In [7]:
# Number of non-null entries in each column
housing_df.count()

longitude             20640
latitude              20640
housing_median_age    20640
total_rooms           20640
total_bedrooms        20433
population            20640
households            20640
median_income         20640
median_house_value    20640
ocean_proximity       20640
dtype: int64

In [8]:
# Count the distinct values in each column
housing_df.nunique()

longitude               844
latitude                862
housing_median_age       52
total_rooms            5926
total_bedrooms         1923
population             3888
households             1815
median_income         12928
median_house_value     3842
ocean_proximity           5
dtype: int64

In [9]:
# Draw a random sample of 5 rows to see example values.
# random_state pins the draw so the sampled rows (and the summary table that a
# later cell builds from housing_examples) are the same on every full re-run.
housing_examples = housing_df.sample(5, random_state=42)
housing_examples

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
371,-122.15,37.74,41.0,856.0,178.0,571.0,191.0,3.1458,130600.0,NEAR BAY
8448,-118.38,33.91,36.0,2904.0,515.0,1463.0,534.0,5.8374,289600.0,<1H OCEAN
5671,-118.31,33.72,26.0,2711.0,508.0,1372.0,459.0,4.1451,326700.0,NEAR OCEAN
19168,-122.71,38.4,17.0,1690.0,464.0,833.0,445.0,1.439,140600.0,<1H OCEAN
11941,-117.42,33.94,26.0,2420.0,532.0,1383.0,469.0,3.5403,113500.0,INLAND


In [10]:
# Build a per-column summary table: name, dtype, non-null count, number of
# distinct values, and the values seen in the sampled example rows.
housing_columns = housing_df.columns

# One comma-separated string of sampled values per column, in column order
example_strings = []
for column in housing_columns:
    sampled_values = housing_examples[column].tolist()
    example_strings.append(", ".join(str(value) for value in sampled_values))

summary_1_2 = pd.DataFrame(
    {
        "Column name": housing_columns,
        "Data type": housing_df.dtypes,
        "Non-null count": housing_df.notnull().sum(),
        "Unique values": housing_df.nunique(),
    }
)
summary_1_2["Examples"] = example_strings

# Drop the column-name index; the names are already held in "Column name"
summary_1_2 = summary_1_2.reset_index(drop=True)

# Render as a Markdown table (without the positional index) and print it
markdown_table1_2 = summary_1_2.to_markdown(index=False)
print(markdown_table1_2)

| Column name        | Data type   |   Non-null count |   Unique values | Examples                                           |
|:-------------------|:------------|-----------------:|----------------:|:---------------------------------------------------|
| longitude          | float64     |            20640 |             844 | -122.15, -118.38, -118.31, -122.71, -117.42        |
| latitude           | float64     |            20640 |             862 | 37.74, 33.91, 33.72, 38.4, 33.94                   |
| housing_median_age | float64     |            20640 |              52 | 41.0, 36.0, 26.0, 17.0, 26.0                       |
| total_rooms        | float64     |            20640 |            5926 | 856.0, 2904.0, 2711.0, 1690.0, 2420.0              |
| total_bedrooms     | float64     |            20433 |            1923 | 178.0, 515.0, 508.0, 464.0, 532.0                  |
| population         | float64     |            20640 |            3888 | 571.0, 1463.0, 1372.0, 833.0, 1383.0 

In [11]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns
housing_df.describe()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
count,20640.0,20640.0,20640.0,20640.0,20433.0,20640.0,20640.0,20640.0,20640.0
mean,-119.569704,35.631861,28.639486,2635.763081,537.870553,1425.476744,499.53968,3.870671,206855.816909
std,2.003532,2.135952,12.585558,2181.615252,421.38507,1132.462122,382.329753,1.899822,115395.615874
min,-124.35,32.54,1.0,2.0,1.0,3.0,1.0,0.4999,14999.0
25%,-121.8,33.93,18.0,1447.75,296.0,787.0,280.0,2.5634,119600.0
50%,-118.49,34.26,29.0,2127.0,435.0,1166.0,409.0,3.5348,179700.0
75%,-118.01,37.71,37.0,3148.0,647.0,1725.0,605.0,4.74325,264725.0
max,-114.31,41.95,52.0,39320.0,6445.0,35682.0,6082.0,15.0001,500001.0


In [12]:
# Render the numeric descriptive statistics as a Markdown table and print it
housing_numeric_stats = housing_df.describe()
markdown_table1_3_1 = housing_numeric_stats.to_markdown()
print(markdown_table1_3_1)

|       |   longitude |    latitude |   housing_median_age |   total_rooms |   total_bedrooms |   population |   households |   median_income |   median_house_value |
|:------|------------:|------------:|---------------------:|--------------:|-----------------:|-------------:|-------------:|----------------:|---------------------:|
| count | 20640       | 20640       |           20640      |      20640    |        20433     |     20640    |     20640    |     20640       |                20640 |
| mean  |  -119.57    |    35.6319  |              28.6395 |       2635.76 |          537.871 |      1425.48 |       499.54 |         3.87067 |               206856 |
| std   |     2.00353 |     2.13595 |              12.5856 |       2181.62 |          421.385 |      1132.46 |       382.33 |         1.89982 |               115396 |
| min   |  -124.35    |    32.54    |               1      |          2    |            1     |         3    |         1    |         0.4999  |                14999 

In [83]:
# Descriptive statistics for the categorical (object-dtype) columns, with the
# default row labels replaced by more readable ones
cat_desc_1 = housing_df.describe(include="object").rename(
    index={
        "count": "Count",
        "unique": "Number of unique values",
        "top": "Most frequent value",
        "freq": "Most frequent value (frequency)",
    }
)

# value_counts() orders entries from most to least frequent, so the last entry
# of each tally is the rarest value; dropna=False keeps missing values in the
# tally as their own entry.
categorical_counts = [
    housing_df[col].value_counts(dropna=False)
    for col in housing_df.select_dtypes(include="object").columns
]
least_values = [counts.index[-1] for counts in categorical_counts]
least_freqs = [counts.iloc[-1] for counts in categorical_counts]

# Append the rarest value and its frequency as two extra rows
cat_desc_1.loc["Least frequent value"] = least_values
cat_desc_1.loc["Least frequent value (frequency)"] = least_freqs

# Display the result
print(cat_desc_1)


                                 ocean_proximity
Count                                      20640
Number of unique values                        5
Most frequent value                    <1H OCEAN
Most frequent value (frequency)             9136
Least frequent value                      ISLAND
Least frequent value (frequency)               5


In [104]:
# Render the categorical summary table (cat_desc_1) as Markdown text
markdown_table1_3_2 = cat_desc_1.to_markdown()
# Print the Markdown table
print(markdown_table1_3_2)

|                                  | ocean_proximity   |
|:---------------------------------|:------------------|
| Count                            | 20640             |
| Number of unique values          | 5                 |
| Most frequent value              | <1H OCEAN         |
| Most frequent value (frequency)  | 9136              |
| Least frequent value             | ISLAND            |
| Least frequent value (frequency) | 5                 |


In [105]:
# Missing values per column: absolute count and share of all rows
total_rows = len(housing_df)
missing_count = housing_df.isnull().sum()
missing_percent = missing_count.div(total_rows).mul(100)

# Fully duplicated rows: absolute count and share of all rows
duplicated_count = housing_df.duplicated().sum()
duplicated_percent = duplicated_count / total_rows * 100

# Per-column missing-value table (later rendered as Markdown)
missing_housing_df = pd.DataFrame(
    {
        "Column name": housing_df.columns,
        "Missing count": missing_count,
        "% Missing": missing_percent,
    }
)

# Show the table in plain-text form
print(missing_housing_df)

# Dataset-level summary; the "dataset affected" figure is missing cells
# divided by all cells (rows x columns)
print(f"\n**Total missing values:** {missing_count.sum()}")
print(f"**Percentage of dataset affected:** {(missing_count.sum() / (len(housing_df)*len(housing_df.columns)) * 100):.2f}%")
print(f"**Duplicated rows found:** {duplicated_count}")
print(f"**Percentage of rows in dataset affected:** {duplicated_percent:.2f}%")

                           Column name  Missing count  % Missing
longitude                    longitude              0   0.000000
latitude                      latitude              0   0.000000
housing_median_age  housing_median_age              0   0.000000
total_rooms                total_rooms              0   0.000000
total_bedrooms          total_bedrooms            207   1.002907
population                  population              0   0.000000
households                  households              0   0.000000
median_income            median_income              0   0.000000
median_house_value  median_house_value              0   0.000000
ocean_proximity        ocean_proximity              0   0.000000

**Total missing values:** 207
**Percentage of dataset affected:** 0.10%
**Duplicated rows found:** 0
**Percentage of rows in dataset affected:** 0.00%


In [106]:
# Markdown version of the per-column missing-value table
print(missing_housing_df.to_markdown(index=False))

# Dataset-level summary, emitted as one block of Markdown-formatted lines
summary_lines = [
    f"\n**Total missing values:** {missing_count.sum()}",
    f"**Percentage of dataset affected:** {(missing_count.sum() / (len(housing_df)*len(housing_df.columns)) * 100):.2f}%",
    f"**Duplicated rows found:** {duplicated_count}",
    f"**Percentage of rows in dataset affected:** {duplicated_percent:.2f}%",
]
print("\n".join(summary_lines))

| Column name        |   Missing count |   % Missing |
|:-------------------|----------------:|------------:|
| longitude          |               0 |     0       |
| latitude           |               0 |     0       |
| housing_median_age |               0 |     0       |
| total_rooms        |               0 |     0       |
| total_bedrooms     |             207 |     1.00291 |
| population         |               0 |     0       |
| households         |               0 |     0       |
| median_income      |               0 |     0       |
| median_house_value |               0 |     0       |
| ocean_proximity    |               0 |     0       |

**Total missing values:** 207
**Percentage of dataset affected:** 0.10%
**Duplicated rows found:** 0
**Percentage of rows in dataset affected:** 0.00%


END OF DATASET 1
__________________________________________________________________________________________________________

In [107]:
# Dataset 2: Amazon
# 2. Structure of the data

# Download latest version of the delivery dataset through kagglehub
path_amazon = kagglehub.dataset_download("sujalsuthar/amazon-delivery-dataset")

print("Path to dataset files:", path_amazon)

# Use the lowercase file name, the same spelling the later reload of this file
# uses. A name whose letter case differs from the file on disk only resolves
# on case-insensitive filesystems.
# NOTE(review): assumes the file on disk is named amazon_delivery.csv - confirm.
amazon_df = pd.read_csv(f"{path_amazon}/amazon_delivery.csv")

Path to dataset files: C:\Users\sassa\.cache\kagglehub\datasets\sujalsuthar\amazon-delivery-dataset\versions\1


In [108]:
# Dataset dimensions as a (number of rows, number of columns) tuple
amazon_df.shape

(43739, 16)

In [109]:
# Read the delivery CSV from the downloaded dataset location into amazon_df.
# NOTE(review): amazon_df is also assigned by the download cell above, so this
# reload looks redundant - confirm before removing either load.
amazon_csv = f"{path_amazon}/amazon_delivery.csv"
amazon_df = pd.read_csv(amazon_csv)

In [110]:
# List the column labels of the delivery dataset
amazon_df.columns


Index(['Order_ID', 'Agent_Age', 'Agent_Rating', 'Store_Latitude',
       'Store_Longitude', 'Drop_Latitude', 'Drop_Longitude', 'Order_Date',
       'Order_Time', 'Pickup_Time', 'Weather', 'Traffic', 'Vehicle', 'Area',
       'Delivery_Time', 'Category'],
      dtype='object')

In [111]:
# Show the pandas dtype of each column
amazon_df.dtypes

Order_ID            object
Agent_Age            int64
Agent_Rating       float64
Store_Latitude     float64
Store_Longitude    float64
Drop_Latitude      float64
Drop_Longitude     float64
Order_Date          object
Order_Time          object
Pickup_Time         object
Weather             object
Traffic             object
Vehicle             object
Area                object
Delivery_Time        int64
Category            object
dtype: object

In [112]:
# Number of non-null entries in each column
amazon_df.count()

Order_ID           43739
Agent_Age          43739
Agent_Rating       43685
Store_Latitude     43739
Store_Longitude    43739
Drop_Latitude      43739
Drop_Longitude     43739
Order_Date         43739
Order_Time         43739
Pickup_Time        43739
Weather            43648
Traffic            43739
Vehicle            43739
Area               43739
Delivery_Time      43739
Category           43739
dtype: int64

In [113]:
# Count the distinct values in each column
amazon_df.nunique()

Order_ID           43739
Agent_Age             22
Agent_Rating          28
Store_Latitude       521
Store_Longitude      415
Drop_Latitude       4367
Drop_Longitude      4367
Order_Date            44
Order_Time           177
Pickup_Time          193
Weather                6
Traffic                5
Vehicle                4
Area                   4
Delivery_Time         89
Category              16
dtype: int64

In [114]:
# Draw a random sample of 5 rows to see example values.
# random_state pins the draw so the sampled rows (and the summary table that a
# later cell builds from `examples`) are the same on every full re-run.
examples = amazon_df.sample(5, random_state=42)
examples

Unnamed: 0,Order_ID,Agent_Age,Agent_Rating,Store_Latitude,Store_Longitude,Drop_Latitude,Drop_Longitude,Order_Date,Order_Time,Pickup_Time,Weather,Traffic,Vehicle,Area,Delivery_Time,Category
43211,bbij645145248,23,4.8,30.905562,75.832841,30.935562,75.862841,2022-02-13,18:25:00,18:40:00,Sunny,Medium,motorcycle,Metropolitian,80,Skincare
30456,xacg833244334,24,5.0,22.728163,75.884212,22.758163,75.914212,2022-03-17,21:35:00,21:45:00,Sandstorms,Jam,scooter,Metropolitian,120,Cosmetics
33998,ggeb606441002,25,5.0,26.49095,80.318656,26.51095,80.338656,2022-02-15,08:20:00,08:25:00,Windy,Low,van,Urban,80,Snacks
37400,tuwp062851692,21,4.6,27.165108,78.015053,27.175108,78.025053,2022-02-11,08:45:00,09:00:00,Cloudy,Low,motorcycle,Metropolitian,105,Clothing
34734,cknb677120059,27,4.9,0.0,0.0,0.13,0.13,2022-03-08,23:20:00,23:30:00,Sandstorms,Low,motorcycle,Metropolitian,105,Kitchen


In [115]:
# Build a per-column summary table: name, dtype, non-null count, number of
# distinct values, and the values seen in the sampled example rows.

# One comma-separated string of sampled values per column, in column order
example_strings = [
    ", ".join(str(value) for value in examples[column].tolist())
    for column in amazon_df.columns
]

summary_2_2 = (
    pd.DataFrame(
        {
            "Column name": amazon_df.columns,
            "Data type": amazon_df.dtypes,
            "Non-null count": amazon_df.notnull().sum(),
            "Unique values": amazon_df.nunique(),
        }
    )
    .assign(Examples=example_strings)
    # Drop the column-name index; the names are already held in "Column name"
    .reset_index(drop=True)
)

# Render as a Markdown table (without the positional index) and print it
markdown_table2_2 = summary_2_2.to_markdown(index=False)
print(markdown_table2_2)


| Column name     | Data type   |   Non-null count |   Unique values | Examples                                                                  |
|:----------------|:------------|-----------------:|----------------:|:--------------------------------------------------------------------------|
| Order_ID        | object      |            43739 |           43739 | bbij645145248, xacg833244334, ggeb606441002, tuwp062851692, cknb677120059 |
| Agent_Age       | int64       |            43739 |              22 | 23, 24, 25, 21, 27                                                        |
| Agent_Rating    | float64     |            43685 |              28 | 4.8, 5.0, 5.0, 4.6, 4.9                                                   |
| Store_Latitude  | float64     |            43739 |             521 | 30.905562, 22.728163, 26.49095, 27.165108, 0.0                            |
| Store_Longitude | float64     |            43739 |             415 | 75.832841, 75.884212, 80.318656, 78.015053, 0.0

In [116]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns
amazon_df.describe()

Unnamed: 0,Agent_Age,Agent_Rating,Store_Latitude,Store_Longitude,Drop_Latitude,Drop_Longitude,Delivery_Time
count,43739.0,43685.0,43739.0,43739.0,43739.0,43739.0,43739.0
mean,29.567137,4.63378,17.21096,70.661177,17.459031,70.821842,124.905645
std,5.815155,0.334716,7.764225,21.475005,7.34295,21.153148,51.915451
min,15.0,1.0,-30.902872,-88.366217,0.01,0.01,10.0
25%,25.0,4.5,12.933298,73.170283,12.985996,73.28,90.0
50%,30.0,4.7,18.55144,75.898497,18.633626,76.002574,125.0
75%,35.0,4.9,22.732225,78.045359,22.785049,78.104095,160.0
max,50.0,6.0,30.914057,88.433452,31.054057,88.563452,270.0


In [117]:
# Render the numeric descriptive statistics as a Markdown table and print it
amazon_numeric_stats = amazon_df.describe()
markdown_table2_3_1 = amazon_numeric_stats.to_markdown()
print(markdown_table2_3_1)

|       |   Agent_Age |   Agent_Rating |   Store_Latitude |   Store_Longitude |   Drop_Latitude |   Drop_Longitude |   Delivery_Time |
|:------|------------:|---------------:|-----------------:|------------------:|----------------:|-----------------:|----------------:|
| count | 43739       |   43685        |      43739       |        43739      |     43739       |       43739      |      43739      |
| mean  |    29.5671  |       4.63378  |         17.211   |           70.6612 |        17.459   |          70.8218 |        124.906  |
| std   |     5.81516 |       0.334716 |          7.76423 |           21.475  |         7.34295 |          21.1531 |         51.9155 |
| min   |    15       |       1        |        -30.9029  |          -88.3662 |         0.01    |           0.01   |         10      |
| 25%   |    25       |       4.5      |         12.9333  |           73.1703 |        12.986   |          73.28   |         90      |
| 50%   |    30       |       4.7      |         18.551

In [118]:
# Descriptive statistics for the categorical (object-dtype) columns, with the
# default row labels replaced by more readable ones
cat_desc_2 = amazon_df.describe(include="object").rename(
    index={
        "count": "Count",
        "unique": "Number of unique values",
        "top": "Most frequent value",
        "freq": "Most frequent value (frequency)",
    }
)

# Tally each categorical column exactly once. Only the columns present in
# cat_desc_2 are processed: numeric columns have no place in this table, so
# counting their values would be wasted work.
# value_counts() leaves missing values out by default, so the rarest value
# reported here is always an observed (non-missing) one.
category_counts = [amazon_df[col].value_counts() for col in cat_desc_2.columns]

# Add least frequent value and its frequency; the lists follow the column
# order of cat_desc_2, so each entry lands under its own column
cat_desc_2.loc["Least frequent value"] = [counts.idxmin() for counts in category_counts]
cat_desc_2.loc["Least frequent value (frequency)"] = [counts.min() for counts in category_counts]

# Display the result
cat_desc_2

Unnamed: 0,Order_ID,Order_Date,Order_Time,Pickup_Time,Weather,Traffic,Vehicle,Area,Category
Count,43739,43739,43739,43739,43648,43739,43739,43739,43739
Number of unique values,43739,44,177,193,6,5,4,4,16
Most frequent value,ialx566343618,2022-03-15,21:55:00,21:30:00,Fog,Low,motorcycle,Metropolitian,Electronics
Most frequent value (frequency),1,1141,460,481,7440,14999,25527,32698,2849
Least frequent value,ialx566343618,2022-02-18,16:30:00,16:20:00,Sunny,,bicycle,Semi-Urban,Shoes
Least frequent value (frequency),1,819,51,36,7078,91,15,152,2666


In [119]:
# Render the categorical summary table (cat_desc_2) as Markdown text
markdown_table2_3_2 = cat_desc_2.to_markdown()

# Print the Markdown table
print(markdown_table2_3_2)

|                                  | Order_ID      | Order_Date   | Order_Time   | Pickup_Time   | Weather   | Traffic   | Vehicle    | Area          | Category    |
|:---------------------------------|:--------------|:-------------|:-------------|:--------------|:----------|:----------|:-----------|:--------------|:------------|
| Count                            | 43739         | 43739        | 43739        | 43739         | 43648     | 43739     | 43739      | 43739         | 43739       |
| Number of unique values          | 43739         | 44           | 177          | 193           | 6         | 5         | 4          | 4             | 16          |
| Most frequent value              | ialx566343618 | 2022-03-15   | 21:55:00     | 21:30:00      | Fog       | Low       | motorcycle | Metropolitian | Electronics |
| Most frequent value (frequency)  | 1             | 1141         | 460          | 481           | 7440      | 14999     | 25527      | 32698         | 2849        |
| Le

In [120]:
# Descriptive statistics for the categorical (object-dtype) columns, with the
# default row labels replaced by more readable ones
cat_desc_2 = amazon_df.describe(include="object").rename(
    index={
        "count": "Count",
        "unique": "Number of unique values",
        "top": "Most frequent value",
        "freq": "Most frequent value (frequency)",
    }
)

# For every categorical column, record its rarest entry and that entry's count.
# value_counts() orders entries from most to least frequent, so the last one is
# the rarest; dropna=False keeps missing values in the tally, which means NaN
# itself can be reported as the least frequent value.
rarest_entries = []
for col in amazon_df.select_dtypes(include="object").columns:
    counts = amazon_df[col].value_counts(dropna=False)
    rarest_entries.append((counts.index[-1], counts.iloc[-1]))

least_values = [value for value, _ in rarest_entries]
least_freqs = [freq for _, freq in rarest_entries]

# Append the rarest value and its frequency as two extra rows
cat_desc_2.loc["Least frequent value"] = least_values
cat_desc_2.loc["Least frequent value (frequency)"] = least_freqs

# Display the result as plain text
print(cat_desc_2.to_string())


                                       Order_ID  Order_Date Order_Time Pickup_Time Weather Traffic      Vehicle            Area     Category
Count                                     43739       43739      43739       43739   43648   43739        43739           43739        43739
Number of unique values                   43739          44        177         193       6       5            4               4           16
Most frequent value               ialx566343618  2022-03-15   21:55:00    21:30:00     Fog    Low   motorcycle   Metropolitian   Electronics
Most frequent value (frequency)               1        1141        460         481    7440   14999        25527           32698         2849
Least frequent value              nsyz997960170  2022-02-18   16:30:00    16:20:00     NaN    NaN      bicycle      Semi-Urban         Shoes
Least frequent value (frequency)              1         819         51          36      91      91           15             152         2666


In [121]:
# Render the categorical summary table (cat_desc_2) as Markdown text
markdown_table2_3_2 = cat_desc_2.to_markdown()

# Print the Markdown table
print(markdown_table2_3_2)

|                                  | Order_ID      | Order_Date   | Order_Time   | Pickup_Time   | Weather   | Traffic   | Vehicle    | Area          | Category    |
|:---------------------------------|:--------------|:-------------|:-------------|:--------------|:----------|:----------|:-----------|:--------------|:------------|
| Count                            | 43739         | 43739        | 43739        | 43739         | 43648     | 43739     | 43739      | 43739         | 43739       |
| Number of unique values          | 43739         | 44           | 177          | 193           | 6         | 5         | 4          | 4             | 16          |
| Most frequent value              | ialx566343618 | 2022-03-15   | 21:55:00     | 21:30:00      | Fog       | Low       | motorcycle | Metropolitian | Electronics |
| Most frequent value (frequency)  | 1             | 1141         | 460          | 481           | 7440      | 14999     | 25527      | 32698         | 2849        |
| Le

In [122]:
# Missing values per column: absolute count and share of all rows
total_rows = len(amazon_df)
missing_count = amazon_df.isnull().sum()
missing_percent = missing_count.div(total_rows).mul(100)

# Fully duplicated rows: absolute count and share of all rows
duplicated_count = amazon_df.duplicated().sum()
duplicated_percent = duplicated_count / total_rows * 100

# Per-column missing-value table (later rendered as Markdown)
missing_amazon_df = pd.DataFrame(
    {
        "Column name": amazon_df.columns,
        "Missing count": missing_count,
        "% Missing": missing_percent,
    }
)

# Show the table in plain-text form
print(missing_amazon_df)

# Dataset-level summary; the "dataset affected" figure is missing cells
# divided by all cells (rows x columns)
print(f"\n**Total missing values:** {missing_count.sum()}")
print(f"**Percentage of dataset affected:** {(missing_count.sum() / (len(amazon_df)*len(amazon_df.columns)) * 100):.2f}%")
print(f"**Duplicated rows found:** {duplicated_count}")
print(f"**Percentage of rows in dataset affected:** {duplicated_percent:.2f}%")

                     Column name  Missing count  % Missing
Order_ID                Order_ID              0   0.000000
Agent_Age              Agent_Age              0   0.000000
Agent_Rating        Agent_Rating             54   0.123460
Store_Latitude    Store_Latitude              0   0.000000
Store_Longitude  Store_Longitude              0   0.000000
Drop_Latitude      Drop_Latitude              0   0.000000
Drop_Longitude    Drop_Longitude              0   0.000000
Order_Date            Order_Date              0   0.000000
Order_Time            Order_Time              0   0.000000
Pickup_Time          Pickup_Time              0   0.000000
Weather                  Weather             91   0.208052
Traffic                  Traffic              0   0.000000
Vehicle                  Vehicle              0   0.000000
Area                        Area              0   0.000000
Delivery_Time      Delivery_Time              0   0.000000
Category                Category              0   0.0000

In [123]:
# Markdown version of the per-column missing-value table
print(missing_amazon_df.to_markdown(index=False))

# Dataset-level summary, emitted as one block of Markdown-formatted lines
summary_lines = [
    f"\n**Total missing values:** {missing_count.sum()}",
    f"**Percentage of dataset affected:** {(missing_count.sum() / (len(amazon_df)*len(amazon_df.columns)) * 100):.2f}%",
    f"**Duplicated rows found:** {duplicated_count}",
    f"**Percentage of rows in dataset affected:** {duplicated_percent:.2f}%",
]
print("\n".join(summary_lines))

| Column name     |   Missing count |   % Missing |
|:----------------|----------------:|------------:|
| Order_ID        |               0 |    0        |
| Agent_Age       |               0 |    0        |
| Agent_Rating    |              54 |    0.12346  |
| Store_Latitude  |               0 |    0        |
| Store_Longitude |               0 |    0        |
| Drop_Latitude   |               0 |    0        |
| Drop_Longitude  |               0 |    0        |
| Order_Date      |               0 |    0        |
| Order_Time      |               0 |    0        |
| Pickup_Time     |               0 |    0        |
| Weather         |              91 |    0.208052 |
| Traffic         |               0 |    0        |
| Vehicle         |               0 |    0        |
| Area            |               0 |    0        |
| Delivery_Time   |               0 |    0        |
| Category        |               0 |    0        |

**Total missing values:** 145
**Percentage of dataset affected: