# 📦 Shipment Delay Prediction Using ML
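This notebook demonstrates how to predict shipment delays from route (source/destination), weather, and news-risk features. It also defines helper functions for fetching live news, weather, and traffic data from external APIs.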

In [3]:
import pandas as pd

# Read the shipment records from the CSV next to this notebook,
# then preview the first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer="shipment_data.csv")
df.head(n=5)


Unnamed: 0,shipment_id,source,destination,ship_date,delivery_date,weather_source,weather_destination,news_score,delay_in_days
0,S001,Mumbai,Delhi,2024-05-01,2024-05-03,Clear,Rain,0.7,2
1,S002,Delhi,Bangalore,2024-05-02,2024-05-04,Clear,Clear,0.1,0
2,S003,Hyderabad,Chennai,2024-05-01,2024-05-05,Rain,Rain,0.8,1
3,S004,Mumbai,Kolkata,2024-05-03,2024-05-06,Clear,Storm,0.9,3
4,S005,Delhi,Mumbai,2024-05-04,2024-05-05,Clear,Clear,0.2,0


## 📰 News Risk Score Extraction using NewsAPI

In [4]:
import requests
import datetime

def fetch_news_score(city_name, keywords=('strike', 'accident', 'protest'), days=3):
    """Score recent disruption-related news for a city on a 0-1 scale.

    Queries the NewsAPI ``everything`` endpoint for English articles published
    since ``days`` days ago that match ``city_name`` together with at least one
    of ``keywords``. The score is the number of articles in the response
    divided by 10, capped at 1.0.

    The API key is read from the ``NEWSAPI_KEY`` environment variable; the
    placeholder string is used when the variable is not set.

    Args:
        city_name: City to search for.
        keywords: Iterable of search terms joined with ``OR``. When empty,
            the query is just the city name.
        days: How many days back from today the search window starts.

    Returns:
        A float between 0 and 1, or ``0`` when the API does not report
        ``status == "ok"`` or the response body is not the expected JSON.

    Raises:
        Exceptions raised by ``requests.get`` (connection errors, timeouts)
        are not caught here.
    """
    import os

    api_key = os.environ.get("NEWSAPI_KEY", "YOUR_NEWSAPI_KEY")
    endpoint = "https://newsapi.org/v2/everything"
    from_date = (datetime.datetime.today() - datetime.timedelta(days=days)).strftime("%Y-%m-%d")

    # Avoid building "<city> AND ()" when no keywords are supplied.
    terms = list(keywords) if keywords else []
    if terms:
        query = f"{city_name} AND ({' OR '.join(terms)})"
    else:
        query = str(city_name)

    params = {
        'q': query,
        'from': from_date,
        'sortBy': 'relevancy',
        'language': 'en',
        'apiKey': api_key
    }

    # requests has no default timeout; without one a stalled connection
    # would block the notebook indefinitely.
    response = requests.get(endpoint, params=params, timeout=10)
    try:
        data = response.json()
    except ValueError:
        # Body was not JSON (e.g. an HTML error page): treat like an API error.
        return 0

    if isinstance(data, dict) and data.get("status") == "ok":
        article_count = len(data.get("articles") or [])
        return min(article_count / 10.0, 1.0)
    return 0

# Example: news risk score for Mumbai using the default keywords and window
mumbai_news_score = fetch_news_score("Mumbai")
print(mumbai_news_score)


0


## 🌦️ Weather Data Integration using OpenWeatherMap API

In [5]:
import requests

def fetch_weather(city_name):
    """Fetch the current weather condition and temperature for a city.

    Calls the OpenWeatherMap ``/data/2.5/weather`` endpoint with
    ``units=metric``. The API key is read from the ``OPENWEATHERMAP_API_KEY``
    environment variable; the placeholder string is used when it is not set.

    Args:
        city_name: City to look up; sent as the ``q`` query parameter.

    Returns:
        A ``(weather, temp)`` tuple taken from ``data['weather'][0]['main']``
        and ``data['main']['temp']``, or ``(None, None)`` when the HTTP status
        is not 200 or the response body does not have that shape.

    Raises:
        Exceptions raised by ``requests.get`` (connection errors, timeouts)
        are not caught here.
    """
    import os

    api_key = os.environ.get("OPENWEATHERMAP_API_KEY", "YOUR_OPENWEATHERMAP_KEY")
    # HTTPS so the API key is not sent in clear text.
    endpoint = "https://api.openweathermap.org/data/2.5/weather"
    # Pass values via params so requests URL-encodes them
    # (city names may contain spaces or '&').
    params = {
        'q': city_name,
        'appid': api_key,
        'units': 'metric'
    }

    # requests has no default timeout; set one so a stalled call cannot hang the notebook.
    response = requests.get(endpoint, params=params, timeout=10)
    if response.status_code != 200:
        return None, None

    try:
        data = response.json()
        weather = data['weather'][0]['main']
        temp = data['main']['temp']
    except (ValueError, KeyError, IndexError, TypeError):
        # A 200 response with an unexpected or non-JSON body is reported
        # the same way as a failed lookup.
        return None, None
    return weather, temp

# Example: current conditions for Delhi, printed as a (weather, temp) tuple
delhi_weather = fetch_weather("Delhi")
print(delhi_weather)


(None, None)


## 🚗 Traffic Delay Estimation using Google Maps API

In [6]:
import requests

def get_traffic_time(source, destination):
    """Look up the in-traffic travel time between two places.

    Calls the Google Maps Distance Matrix endpoint with ``departure_time``
    set to ``'now'``. The API key is read from the ``GOOGLE_MAPS_API_KEY``
    environment variable; the placeholder string is used when it is not set.

    Args:
        source: Value sent as the ``origins`` parameter.
        destination: Value sent as the ``destinations`` parameter.

    Returns:
        The ``duration_in_traffic`` text of the first row/element of the
        response, ``"Unavailable"`` when the response body does not contain
        that field (or is not JSON), or ``"Error"`` when the HTTP status is
        not 200.

    Raises:
        Exceptions raised by ``requests.get`` (connection errors, timeouts)
        are not caught here.
    """
    import os

    api_key = os.environ.get("GOOGLE_MAPS_API_KEY", "YOUR_GOOGLE_MAPS_API_KEY")
    endpoint = "https://maps.googleapis.com/maps/api/distancematrix/json"

    params = {
        'origins': source,
        'destinations': destination,
        'departure_time': 'now',
        'key': api_key
    }

    # requests has no default timeout; set one so a stalled call cannot hang the notebook.
    response = requests.get(endpoint, params=params, timeout=10)
    if response.status_code != 200:
        return "Error"

    try:
        data = response.json()
        return data['rows'][0]['elements'][0]['duration_in_traffic']['text']
    except (ValueError, KeyError, IndexError, TypeError):
        # Catch only "field missing / body malformed" errors. A bare except
        # would also swallow KeyboardInterrupt and genuine programming errors.
        return "Unavailable"

# Example: in-traffic travel time for the Mumbai -> Delhi route
mumbai_delhi_duration = get_traffic_time("Mumbai", "Delhi")
print(mumbai_delhi_duration)


Unavailable


## 🧠 Train ML Model to Predict Delays

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Create binary delay label: 1 when delay_in_days is greater than 0, else 0
df['delay_label'] = (df['delay_in_days'] > 0).astype(int)

# Encode categorical features.
# get_dummies replaces the listed columns, so on a second run of this cell they
# no longer exist; encode only the ones still present so the cell can be re-run.
categorical_cols = ['source', 'destination', 'weather_source', 'weather_destination']
cols_to_encode = [col for col in categorical_cols if col in df.columns]
if cols_to_encode:
    df = pd.get_dummies(df, columns=cols_to_encode)

# Features and target.
# Drop the identifier, the raw date strings, and both delay columns
# (the target and the value it is derived from).
X = df.drop(columns=["shipment_id", "ship_date", "delivery_date", "delay_in_days", "delay_label"])
# "Unnamed: 0" shows up in the data preview with values 0, 1, 2, ... and looks
# like a row index saved into the CSV; it must not be used as a feature.
X = X.drop(columns=["Unnamed: 0"], errors="ignore")
y = df["delay_label"]

# Train/Test split (seeded so the same rows are held out on every run)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model (seeded so the fitted forest is reproducible across runs)
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict and evaluate on the held-out rows
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1

