<a href="https://colab.research.google.com/github/TMPatipolaarachchi/Real-Time-AI-IoT-Elephant-Detection-and-Acoustic-Deterrent-for-Sri-Lankan-Railways.-/blob/Risk_Prediction/TrainModelForRisk.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [72]:
from google.colab import drive
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

drive.mount('/content/drive')
file_path = '/content/drive/My Drive/elephant_risk_dataset_v3.csv'

# Load a fresh copy of the data
df = pd.read_csv(file_path)
print(df.head())

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
   distance_km  train_speed_kmh behavioral_state  elephant_count  \
0         3.81               34             calm              25   
1         6.54               72             calm               3   
2         1.90               40             calm              50   
3         5.30               61       aggressive              63   
4         1.00               81       aggressive               4   

  social_structure weather risk_level  
0             herd   rainy       high  
1             herd     dry        low  
2             herd   rainy       high  
3             herd   rainy       high  
4             herd     dry       high  


In [73]:
# Initialize encoders
le_behavior = LabelEncoder()
le_social = LabelEncoder()
le_weather = LabelEncoder()
le_risk = LabelEncoder()

# Apply encoding - explicitly convert to string to prevent type errors
df['behavioral_state'] = le_behavior.fit_transform(df['behavioral_state'].astype(str))
df['social_structure'] = le_social.fit_transform(df['social_structure'].astype(str))
df['weather'] = le_weather.fit_transform(df['weather'].astype(str))
df['risk_level'] = le_risk.fit_transform(df['risk_level'].astype(str))

# Define Features (X) and Target (y)
X = df.drop('risk_level', axis=1)
y = df['risk_level']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Preprocessing completed!")

Preprocessing completed!


In [74]:
# Initialize and train the model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

print("Model training completed!")

Model training completed!


In [79]:

# Make predictions
y_pred = rf_model.predict(X_test)

# Convert classes to strings for the report
risk_classes_str = [str(cls) for cls in le_risk.classes_]

print(f"Overall Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")
print("\nDetailed Classification Report:")
print(classification_report(y_test, y_pred, target_names=risk_classes_str))

Overall Accuracy: 98.33%

Detailed Classification Report:
              precision    recall  f1-score   support

        high       0.97      1.00      0.98        29
         low       1.00      1.00      1.00        25
      medium       1.00      0.83      0.91         6

    accuracy                           0.98        60
   macro avg       0.99      0.94      0.96        60
weighted avg       0.98      0.98      0.98        60



In [76]:
# Save the model
joblib.dump(rf_model, '/content/drive/My Drive/elephant_risk_model.pkl')

# Save encoders as a dictionary
encoders = {
    'behavior': le_behavior,
    'social': le_social,
    'weather': le_weather,
    'risk': le_risk
}
joblib.dump(encoders, '/content/drive/My Drive/risk_encoders.pkl')

print("Model and Encoders saved to your Drive.")

Model and Encoders saved to your Drive.


In [77]:
# Load assets
model = joblib.load('/content/drive/My Drive/elephant_risk_model.pkl')
encoders = joblib.load('/content/drive/My Drive/risk_encoders.pkl')

def get_realtime_risk(distance, speed, behavior, count, structure, weather):
    # 1. Prepare text inputs (Clean them)
    b = str(behavior).lower().strip()
    s = str(structure).lower().strip()
    w = str(weather).lower().strip()

    # 2. Transform text to numbers using the encoders
    # We use [0] because .transform returns an array
    b_encoded = encoders['behavior'].transform([b])[0]
    s_encoded = encoders['social'].transform([s])[0]
    w_encoded = encoders['weather'].transform([w])[0]

    # 3. Prepare the input data in a DataFrame with correct column names
    input_data = pd.DataFrame([[
        float(distance),
        float(speed),
        b_encoded,
        int(count),
        s_encoded,
        w_encoded
    ]], columns=['distance_km', 'train_speed_kmh', 'behavioral_state',
                 'elephant_count', 'social_structure', 'weather'])

    # 4. Make the prediction
    prediction_numeric = model.predict(input_data)

    # 5. Convert numeric back to label
    risk_label = encoders['risk'].inverse_transform(prediction_numeric)

    return risk_label[0]

In [78]:
# TEST 1: Distance is very close (Rule: Should be HIGH regardless of others)
print(f"Scenario 1 Risk: {get_realtime_risk(0.4, 30, 'aggressive', 9, 'herd', 'dry').upper()}")

# TEST 2: Far away, slow speed, calm (Rule: Should be LOW)
print(f"Scenario 2 Risk: {get_realtime_risk(9.0, 78, 'calm', 1, 'single', 'rainy').upper()}")

# TEST 3: Far away, but aggressive and high speed (Rule: Should be HIGH)
print(f"Scenario 3 Risk: {get_realtime_risk(8.5, 45, 'aggressive', 1, 'single', 'dry').upper()}")

# TEST 4: Medium distance, medium speed (Rule: Should be MEDIUM)
print(f"Scenario 4 Risk: {get_realtime_risk(5, 75, 'calm', 4, 'herd', 'rainy').upper()}")

# --- RUN TESTS ---
print(f"Scenario 1 Risk: {get_realtime_risk(0.4, 30, 'aggressive', 9, 'herd', 'dry').upper()}")
print(f"Scenario 2 Risk: {get_realtime_risk(9.0, 35, 'calm', 1, 'single', 'dry').upper()}")

Scenario 1 Risk: HIGH
Scenario 2 Risk: LOW
Scenario 3 Risk: LOW
Scenario 4 Risk: HIGH
Scenario 1 Risk: HIGH
Scenario 2 Risk: LOW
