# ========================================================
# IBM Cloud Smart Farming AI Agent with SF24 Dataset
# ========================================================

In [2]:
import os, types
import pandas as pd
from botocore.client import Config
import ibm_boto3
import numpy as np
from scipy.spatial.distance import cosine
from ibm_watsonx_ai.foundation_models.embeddings import Embeddings
from ibm_watsonx_ai.foundation_models import Model
from ibm_watsonx_ai.foundation_models.utils.enums import ModelTypes
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
import json

# Load the Dataset from IBM Cloud Object Storage

In [3]:
# Shim used below: gives the streaming body an `__iter__` attribute when it
# lacks one, so that pandas.read_csv accepts it as a file-like object.
def __iter__(self): return 0

# SECURITY: never commit API keys to a notebook. The key is read from the
# environment; any key that was previously stored in this cell should be
# treated as leaked and revoked/rotated in IBM Cloud IAM.
cos_api_key = os.environ.get("IBM_COS_API_KEY")
if not cos_api_key:
    raise RuntimeError(
        "Set the IBM_COS_API_KEY environment variable to an IBM Cloud API key "
        "with read access to the Cloud Object Storage bucket."
    )

cos_client = ibm_boto3.client(
    service_name='s3',
    ibm_api_key_id=cos_api_key,
    ibm_auth_endpoint="https://iam.cloud.ibm.com/identity/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3.direct.eu-gb.cloud-object-storage.appdomain.cloud',
)

bucket = 'aifarmingadvice-donotdelete-pr-k3buvqggtf3o2s'
object_key = 'Smart_Farming_Data_2024.csv'

# Fetch the CSV object as a streaming body and patch in `__iter__` if missing.
body = cos_client.get_object(Bucket=bucket, Key=object_key)['Body']
if not hasattr(body, "__iter__"):
    body.__iter__ = types.MethodType(__iter__, body)

sf24_df = pd.read_csv(body)
print("SF24 dataset loaded. Shape:", sf24_df.shape)
print("Preview:")
print(sf24_df.head(3))

SF24 dataset loaded. Shape: (2200, 23)
Preview:
    N   P   K  temperature   humidity        ph    rainfall label  \
0  90  42  43    20.879744  82.002744  6.502985  202.935536  rice   
1  85  58  41    21.770462  80.319644  7.038096  226.655537  rice   
2  60  55  44    23.004459  82.320763  7.840207  263.964248  rice   

   soil_moisture  soil_type  ...  organic_matter  irrigation_frequency  \
0      29.446064          2  ...        3.121395                     4   
1      12.851183          3  ...        2.142021                     4   
2      29.363913          2  ...        1.474974                     1   

   crop_density  pest_pressure  fertilizer_usage  growth_stage  \
0     11.743910      57.607308        188.194958             1   
1     16.797101      74.736879         70.963629             1   
2     12.654395       1.034478        191.976077             1   

   urban_area_proximity  water_source_type  frost_risk  water_usage_efficiency  
0              2.719614         

# Prepare the Knowledge Base for RAG

In [4]:
# Transform your SF24 dataset into knowledge entries for RAG system
def create_farming_knowledge_from_sf24(df):
    """Turn every row of the SF24 DataFrame into one RAG knowledge entry.

    Args:
        df: DataFrame providing the columns referenced below (``label``, ``N``,
            ``P``, ``K``, ``temperature``, ``humidity``, ``ph``, ``rainfall``,
            ``soil_moisture``, ``organic_matter``, ``soil_type``,
            ``irrigation_frequency``, ``fertilizer_usage``, ``crop_density``,
            ``pest_pressure``, ``growth_stage``, ``water_usage_efficiency``,
            ``water_source_type``, ``frost_risk``, ``urban_area_proximity``).

    Returns:
        A list with one dict per row, holding the keys ``id``, ``category``,
        ``crop``, ``content``, ``keywords`` and ``source``.
    """

    def build_entry(row_label, record):
        # Each sentence group is one line of the final text. The first line
        # keeps its trailing space so the text matches the established format.
        content = "\n".join([
            f"For {record['label']} cultivation: Optimal soil conditions require Nitrogen {record['N']}ppm, Phosphorus {record['P']}ppm, Potassium {record['K']}ppm. ",
            f"Environmental requirements: Temperature {record['temperature']:.1f}°C, humidity {record['humidity']:.1f}%, pH {record['ph']:.2f}, rainfall {record['rainfall']:.1f}mm.",
            f"Soil management: Moisture level {record['soil_moisture']:.1f}%, organic matter {record['organic_matter']:.2f}%, soil type {record['soil_type']}.",
            f"Farming practices: Irrigation frequency {record['irrigation_frequency']} days, fertilizer usage {record['fertilizer_usage']:.1f}kg/ha.",
            f"Growth conditions: Crop density {record['crop_density']:.1f}, pest pressure {record['pest_pressure']:.1f}, growth stage {record['growth_stage']}.",
            f"Water management: Usage efficiency {record['water_usage_efficiency']:.2f}, water source type {record['water_source_type']}, frost risk {record['frost_risk']:.1f}%.",
            f"Location factors: Urban proximity {record['urban_area_proximity']:.1f}km.",
        ])
        crop_name = record['label'].lower()

        return {
            "id": f"sf24_{row_label}",
            "category": "crop_guidance",
            "crop": crop_name,
            "content": content,
            "keywords": [record['label'].lower(), "soil", "nutrients", "environmental", "farming"],
            "source": "Smart Farming Data 2024"
        }

    # The id is derived from the DataFrame index label of each row.
    return [build_entry(row_label, record) for row_label, record in df.iterrows()]

# Build the knowledge base from the full SF24 frame and report its size
sf24_knowledge = create_farming_knowledge_from_sf24(sf24_df)
print("Created {} knowledge entries from SF24 dataset".format(len(sf24_knowledge)))

# Show the crop and the first 200 characters of the first entry
print("\n Sample Knowledge Entry:")
print("Crop: {}".format(sf24_knowledge[0]['crop']))
print("Content: {}...".format(sf24_knowledge[0]['content'][:200]))


Created 2200 knowledge entries from SF24 dataset

 Sample Knowledge Entry:
Crop: rice
Content: For rice cultivation: Optimal soil conditions require Nitrogen 90ppm, Phosphorus 42ppm, Potassium 43ppm. 
Environmental requirements: Temperature 20.9°C, humidity 82.0%, pH 6.50, rainfall 202.9mm.
Soi...


# Set up watsonx.ai Embeddings and Granite Model

In [6]:
from ibm_watsonx_ai.foundation_models.embeddings import Embeddings
from ibm_watsonx_ai.foundation_models import ModelInference
from ibm_watsonx_ai.foundation_models.utils.enums import ModelTypes
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams

# Set up your watsonx.ai credentials.
# SECURITY: the API key is read from the environment instead of being stored in
# the notebook. Any key that was previously hardcoded here should be treated as
# leaked and revoked/rotated in IBM Cloud IAM.
watsonx_api_key = os.environ.get("WATSONX_APIKEY")
if not watsonx_api_key:
    raise RuntimeError(
        "Set the WATSONX_APIKEY environment variable to your IBM watsonx.ai API key."
    )

watsonx_credentials = {
    "apikey": watsonx_api_key,
    "url": "https://eu-gb.ml.cloud.ibm.com"
}
project_id = "e9eee040-6177-4fa5-bab1-eb98ba7952d5"

# Initialize embedding model for RAG retrieval
embedding_model = Embeddings(
    model_id="ibm/slate-125m-english-rtrvr",
    credentials=watsonx_credentials,
    project_id=project_id
)

# Initialize the Granite text-generation model used to write the advice
granite_model = ModelInference(
    model_id="ibm/granite-3-8b-instruct",
    credentials=watsonx_credentials,
    project_id=project_id,
    params={
        GenParams.DECODING_METHOD: "greedy",
        GenParams.MAX_NEW_TOKENS: 400,
        # NOTE(review): temperature is a sampling parameter and presumably has
        # no effect while DECODING_METHOD is "greedy" - confirm against the
        # watsonx.ai parameter docs and either drop it or switch to "sample".
        GenParams.TEMPERATURE: 0.3,
        GenParams.REPETITION_PENALTY: 1.1
    }
)

print("watsonx.ai models initialized successfully")


watsonx.ai models initialized successfully


# Create Embeddings for Knowledge Base

In [7]:
import numpy as np
from scipy.spatial.distance import cosine
import time

# Embed the SF24 knowledge base, timing the whole run
print("🔄 Creating embeddings for SF24 knowledge base...")
start_time = time.time()

# Only the first 500 entries are embedded for the demo; adjust as needed
demo_knowledge = sf24_knowledge[:500]
documents = [entry['content'] for entry in demo_knowledge]

# Documents are sent in fixed-size batches to avoid request timeouts
batch_size = 50
all_embeddings = []
total_batches = (len(documents) - 1) // batch_size + 1

for batch_number, offset in enumerate(range(0, len(documents), batch_size), start=1):
    chunk = documents[offset:offset + batch_size]
    all_embeddings.extend(embedding_model.embed_documents(chunk))
    print(f"✅ Processed batch {batch_number}/{total_batches}")

doc_embeddings = np.array(all_embeddings)
print(f"✅ Created embeddings for {len(demo_knowledge)} knowledge entries")
print(f"⏱️ Time taken: {time.time() - start_time:.2f} seconds")


🔄 Creating embeddings for SF24 knowledge base...
✅ Processed batch 1/10
✅ Processed batch 2/10
✅ Processed batch 3/10
✅ Processed batch 4/10
✅ Processed batch 5/10
✅ Processed batch 6/10
✅ Processed batch 7/10
✅ Processed batch 8/10
✅ Processed batch 9/10
✅ Processed batch 10/10
✅ Created embeddings for 500 knowledge entries
⏱️ Time taken: 5.86 seconds


# Implement Complete RAG Pipeline

In [8]:
class SmartFarmingAIAgent:
    """Retrieval-augmented farming advisor over a fixed list of knowledge entries.

    The agent embeds a question, ranks the stored document embeddings by cosine
    similarity, and passes the best-matching entries as context to a text
    generation model.

    Args:
        knowledge_entries: Sequence of dicts; each must provide ``crop`` and
            ``content``. Entry ``i`` is described by embedding row ``i``.
        embeddings: 2-D array-like of document embeddings, one row per entry.
        embedding_model: Object exposing ``embed_query(text)``.
        granite_model: Object exposing ``generate_text(prompt=...)``.
    """

    def __init__(self, knowledge_entries, embeddings, embedding_model, granite_model):
        self.knowledge_entries = knowledge_entries
        self.doc_embeddings = embeddings
        self.embedding_model = embedding_model
        self.granite_model = granite_model

    def retrieve_relevant_knowledge(self, query, top_k=3):
        """Retrieve most relevant farming knowledge for the query.

        Args:
            query: Question text to embed.
            top_k: Maximum number of entries to return. ``None`` returns every
                entry; values <= 0 return an empty list.

        Returns:
            Copies of the best-matching entries, most similar first, each with
            an added ``relevance_score`` (cosine similarity as a float).

        Raises:
            ValueError: If the stored embeddings are non-empty but not 2-D.
        """
        doc_matrix = np.asarray(self.doc_embeddings, dtype=float)
        if doc_matrix.size == 0 or (top_k is not None and top_k <= 0):
            return []
        if doc_matrix.ndim != 2:
            raise ValueError("doc_embeddings must be a 2-D array (one row per knowledge entry)")

        query_vector = np.asarray(self.embedding_model.embed_query(query), dtype=float)

        # Cosine similarity for all documents at once. Zero-norm vectors get a
        # similarity of 0.0 instead of producing NaN through a division by zero.
        dot_products = doc_matrix @ query_vector
        denominators = np.linalg.norm(doc_matrix, axis=1) * np.linalg.norm(query_vector)
        similarities = np.divide(
            dot_products,
            denominators,
            out=np.zeros_like(dot_products),
            where=denominators > 0,
        )

        # Rank by similarity descending; ties are resolved by the higher index
        # first, which matches sorting (similarity, idx) tuples in reverse.
        ranked = np.lexsort((np.arange(similarities.shape[0]), similarities))[::-1]

        relevant_knowledge = []
        for idx in ranked[:top_k]:
            entry = self.knowledge_entries[int(idx)].copy()
            entry['relevance_score'] = float(similarities[idx])
            relevant_knowledge.append(entry)

        return relevant_knowledge

    def _build_prompt(self, query, context, language):
        """Assemble the generation prompt from the question and retrieved context."""
        system_prompt = """You are an expert agricultural advisor for farmers in West Bengal, India. 
You provide practical, data-driven farming advice based on the Smart Farming Data 2024 dataset.
Your advice should be specific, actionable, and consider local farming conditions."""

        # Accept "Bengali", " bengali ", etc. in addition to the exact lowercase form.
        if str(language).strip().lower() == "bengali":
            user_instruction = "বাংলায় উত্তর দিন।"
        else:
            user_instruction = "Provide clear advice in simple English."

        return f"""{system_prompt}

Agricultural Data Context (from Smart Farming Data 2024):
{context}

Farmer's Question: {query}

Instructions: {user_instruction}
Consider soil conditions, environmental factors, nutrient requirements, and farming practices from the data.

Expert Advice:"""

    def generate_farming_advice(self, query, language="english"):
        """Generate comprehensive farming advice using RAG.

        Args:
            query: The farmer's question.
            language: ``"bengali"`` (case-insensitive) requests a Bengali
                answer; any other value requests simple English.

        Returns:
            A dict that always contains ``question``, ``advice``,
            ``relevant_crops``, ``confidence``, ``data_sources``, ``language``
            and ``error``. When retrieval or generation fails, ``error`` is
            True, ``advice`` carries an apology with the error text, and the
            retrieval fields reflect whatever was retrieved before the failure
            (empty / 0.0 if nothing was).
        """
        # Start from safe defaults so success and failure results share one
        # schema and callers can read every key unconditionally.
        result = {
            "question": query,
            "relevant_crops": [],
            "confidence": 0.0,
            "data_sources": 0,
            "language": language,
        }

        try:
            # Step 1: Retrieve relevant knowledge (a remote embedding call, so
            # it is guarded by the same error handling as generation).
            relevant_knowledge = self.retrieve_relevant_knowledge(query, top_k=3)
            if relevant_knowledge:
                result["relevant_crops"] = [entry['crop'] for entry in relevant_knowledge]
                # Mean relevance of the retrieved entries; left at 0.0 when
                # nothing was retrieved rather than averaging an empty list.
                result["confidence"] = float(
                    np.mean([entry['relevance_score'] for entry in relevant_knowledge])
                )
                result["data_sources"] = len(relevant_knowledge)

            # Step 2: Format context
            context = "\n\n".join([
                f"[SF24 Data - {entry['crop'].title()}] {entry['content']}"
                for entry in relevant_knowledge
            ])

            # Step 3: Create specialized farming prompt
            prompt = self._build_prompt(query, context, language)

            # Step 4: Generate response
            advice = self.granite_model.generate_text(prompt=prompt)
        except Exception as e:
            result["advice"] = f"I apologize, I'm having trouble generating advice right now. Error: {str(e)}"
            result["error"] = True
            return result

        result["advice"] = advice
        result["error"] = False
        return result

    def get_crop_recommendation(self, soil_conditions):
        """Get crop recommendations based on soil conditions.

        Args:
            soil_conditions: Mapping with optional keys ``N``, ``P``, ``K``,
                ``ph`` and ``moisture``. ``soil_moisture`` (the dataset's
                column name) is accepted as a fallback for ``moisture``.
                Missing values default to 50, 30, 40, 6.5 and 25.

        Returns:
            The result dict of ``generate_farming_advice`` for the built query.
        """
        moisture = soil_conditions.get('moisture', soil_conditions.get('soil_moisture', 25))
        query = f"What crop is best for soil with N:{soil_conditions.get('N', 50)}, P:{soil_conditions.get('P', 30)}, K:{soil_conditions.get('K', 40)}, pH:{soil_conditions.get('ph', 6.5)}, moisture:{moisture}%?"
        return self.generate_farming_advice(query)

# Wire the demo knowledge entries, their embeddings and both watsonx.ai models
# into a single agent instance
farming_agent = SmartFarmingAIAgent(
    demo_knowledge,
    doc_embeddings,
    embedding_model,
    granite_model,
)

print("✅ Smart Farming AI Agent initialized successfully!")


✅ Smart Farming AI Agent initialized successfully!


# Test the AI Agent

In [9]:
# Test with sample farming questions
test_queries = [
    "What fertilizer should I use for rice cultivation?",
    "My soil has pH 6.2 and 30% moisture. Which crop is best?",
    "How to manage pest pressure in wheat farming?",
    "What is the optimal irrigation frequency for cotton?",
    "আমার ধানের জমিতে কী সার ব্যবহার করব?"  # Bengali query
]


def detect_query_language(query):
    """Return "bengali" if the query contains any Bengali-script character, else "english".

    Checks the whole Bengali Unicode block (U+0980-U+09FF) rather than a
    handful of letters, so Bengali text is detected whichever letters it uses.
    """
    if any('\u0980' <= char <= '\u09ff' for char in query):
        return "bengali"
    return "english"


print(" Testing Smart Farming AI Agent:")
print("=" * 60)

for i, query in enumerate(test_queries, 1):
    print(f"\n{i}. Testing Query: {query}")

    # Detect language for Bengali queries
    language = detect_query_language(query)

    result = farming_agent.generate_farming_advice(query, language)

    if result.get("error"):
        # Error results may not carry retrieval metadata; report the message
        # instead of failing on a missing key.
        print(f"   Error: {result.get('advice', '')}")
    else:
        print(f"   Relevant Crops: {result['relevant_crops']}")
        print(f"   Confidence: {result['confidence']:.3f}")
        print(f"   Advice: {result['advice'][:150]}...")
    print("-" * 40)


 Testing Smart Farming AI Agent:

1. Testing Query: What fertilizer should I use for rice cultivation?
   Relevant Crops: ['rice', 'rice', 'rice']
   Confidence: 0.791
   Advice: 
Based on the Smart Farming Data 2024, you should use a balanced fertilizer that provides Nitrogen, Phosphorus, and Potassium at the optimal levels re...
----------------------------------------

2. Testing Query: My soil has pH 6.2 and 30% moisture. Which crop is best?
   Relevant Crops: ['maize', 'maize', 'maize']
   Confidence: 0.706
   Advice: 
Based on the provided data, none of the given maize datasets perfectly match your soil conditions. However, considering your soil pH (6.2) falls with...
----------------------------------------

3. Testing Query: How to manage pest pressure in wheat farming?
   Relevant Crops: ['pigeonpeas', 'pigeonpeas', 'pigeonpeas']
   Confidence: 0.579
   Advice: 

To manage pest pressure in wheat farming, consider the following strategies tailored to your local conditions in We