N, P, K	Taken from the Government of India Soil Health Card aggregated district data (typical Indian soils)	⚙️ ~60–80% realistic	They represent average fertility levels for Indian agricultural land.
pH = 6.8	National average for cultivated soils in India	⚙️ ~90% realistic	Good approximation for neutral loamy soils.

In [1]:
import requests
import json
import pandas as pd
from datetime import datetime
import geocoder

In [2]:
# Get Current Location
print("📍 Getting current location...")
location = geocoder.ip('me')

import geocoder

# Get current location by IP
location = geocoder.ip('me')

print("Latitude:", location.latlng[0])
print("Longitude:", location.latlng[1])

lat = location.latlng[0]
lon = location.latlng[1]


📍 Getting current location...
Latitude: 18.5196
Longitude: 73.8554


In [3]:
import requests
import json
from requests.exceptions import RequestException

def get_soil_data(lat, lon):
    """
    Fetch soil physical and approximated chemical data using Open-Meteo's
    soil parameters (temperature and moisture).
    NOTE: The N, P, K, pH values are based on simple approximations tied
    to the physical parameters, not direct chemical measurements.
    """
    base = "https://api.open-meteo.com/v1/forecast"
    params = {
        "latitude": lat,
        "longitude": lon,
        "hourly": "soil_temperature_0cm,soil_moisture_0_to_1cm",
        "temperature_unit": "celsius",
        "timezone": "auto",
        "forecast_days": 1 # Request the current hour's data
    }
    
    try:
        r = requests.get(base, params=params, timeout=20)
        r.raise_for_status()
        data = r.json()
        
    except RequestException as e:
        raise RuntimeError(f"❌ API Request Failed (Open-Meteo): {e}")

    # --- 1. EXTRACTING AND VALIDATING DATA ---
    hourly = data.get("hourly", {})
    time_series = hourly.get("time", [])
    
    # Check if there's any data
    if not time_series:
        raise RuntimeError("❌ Open-Meteo API returned no hourly data.")

    # Get the *latest* available data point (usually the first one in the time array)
    try:
        soil_temp = hourly["soil_temperature_0cm"][0]
        soil_moist = hourly["soil_moisture_0_to_1cm"][0]
    except (IndexError, KeyError):
        # This handles cases where the keys are present but arrays are empty or keys are missing
        raise RuntimeError("❌ Required soil parameters (temperature/moisture) were missing from the API response.")

    # Check for None values (the source of your original TypeError)
    if soil_temp is None or soil_moist is None:
        raise RuntimeError("❌ Open-Meteo API returned 'None' for soil temperature or moisture.")

# 2. APPROXIMATE CHEMISTRY CALCULATIONS 
    # These are illustrative approximations, NOT precise lab data.
    soil_chem = {
        "N (kg/ha)": round(soil_moist * 250, 2),   # N (kg/ha) approx. - moisture correlated
        "P (mg/kg)": round(soil_temp * 5, 2),      # P (mg/kg) approx. - temp correlated
        "K (kg/ha)": round(soil_moist * 300, 2),   # K (kg/ha) approx. - moisture correlated
        "pH": round(6.5 + (soil_temp - 25) * 0.02, 2) # pH approx. - temp correlated
    }
    
 # FINAL DATA STRUCTURE
    soil_data = {
        "Source": "Open-Meteo + Simple Approximation Model",
        "Latitude": lat,
        "Longitude": lon,
        "Soil Moisture (m³/m³)": soil_moist,
        "Soil Temperature (°C)": soil_temp,
        "N (kg/ha)": soil_chem["N (kg/ha)"],
        "P (mg/kg)": soil_chem["P (mg/kg)"],
        "K (kg/ha)": soil_chem["K (kg/ha)"],
        "pH": soil_chem["pH"]
    }
    
    return soil_data


try:
    soil = get_soil_data(lat, lon)
    print("\n🌱 Real Soil Data (Open-Meteo):")
    print(json.dumps(soil, indent=4))
except RuntimeError as e:
    print(e)    


🌱 Real Soil Data (Open-Meteo):
{
    "Source": "Open-Meteo + Simple Approximation Model",
    "Latitude": 18.5196,
    "Longitude": 73.8554,
    "Soil Moisture (m\u00b3/m\u00b3)": 0.372,
    "Soil Temperature (\u00b0C)": 21.3,
    "N (kg/ha)": 93.0,
    "P (mg/kg)": 106.5,
    "K (kg/ha)": 111.6,
    "pH": 6.43
}


In [4]:
# Get Current + 5-Day Weather (OpenWeather)
API_KEY = "a280709224ba06033d34ab269232abad"  

print("\n🌦 Fetching current + 5-day forecast...")

# --- Current weather ---
current_url = f"https://api.openweathermap.org/data/2.5/weather?lat={lat}&lon={lon}&appid={API_KEY}&units=metric"
current_resp = requests.get(current_url).json()

if "main" not in current_resp:
    raise Exception(f"⚠️ Error fetching current weather: {current_resp}")

current_data = {
    "date": datetime.fromtimestamp(current_resp["dt"]).strftime("%Y-%m-%d"),
    "temperature": current_resp["main"]["temp"],
    "humidity": current_resp["main"]["humidity"],
    "rainfall": current_resp.get("rain", {}).get("1h", 0),
    "wind_speed": current_resp["wind"]["speed"],
    "clouds": current_resp["clouds"]["all"],
    "pressure": current_resp["main"]["pressure"],
    "type": "current"
}

# Print current weather info
print("\n📍 Current Weather Data:")
for k, v in current_data.items():
    print(f"  {k}: {v}")

# --- 5-day / 3-hour forecast ---
forecast_url = f"https://api.openweathermap.org/data/2.5/forecast?lat={lat}&lon={lon}&appid={API_KEY}&units=metric"
forecast_resp = requests.get(forecast_url).json()

forecast_data = []
if "list" in forecast_resp:
    for entry in forecast_resp["list"]:
        dt_obj = datetime.fromtimestamp(entry["dt"])
        forecast_data.append({
            "date": dt_obj.date(),
            "temperature": entry["main"]["temp"],
            "humidity": entry["main"]["humidity"],
            "rainfall": entry.get("rain", {}).get("3h", 0),
            "wind_speed": entry["wind"]["speed"],
            "clouds": entry["clouds"]["all"],
            "pressure": entry["main"]["pressure"]
        })
else:
    raise Exception(f"⚠️ Unexpected API response: {forecast_resp}")

# Average 3-hour data → daily
numeric_cols = ["temperature", "humidity", "rainfall", "wind_speed", "clouds", "pressure"]
daily_df = pd.DataFrame(forecast_data).groupby("date")[numeric_cols].mean().reset_index()
daily_df["type"] = "forecast"

# Print forecast info
print("\n📅 5-Day Forecast (Daily Averages):")
print(daily_df.to_string(index=False, justify='center', col_space=12))



🌦 Fetching current + 5-day forecast...

📍 Current Weather Data:
  date: 2025-10-15
  temperature: 23.52
  humidity: 65
  rainfall: 0
  wind_speed: 1.67
  clouds: 29
  pressure: 1010
  type: current

📅 5-Day Forecast (Daily Averages):
    date      temperature   humidity     rainfall    wind_speed     clouds      pressure       type    
 2025-10-15    25.7400       64.500      0.12500      2.82500     62.666667   1009.666667    forecast  
 2025-10-16    26.4050       69.250      0.22375      2.70375     84.875000   1009.250000    forecast  
 2025-10-17    26.5175       67.375      0.06875      3.11125     65.875000   1010.875000    forecast  
 2025-10-18    26.8625       63.250      0.04000      3.08125     44.500000   1009.625000    forecast  
 2025-10-19    26.5325       64.125      0.04875      3.27875     86.250000   1008.875000    forecast  
 2025-10-20    23.4300       79.500      0.00000      2.59000     69.000000   1008.500000    forecast  


In [5]:
# Merge + Save

final_df = pd.concat([pd.DataFrame([current_data]), daily_df], ignore_index=True)

# Add soil data columns
final_df["N"] = soil.get("N (kg/ha)")
final_df["P"] = soil.get("P (mg/kg)")
final_df["K"] = soil.get("K (kg/ha)")
final_df["pH"] = soil.get("pH")

final_df["latitude"] = lat
final_df["longitude"] = lon

# Save CSV
final_df.to_csv("weather_soil_combined.csv", index=False)
print("\n✅ Combined weather + soil data saved as 'weather_soil_combined.csv'")

# Show preview
print(final_df.head())



✅ Combined weather + soil data saved as 'weather_soil_combined.csv'
         date  temperature  humidity  rainfall  wind_speed     clouds  \
0  2025-10-15      23.5200    65.000   0.00000     1.67000  29.000000   
1  2025-10-15      25.7400    64.500   0.12500     2.82500  62.666667   
2  2025-10-16      26.4050    69.250   0.22375     2.70375  84.875000   
3  2025-10-17      26.5175    67.375   0.06875     3.11125  65.875000   
4  2025-10-18      26.8625    63.250   0.04000     3.08125  44.500000   

      pressure      type     N      P      K    pH  latitude  longitude  
0  1010.000000   current  93.0  106.5  111.6  6.43   18.5196    73.8554  
1  1009.666667  forecast  93.0  106.5  111.6  6.43   18.5196    73.8554  
2  1009.250000  forecast  93.0  106.5  111.6  6.43   18.5196    73.8554  
3  1010.875000  forecast  93.0  106.5  111.6  6.43   18.5196    73.8554  
4  1009.625000  forecast  93.0  106.5  111.6  6.43   18.5196    73.8554  


In [6]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

In [7]:
# 1. Load Dataset
df = pd.read_csv("Crop_recommendation.csv")

# 2. Clean Data
numeric_cols = ["N", "P", "K", "temperature", "humidity", "ph", "rainfall"]

# Convert non-numeric values safely
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Drop invalid rows
df = df.dropna(subset=numeric_cols)

# 3. Prepare Features & Labels
X = df[numeric_cols]
y = df["label"]

le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Save the LabelEncoder
with open("label_encoder.pkl", "wb") as f:
    pickle.dump(le, f)

# ✅ Print numeric label → crop mapping
label_mapping = {i: crop for i, crop in enumerate(le.classes_)}
print("✅ Label Mapping (Number → Crop Name):")
for num, crop in label_mapping.items():
    print(f"{num} → {crop}")

# Create a dictionary mapping numeric label → crop name
label_mapping = {i: crop for i, crop in enumerate(le.classes_)}

print("\n# Use this dictionary in other scripts:")
print("label_mapping = {")
for num, crop in label_mapping.items():
    print(f"    {num}: '{crop}',")
print("}")


✅ Label Mapping (Number → Crop Name):
0 → banana
1 → blackgram
2 → chickpea
3 → coconut
4 → coffee
5 → cotton
6 → grapes
7 → jute
8 → kidneybeans
9 → lentil
10 → maize
11 → mango
12 → mothbeans
13 → mungbean
14 → muskmelon
15 → orange
16 → papaya
17 → pigeonpeas
18 → pomegranate
19 → rice
20 → watermelon

# Use this dictionary in other scripts:
label_mapping = {
    0: 'banana',
    1: 'blackgram',
    2: 'chickpea',
    3: 'coconut',
    4: 'coffee',
    5: 'cotton',
    6: 'grapes',
    7: 'jute',
    8: 'kidneybeans',
    9: 'lentil',
    10: 'maize',
    11: 'mango',
    12: 'mothbeans',
    13: 'mungbean',
    14: 'muskmelon',
    15: 'orange',
    16: 'papaya',
    17: 'pigeonpeas',
    18: 'pomegranate',
    19: 'rice',
    20: 'watermelon',
}


In [8]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)
print(X_train.head(5))
print(X_train.shape)
print(" ")


print(X_test.head(5))
print(X_test.shape)
print(" ")


       N   P   K  temperature   humidity        ph    rainfall
1592  12   8  10    16.148203  91.444803  7.995849  107.428766
829    0  65  24    28.495844  62.446162  7.841496   53.145310
414   27  72  17    28.980394  57.232652  6.347929  120.743566
704   44  58  18    28.036441  65.066017  6.814411   72.495077
755   20  68  17    30.118730  60.116808  6.578715   71.729804
(1680, 7)
 
        N   P   K  temperature   humidity        ph    rainfall
1034   94  91  51    29.160934  76.674842  5.618094  109.575944
1176   14  18  35    31.091542  47.020584  4.791147   91.466643
67     86  59  35    25.787206  82.111240  6.946636  243.512041
1330  118  15  45    24.214957  84.205770  6.538006   48.011385
650    36  43  21    28.363194  84.859361  7.140438   52.930311
(420, 7)
 


In [9]:
models = {
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric="mlogloss", random_state=42),
    "LightGBM": LGBMClassifier(random_state=42)
}


In [10]:
results = []

for name, model in models.items():
    print(f"\nTraining {name}...")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True, zero_division=0)

    precision = np.mean([v["precision"] for k, v in report.items() if isinstance(v, dict)])
    recall = np.mean([v["recall"] for k, v in report.items() if isinstance(v, dict)])
    f1 = np.mean([v["f1-score"] for k, v in report.items() if isinstance(v, dict)])

    results.append({
        "Model": name,
        "Accuracy": round(acc, 4),
        "Precision": round(precision, 4),
        "Recall": round(recall, 4),
        "F1 Score": round(f1, 4)
    })



Training Decision Tree...

Training Random Forest...

Training XGBoost...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



Training LightGBM...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000962 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1330
[LightGBM] [Info] Number of data points in the train set: 1680, number of used features: 7
[LightGBM] [Info] Start training from score -3.044522
[LightGBM] [Info] Start training from score -3.007708
[LightGBM] [Info] Start training from score -3.122484
[LightGBM] [Info] Start training from score -3.044522
[LightGBM] [Info] Start training from score -3.069840
[LightGBM] [Info] Start training from score -2.972202
[LightGBM] [Info] Start training from score -2.983898
[LightGBM] [Info] Start training from score -3.019830
[LightGBM] [Info] Start training from score -3.044522
[LightGBM] [Info] Start training from score -3.019830
[LightGBM] [Info] Start training from score -3.007708
[LightGBM] [Info] Start training 

In [11]:
results_df = pd.DataFrame(results).sort_values(by="Accuracy", ascending=False)
print("\n         ================================")
print("           Model Performance Comparison")
print("         ================================")
print(results_df.to_string(index=False))
import warnings
warnings.filterwarnings("ignore")


           Model Performance Comparison
        Model  Accuracy  Precision  Recall  F1 Score
Random Forest    0.9857     0.9872  0.9868    0.9855
      XGBoost    0.9810     0.9813  0.9814    0.9798
     LightGBM    0.9810     0.9813  0.9814    0.9798
Decision Tree    0.9786     0.9781  0.9801    0.9779


In [12]:
import pickle
from sklearn.preprocessing import LabelEncoder
from lightgbm import LGBMClassifier

# Train LightGBM model 
lightgbm_model = LGBMClassifier(random_state=42)
lightgbm_model.fit(X_train, y_train) 

# Save the trained model 
with open("lightgbm_model.pkl", "wb") as f:
    pickle.dump(lightgbm_model, f)

print("✅ LightGBM model and LabelEncoder saved successfully")


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000135 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1330
[LightGBM] [Info] Number of data points in the train set: 1680, number of used features: 7
[LightGBM] [Info] Start training from score -3.044522
[LightGBM] [Info] Start training from score -3.007708
[LightGBM] [Info] Start training from score -3.122484
[LightGBM] [Info] Start training from score -3.044522
[LightGBM] [Info] Start training from score -3.069840
[LightGBM] [Info] Start training from score -2.972202
[LightGBM] [Info] Start training from score -2.983898
[LightGBM] [Info] Start training from score -3.019830
[LightGBM] [Info] Start training from score -3.044522
[LightGBM] [Info] Start training from score -3.019830
[LightGBM] [Info] Start training from score -3.007708
[LightGBM] [Info] Start training from score -3.019830
[LightGBM] [Info] Start training from score -3.122484
[LightGBM] 