In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import LabelEncoder

# Load crop dataset
file_path =  "/content/drive/MyDrive/crops .csv"
data = pd.read_csv(file_path)

# Separate features and target
X = data[['temperature', 'humidity', 'ph', 'rainfall']]
y = data['label']

# Encode target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.3, stratify=y_encoded, random_state=42)

# Train Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(rf_model, X_train, y_train, cv=cv, scoring='accuracy')
rf_model.fit(X_train, y_train)

# Function to get soil pH range based on location
def get_soil_ph_range(location):
    soil_file_path = "/content/drive/MyDrive/ph.xlsx"
    df = pd.read_excel(soil_file_path)

    # Normalize column names and location values
    df.columns = df.columns.str.strip()
    df['location'] = df['location'].astype(str).str.replace(r'\s+', ' ', regex=True).str.strip().str.lower()

    # Normalize input location
    location = location.strip().lower()

    # Find matching region
    result = df[df['location'] == location]

    if not result.empty:
        return result.iloc[0]['Min ph'], result.iloc[0]['Max ph']
    else:
        return None, None

# Function to get weather data from an Excel file
def get_weather_data(location):
    weather_file_path = "/content/drive/MyDrive/weather data1.xlsx"
    df = pd.read_excel(weather_file_path)

    # Normalize column names and location values
    df.columns = df.columns.str.strip()
    df['location'] = df['location'].astype(str).str.replace(r'\s+', ' ', regex=True).str.strip().str.lower()

    # Normalize input location
    location = location.strip().lower()

    # Find matching location
    result = df[df['location'] == location]

    if not result.empty:
        avg_temperature = result.iloc[0]['temperature']
        avg_humidity = result.iloc[0]['humidity']
        avg_rainfall = result.iloc[0]['rainfall']
        return avg_temperature, avg_humidity, avg_rainfall
    else:
        return None, None, None  # Return None if location not found

# Function to filter plants based on pH range
def filter_plants_by_ph_range(min_ph, max_ph):
    # Filter the plant dataset based on the pH range
    filtered_plants = data[(data['ph'] >= min_ph) & (data['ph'] <= max_ph)]
    return filtered_plants

# Function to predict top crops
def predict_top_crops(location, top_n=10):
    min_ph, max_ph = get_soil_ph_range(location)
    if min_ph is None or max_ph is None:
        print("Soil pH range not found for location.")
        return

    temperature, humidity, rainfall = get_weather_data(location)
    if temperature is None or humidity is None or rainfall is None:
        print("Weather data not found for location.")
        return

    # Step 1: Filter plants based on pH range
    filtered_plants = filter_plants_by_ph_range(min_ph, max_ph)
    if filtered_plants.empty:
        print(f"No plants found within the pH range for {location}.")
        return

    # Prepare input sample for the model using filtered plants' features
    input_features = filtered_plants[['temperature', 'humidity', 'ph', 'rainfall']].copy()
    input_features['temperature'] = temperature
    input_features['humidity'] = humidity
    input_features['rainfall'] = rainfall
    input_features['ph'] = min_ph  # Use the min pH as the value

    # Predict top crops
    probabilities = rf_model.predict_proba(input_features)  # Get probabilities for each plant
    top_indices = np.argsort(probabilities.sum(axis=1))[-5:][::-1]  # Sort plants by sum of probabilities (rank by suitability)
    top_crops = [label_encoder.classes_[i] for i in top_indices]

    print(f"Top recommended crops for {location}:")
    print(", ".join(top_crops))

# Example usage
if __name__ == "__main__":
    location_name = input("Enter location name: ")
    predict_top_crops(location_name)


Enter location name: colombo


IndexError: index 510 is out of bounds for axis 0 with size 22

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import LabelEncoder

# Load crop dataset
file_path = "/content/drive/MyDrive/crops .csv"
data = pd.read_csv(file_path)

# Separate features and target
X = data[['temperature', 'humidity', 'ph', 'rainfall']]
y = data['label']

# Encode target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.3, stratify=y_encoded, random_state=42)

# Train Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(rf_model, X_train, y_train, cv=cv, scoring='accuracy')
rf_model.fit(X_train, y_train)

# Function to get soil pH range based on location
def get_soil_ph_range(location):
    soil_file_path = "/content/drive/MyDrive/ph.xlsx"
    df = pd.read_excel(soil_file_path)

    # Normalize column names and location values
    df.columns = df.columns.str.strip()
    df['location'] = df['location'].astype(str).str.replace(r'\s+', ' ', regex=True).str.strip().str.lower()

    # Normalize input location
    location = location.strip().lower()

    # Find matching region
    result = df[df['location'] == location]

    if not result.empty:
        return result.iloc[0]['Min ph'], result.iloc[0]['Max ph']
    else:
        return None, None

# Function to get weather data from an Excel file
def get_weather_data(location):
    weather_file_path = "/content/drive/MyDrive/weather data1.xlsx"
    df = pd.read_excel(weather_file_path)

    # Normalize column names and location values
    df.columns = df.columns.str.strip()
    df['location'] = df['location'].astype(str).str.replace(r'\s+', ' ', regex=True).str.strip().str.lower()

    # Normalize input location
    location = location.strip().lower()

    # Find matching location
    result = df[df['location'] == location]

    if not result.empty:
        avg_temperature = result.iloc[0]['temperature']
        avg_humidity = result.iloc[0]['humidity']
        avg_rainfall = result.iloc[0]['rainfall']
        return avg_temperature, avg_humidity, avg_rainfall
    else:
        return None, None, None  # Return None if location not found

# Function to filter plants based on pH range
def filter_plants_by_ph_range(min_ph, max_ph):
    # Filter the plant dataset based on the pH range
    filtered_plants = data[(data['ph'] >= min_ph) & (data['ph'] <= max_ph)]
    return filtered_plants

# Function to predict top crops
def predict_top_crops(location, top_n=10):
    min_ph, max_ph = get_soil_ph_range(location)
    if min_ph is None or max_ph is None:
        print("Soil pH range not found for location.")
        return

    temperature, humidity, rainfall = get_weather_data(location)
    if temperature is None or humidity is None or rainfall is None:
        print("Weather data not found for location.")
        return

    # Step 1: Filter plants based on pH range
    filtered_plants = filter_plants_by_ph_range(min_ph, max_ph)
    if filtered_plants.empty:
        print(f"No plants found within the pH range for {location}.")
        return

    # Step 2: Prepare the features for the filtered plants
    input_features = filtered_plants[['temperature', 'humidity', 'ph', 'rainfall']].copy()
    input_features['temperature'] = temperature
    input_features['humidity'] = humidity
    input_features['rainfall'] = rainfall
    input_features['ph'] = min_ph  # Use the min pH as the value for prediction

    # Step 3: Predict probabilities
    probabilities = rf_model.predict_proba(input_features)  # Get probabilities for each plant

    # Step 4: Sum the probabilities and sort by suitability (highest probability)
    top_indices = np.argsort(probabilities.sum(axis=1))[-top_n:][::-1]  # Sort plants by sum of probabilities (rank by suitability)
    top_crops = [label_encoder.classes_[i] for i in top_indices]  # Map indices to crop labels

    # Step 5: Display top crops
    print(f"Top recommended crops for {location}:")
    print(", ".join(top_crops))

# Example usage
if __name__ == "__main__":
    location_name = input("Enter location name: ")
    predict_top_crops(location_name)


Enter location name: colombo


IndexError: index 510 is out of bounds for axis 0 with size 22

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import LabelEncoder

# Load crop dataset
file_path = "/content/drive/MyDrive/crops .csv"
data = pd.read_csv(file_path)

# Separate features and target
X = data[['temperature', 'humidity', 'ph', 'rainfall']]
y = data['label']

# Encode target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.3, stratify=y_encoded, random_state=42)

# Train Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(rf_model, X_train, y_train, cv=cv, scoring='accuracy')
rf_model.fit(X_train, y_train)

# Function to get soil pH range based on location
def get_soil_ph_range(location):
    soil_file_path = "/content/drive/MyDrive/ph.xlsx"
    df = pd.read_excel(soil_file_path)

    # Normalize column names and location values
    df.columns = df.columns.str.strip()
    df['location'] = df['location'].astype(str).str.replace(r'\s+', ' ', regex=True).str.strip().str.lower()

    # Normalize input location
    location = location.strip().lower()

    # Find matching region
    result = df[df['location'] == location]

    if not result.empty:
        min_ph = result.iloc[0]['Min ph']
        max_ph = result.iloc[0]['Max ph']
        print(f"Found pH range for {location}: Min pH = {min_ph}, Max pH = {max_ph}")
        return min_ph, max_ph
    else:
        print(f"No pH range found for location: {location}")
        return None, None

# Function to get weather data from an Excel file
def get_weather_data(location):
    weather_file_path = "/content/drive/MyDrive/weather data1.xlsx"
    df = pd.read_excel(weather_file_path)

    # Normalize column names and location values
    df.columns = df.columns.str.strip()
    df['location'] = df['location'].astype(str).str.replace(r'\s+', ' ', regex=True).str.strip().str.lower()

    # Normalize input location
    location = location.strip().lower()

    # Find matching location
    result = df[df['location'] == location]

    if not result.empty:
        avg_temperature = result.iloc[0]['temperature']
        avg_humidity = result.iloc[0]['humidity']
        avg_rainfall = result.iloc[0]['rainfall']
        print(f"Found weather data for {location}: Temperature = {avg_temperature}, Humidity = {avg_humidity}, Rainfall = {avg_rainfall}")
        return avg_temperature, avg_humidity, avg_rainfall
    else:
        print(f"No weather data found for location: {location}")
        return None, None, None

# Function to filter plants based on pH range
def filter_plants_by_ph_range(min_ph, max_ph):
    # Filter the plant dataset based on the pH range
    filtered_plants = data[(data['ph'] >= min_ph) & (data['ph'] <= max_ph)]
    print(f"Found {len(filtered_plants)} plants within the pH range of {min_ph} to {max_ph}")
    return filtered_plants

# Function to predict top crops
def predict_top_crops(location, top_n=10):
    min_ph, max_ph = get_soil_ph_range(location)
    if min_ph is None or max_ph is None:
        print("Soil pH range not found for location.")
        return

    temperature, humidity, rainfall = get_weather_data(location)
    if temperature is None or humidity is None or rainfall is None:
        print("Weather data not found for location.")
        return

    # Step 1: Filter plants based on pH range
    filtered_plants = filter_plants_by_ph_range(min_ph, max_ph)
    if filtered_plants.empty:
        print(f"No plants found within the pH range for {location}.")
        return

    # Step 2: Prepare the features for the filtered plants
    input_features = filtered_plants[['temperature', 'humidity', 'ph', 'rainfall']].copy()
    input_features['temperature'] = temperature
    input_features['humidity'] = humidity
    input_features['rainfall'] = rainfall
    input_features['ph'] = min_ph  # Use the min pH as the value for prediction

    # Step 3: Predict probabilities
    probabilities = rf_model.predict_proba(input_features)  # Get probabilities for each plant

    # Step 4: Ensure that the correct classes are used
    top_indices = np.argsort(probabilities.sum(axis=1))[-top_n:][::-1]  # Sort plants by sum of probabilities (rank by suitability)

    # Step 5: Correct index mapping
    top_crops = [label_encoder.classes_[i] for i in top_indices if i < len(label_encoder.classes_)]  # Ensure valid indices

    # Step 6: Display top crops
    if top_crops:
        print(f"Top recommended crops for {location}:")
        print(", ".join(top_crops))
    else:
        print(f"No recommended crops found for {location}.")

# Example usage
if __name__ == "__main__":
    location_name = input("Enter location name: ")
    predict_top_crops(location_name)


Enter location name: colombo
Found pH range for colombo: Min pH = 5, Max pH = 6
Found weather data for colombo: Temperature = 27.929, Humidity = 74, Rainfall = 213.516
Found 511 plants within the pH range of 5 to 6
No recommended crops found for colombo.


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Load plant dataset
file_path = "/content/drive/MyDrive/crops .csv"
data = pd.read_csv(file_path)

# Separate features and target
X = data[['temperature', 'humidity', 'ph', 'rainfall']]
y = data['label']

# Encode target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.3, stratify=y_encoded, random_state=42
)

# Train Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cross_val_score(rf_model, X_train, y_train, cv=cv, scoring='accuracy')
rf_model.fit(X_train, y_train)

# Load soil pH dataset
def get_soil_ph_range(location):
    soil_file_path = "/content/drive/MyDrive/ph.xlsx"
    df = pd.read_excel(soil_file_path)
    df.columns = df.columns.str.strip().str.lower()
    location = location.strip().lower()

    result = df[df['location'] == location]
    if not result.empty:
        return result.iloc[0]['min ph'], result.iloc[0]['max ph']
    return None, None

# Load weather data
def get_weather_data(location):
    weather_file_path = "/content/drive/MyDrive/weather data1.xlsx"
    df = pd.read_excel(weather_file_path)
    df.columns = df.columns.str.strip().str.lower()
    location = location.strip().lower()

    result = df[df['location'] == location]
    if not result.empty:
        return result.iloc[0]['temperature'], result.iloc[0]['humidity'], result.iloc[0]['rainfall']
    return None, None, None

# Predict top crops based on location
def predict_top_crops(location, top_n=10):
    min_ph, max_ph = get_soil_ph_range(location)
    if min_ph is None or max_ph is None:
        print(f"Soil pH data not found for {location}.")
        return

    temperature, humidity, rainfall = get_weather_data(location)
    if temperature is None or humidity is None or rainfall is None:
        print(f"Weather data not found for {location}.")
        return

    # Filter plants within the pH range
    filtered_plants = data[(data['ph'] >= min_ph) & (data['ph'] <= max_ph)]

    if filtered_plants.empty:
        print(f"No plants found for pH range {min_ph}-{max_ph}.")
        return

    # Prepare input sample
    input_features = pd.DataFrame([[temperature, humidity, min_ph, rainfall]],
                                  columns=['temperature', 'humidity', 'ph', 'rainfall'])

    # Predict top crops
    probabilities = rf_model.predict_proba(input_features)[0]
    top_indices = np.argsort(probabilities)[-top_n:][::-1]
    top_crops = [label_encoder.classes_[i] for i in top_indices if i < len(label_encoder.classes_)]

    print(f"Top recommended crops for {location}: {', '.join(top_crops)}")

# Example usage
if __name__ == "__main__":
    location_name = input("Enter location name: ")
    predict_top_crops(location_name)


Enter location name: badulla
Top recommended crops for badulla: jute, rice, pigeonpeas, coffee, maize, kidneybeans, coconut, watermelon, banana, blackgram


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Load plant dataset
file_path = "/content/drive/MyDrive/crops .csv"
data = pd.read_csv(file_path)

# Separate features and target
X = data[['temperature', 'humidity', 'ph', 'rainfall']]
y = data['label']

# Encode target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.3, stratify=y_encoded, random_state=42
)

# Train Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cross_val_score(rf_model, X_train, y_train, cv=cv, scoring='accuracy')
rf_model.fit(X_train, y_train)

# Load soil pH dataset
def get_soil_ph_range(location):
    soil_file_path = "/content/drive/MyDrive/ph.xlsx"
    df = pd.read_excel(soil_file_path)
    df.columns = df.columns.str.strip().str.lower()
    location = location.strip().lower()

    result = df[df['location'] == location]
    if not result.empty:
        return result.iloc[0]['min ph'], result.iloc[0]['max ph']
    return None, None

# Load weather data
def get_weather_data(location):
    weather_file_path = "/content/drive/MyDrive/weather data1.xlsx"
    df = pd.read_excel(weather_file_path)
    df.columns = df.columns.str.strip().str.lower()
    location = location.strip().lower()

    result = df[df['location'] == location]
    if not result.empty:
        return result.iloc[0]['temperature'], result.iloc[0]['humidity'], result.iloc[0]['rainfall']
    return None, None, None

# Predict top crops based on location (considering pH and other environmental factors)
def predict_top_crops(location, top_n=10):
    min_ph, max_ph = get_soil_ph_range(location)
    if min_ph is None or max_ph is None:
        print(f"Soil pH data not found for {location}.")
        return

    temperature, humidity, rainfall = get_weather_data(location)
    if temperature is None or humidity is None or rainfall is None:
        print(f"Weather data not found for {location}.")
        return

    # Filter plants within the pH range
    filtered_plants = data[(data['ph'] >= min_ph) & (data['ph'] <= max_ph)]

    if filtered_plants.empty:
        print(f"No plants found for pH range {min_ph}-{max_ph}.")
        return

    # Prepare input sample for pH-based prediction
    input_features_ph = pd.DataFrame([[min_ph]], columns=['ph'])
    probabilities_ph = rf_model.predict_proba(input_features_ph)[0]
    top_indices_ph = np.argsort(probabilities_ph)[-top_n:][::-1]
    top_crops_ph = [label_encoder.classes_[i] for i in top_indices_ph]

    # Prepare input sample for environmental factors prediction (consider temperature, humidity, pH, rainfall)
    input_features_full = pd.DataFrame([[temperature, humidity, min_ph, rainfall]],
                                       columns=['temperature', 'humidity', 'ph', 'rainfall'])
    probabilities_full = rf_model.predict_proba(input_features_full)[0]
    top_indices_full = np.argsort(probabilities_full)[-top_n:][::-1]
    top_crops_full = [label_encoder.classes_[i] for i in top_indices_full]

    # Find common crops between both predictions
    final_top_crops = list(set(top_crops_ph) & set(top_crops_full))

    print(f"Top recommended crops for {location}: {', '.join(final_top_crops)}")

# Example usage
if __name__ == "__main__":
    location_name = input("Enter location name: ")
    predict_top_crops(location_name)


Enter location name: colombo


ValueError: The feature names should match those that were passed during fit.
Feature names seen at fit time, yet now missing:
- humidity
- rainfall
- temperature


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Load plant dataset
file_path = "/content/drive/MyDrive/crops .csv"
data = pd.read_csv(file_path)

# Separate features and target
X = data[['temperature', 'humidity', 'ph', 'rainfall']]
y = data['label']

# Encode target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.3, stratify=y_encoded, random_state=42
)

# Train Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cross_val_score(rf_model, X_train, y_train, cv=cv, scoring='accuracy')
rf_model.fit(X_train, y_train)

# Load soil pH dataset
def get_soil_ph_range(location):
    soil_file_path = "/content/drive/MyDrive/ph.xlsx"
    df = pd.read_excel(soil_file_path)
    df.columns = df.columns.str.strip().str.lower()
    location = location.strip().lower()

    result = df[df['location'] == location]
    if not result.empty:
        return result.iloc[0]['min ph'], result.iloc[0]['max ph']
    return None, None

# Load weather data
def get_weather_data(location):
    weather_file_path = "/content/drive/MyDrive/weather data1.xlsx"
    df = pd.read_excel(weather_file_path)
    df.columns = df.columns.str.strip().str.lower()
    location = location.strip().lower()

    result = df[df['location'] == location]
    if not result.empty:
        return result.iloc[0]['temperature'], result.iloc[0]['humidity'], result.iloc[0]['rainfall']
    return None, None, None

# Predict top crops based on location (considering pH and other environmental factors)
def predict_top_crops(location, top_n=10):
    min_ph, max_ph = get_soil_ph_range(location)
    if min_ph is None or max_ph is None:
        print(f"Soil pH data not found for {location}.")
        return

    temperature, humidity, rainfall = get_weather_data(location)
    if temperature is None or humidity is None or rainfall is None:
        print(f"Weather data not found for {location}.")
        return

    # Filter plants within the pH range
    filtered_plants = data[(data['ph'] >= min_ph) & (data['ph'] <= max_ph)]

    if filtered_plants.empty:
        print(f"No plants found for pH range {min_ph}-{max_ph}.")
        return

    # Prepare input sample for pH-based prediction (only using pH feature)
    input_features_ph = pd.DataFrame([[min_ph]], columns=['ph'])
    probabilities_ph = rf_model.predict_proba(input_features_ph)[0]
    top_indices_ph = np.argsort(probabilities_ph)[-top_n:][::-1]
    top_crops_ph = [label_encoder.classes_[i] for i in top_indices_ph]

    # Prepare input sample for environmental factors prediction (temperature, humidity, pH, rainfall)
    input_features_full = pd.DataFrame([[temperature, humidity, min_ph, rainfall]],
                                       columns=['temperature', 'humidity', 'ph', 'rainfall'])
    # Ensure the feature columns are in the same order as when the model was trained
    input_features_full = input_features_full[['temperature', 'humidity', 'ph', 'rainfall']]

    probabilities_full = rf_model.predict_proba(input_features_full)[0]
    top_indices_full = np.argsort(probabilities_full)[-top_n:][::-1]
    top_crops_full = [label_encoder.classes_[i] for i in top_indices_full]

    # Find common crops between both predictions
    final_top_crops = list(set(top_crops_ph) & set(top_crops_full))

    print(f"Top recommended crops for {location}: {', '.join(final_top_crops)}")

# Example usage
if __name__ == "__main__":
    location_name = input("Enter location name: ")
    predict_top_crops(location_name)


Enter location name: colombo


ValueError: The feature names should match those that were passed during fit.
Feature names seen at fit time, yet now missing:
- humidity
- rainfall
- temperature


In [None]:
# Predict top crops based on location and filter using the trained model
def predict_top_crops_with_model(location, top_n=10):
    min_ph, max_ph = get_soil_ph_range(location)
    if min_ph is None or max_ph is None:
        print(f"Soil pH data not found for {location}.")
        return

    temperature, humidity, rainfall = get_weather_data(location)
    if temperature is None or humidity is None or rainfall is None:
        print(f"Weather data not found for {location}.")
        return

    # Loop over pH values in the range and predict crop suitability
    suitable_crops = []
    for ph in np.arange(min_ph, max_ph, 0.1):  # Loop over pH values in the range with step of 0.1
        # Prepare input sample for prediction with current pH value
        input_features = pd.DataFrame([[temperature, humidity, ph, rainfall]],
                                      columns=['temperature', 'humidity', 'ph', 'rainfall'])

        # Get the model's predicted probabilities for the crops
        probabilities = rf_model.predict_proba(input_features)[0]

        # Collect crops with predicted probability above a threshold
        threshold = 0.1  # Adjust the threshold based on your needs
        for i, prob in enumerate(probabilities):
            if prob >= threshold:
                suitable_crops.append((label_encoder.classes_[i], prob, ph))

    # Sort crops based on probability and filter top N
    suitable_crops_sorted = sorted(suitable_crops, key=lambda x: x[1], reverse=True)[:top_n]

    # Print top crops based on suitability
    print(f"Top recommended crops for {location} based on pH range {min_ph}-{max_ph}:")
    for crop, prob, ph in suitable_crops_sorted:
        print(f"{crop}: Probability: {prob:.2f}, pH: {ph:.2f}")

# Example usage
if __name__ == "__main__":
    location_name = input("Enter location name: ")
    predict_top_crops_with_model(location_name)


Enter location name: colombo
Top recommended crops for colombo based on pH range 5-6:
banana: Probability: 0.46, pH: 5.00
banana: Probability: 0.46, pH: 5.10
banana: Probability: 0.46, pH: 5.20
banana: Probability: 0.46, pH: 5.30
banana: Probability: 0.46, pH: 5.40
banana: Probability: 0.46, pH: 5.50
banana: Probability: 0.46, pH: 5.60
banana: Probability: 0.46, pH: 5.70
banana: Probability: 0.46, pH: 5.80
banana: Probability: 0.46, pH: 5.90


In [None]:
# Predict top crops based on location and weather data, after filtering by pH range
def predict_top_crops_using_model(location, top_n=10):
    min_ph, max_ph = get_soil_ph_range(location)
    if min_ph is None or max_ph is None:
        print(f"Soil pH data not found for {location}.")
        return

    temperature, humidity, rainfall = get_weather_data(location)
    if temperature is None or humidity is None or rainfall is None:
        print(f"Weather data not found for {location}.")
        return

    # Step 1: Filter crops based on the pH range
    filtered_crops = []
    for ph in np.arange(min_ph, max_ph, 0.1):  # Loop over pH values in the range
        # Prepare input sample for prediction with current pH value
        input_features = pd.DataFrame([[temperature, humidity, ph, rainfall]],
                                      columns=['temperature', 'humidity', 'ph', 'rainfall'])

        # Get the model's predicted probabilities for the crops
        probabilities = rf_model.predict_proba(input_features)[0]

        # Collect crops with predicted probability above a threshold
        threshold = 0.1  # Adjust the threshold based on your needs
        for i, prob in enumerate(probabilities):
            if prob >= threshold:
                filtered_crops.append((label_encoder.classes_[i], prob, ph))

    # Step 2: Sort crops based on probability
    filtered_crops_sorted = sorted(filtered_crops, key=lambda x: x[1], reverse=True)

    # Step 3: Get top 10 crops (if available)
    top_crops = filtered_crops_sorted[:top_n]

    # Print the top crops based on suitability
    if top_crops:
        print(f"Top recommended crops for {location} based on pH range {min_ph}-{max_ph} and weather conditions:")
        for crop, prob, ph in top_crops:
            print(f"{crop}: Probability: {prob:.2f}, pH: {ph:.2f}")
    else:
        print(f"No suitable crops found for {location} in the pH range {min_ph}-{max_ph}.")

# Example usage
if __name__ == "__main__":
    location_name = input("Enter location name: ")
    predict_top_crops_using_model(location_name)


Enter location name: colombo
Top recommended crops for colombo based on pH range 5-6 and weather conditions:
banana: Probability: 0.46, pH: 5.00
banana: Probability: 0.46, pH: 5.10
banana: Probability: 0.46, pH: 5.20
banana: Probability: 0.46, pH: 5.30
banana: Probability: 0.46, pH: 5.40
banana: Probability: 0.46, pH: 5.50
banana: Probability: 0.46, pH: 5.60
banana: Probability: 0.46, pH: 5.70
banana: Probability: 0.46, pH: 5.80
banana: Probability: 0.46, pH: 5.90


In [None]:
def predict_top_crops_using_model(location, top_n=10):
    min_ph, max_ph = get_soil_ph_range(location)
    if min_ph is None or max_ph is None:
        print(f"Soil pH data not found for {location}.")
        return

    temperature, humidity, rainfall = get_weather_data(location)
    if temperature is None or humidity is None or rainfall is None:
        print(f"Weather data not found for {location}.")
        return

    # Step 1: Filter crops based on the pH range
    filtered_crops = []
    for ph in np.arange(min_ph, max_ph, 0.1):  # Loop over pH values in the range
        # Prepare input sample for prediction with current pH value
        input_features = pd.DataFrame([[temperature, humidity, ph, rainfall]],
                                      columns=['temperature', 'humidity', 'ph', 'rainfall'])

        # Get the model's predicted probabilities for the crops
        probabilities = rf_model.predict_proba(input_features)[0]

        # Collect crops with predicted probability above a threshold
        threshold = 0.1  # Adjust the threshold based on your needs
        for i, prob in enumerate(probabilities):
            if prob >= threshold:
                filtered_crops.append((label_encoder.classes_[i], prob, ph))

    # Step 2: Remove duplicates by crop name (if needed)
    unique_crops = {}
    for crop, prob, ph in filtered_crops:
        if crop not in unique_crops:
            unique_crops[crop] = (prob, ph)

    # Step 3: Sort crops based on probability
    sorted_crops = sorted(unique_crops.items(), key=lambda x: x[1][0], reverse=True)

    # Step 4: Get top 10 crops (if available)
    top_crops = sorted_crops[:top_n]

    # Print the top crops based on suitability
    if top_crops:
        print(f"Top recommended crops for {location} based on pH range {min_ph}-{max_ph} and weather conditions:")
        for crop, (prob, ph) in top_crops:
            print(f"{crop}: Probability: {prob:.2f}, pH: {ph:.2f}")
    else:
        print(f"No suitable crops found for {location} in the pH range {min_ph}-{max_ph}.")

# Example usage
if __name__ == "__main__":
    location_name = input("Enter location name: ")
    predict_top_crops_using_model(location_name)


Enter location name: colombo
Top recommended crops for colombo based on pH range 5-6 and weather conditions:
banana: Probability: 0.46, pH: 5.00
rice: Probability: 0.22, pH: 5.00
jute: Probability: 0.18, pH: 5.00


In [None]:
def predict_top_crops_using_model(location, top_n=10):
    min_ph, max_ph = get_soil_ph_range(location)
    if min_ph is None or max_ph is None:
        print(f"Soil pH data not found for {location}.")
        return

    temperature, humidity, rainfall = get_weather_data(location)
    if temperature is None or humidity is None or rainfall is None:
        print(f"Weather data not found for {location}.")
        return

    # Step 1: Filter crops based on the pH range
    filtered_crops = []
    for ph in np.arange(min_ph, max_ph, 0.1):  # Loop over pH values in the range
        # Prepare input sample for prediction with current pH value
        input_features = pd.DataFrame([[temperature, humidity, ph, rainfall]],
                                      columns=['temperature', 'humidity', 'ph', 'rainfall'])

        # Get the model's predicted probabilities for the crops
        probabilities = rf_model.predict_proba(input_features)[0]

        # Collect crops with predicted probability above a threshold
        threshold = 0.1  # Adjust the threshold based on your needs
        for i, prob in enumerate(probabilities):
            if prob >= threshold:
                filtered_crops.append((label_encoder.classes_[i], prob, ph))

    # Step 2: Remove duplicates by crop name (if needed)
    unique_crops = {}
    for crop, prob, ph in filtered_crops:
        if crop not in unique_crops:
            unique_crops[crop] = (prob, ph)

    # Step 3: Sort crops based on probability
    sorted_crops = sorted(unique_crops.items(), key=lambda x: x[1][0], reverse=True)

    # Step 4: Ensure that at least top_n crops are selected
    top_crops = sorted_crops[:top_n] if len(sorted_crops) >= top_n else sorted_crops

    # Step 5: Print the final top crops
    if top_crops:
        print(f"Top recommended crops for {location} based on pH range {min_ph}-{max_ph} and weather conditions:")
        for crop, (prob, ph) in top_crops:
            print(f"{crop}: Probability: {prob:.2f}, pH: {ph:.2f}")
    else:
        print(f"No suitable crops found for {location} in the pH range {min_ph}-{max_ph}.")

# Example usage
if __name__ == "__main__":
    location_name = input("Enter location name: ")
    predict_top_crops_using_model(location_name)


Enter location name: galle
Top recommended crops for galle based on pH range 4-5 and weather conditions:
pigeonpeas: Probability: 0.46, pH: 4.00
coffee: Probability: 0.45, pH: 4.00
