# In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Load the crop dataset used to train the recommender.
# NOTE(review): the file name contains a space before ".csv" -- confirm it
# matches the file actually stored on Drive.
file_path = "/content/drive/MyDrive/crops .csv"
data = pd.read_csv(file_path)

# The four environmental measurements are the features; the crop name is the target.
X = data[['temperature', 'humidity', 'ph', 'rainfall']]
y = data['label']

# Encode crop names as integers; label_encoder.classes_ maps them back to names.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Hold out 30% of the rows, stratified so both splits keep the same crop mix.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.3, stratify=y_encoded, random_state=42
)

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# 5-fold stratified cross-validation on the training split only.
# cross_val_score fits clones of the estimator, so rf_model itself is still
# unfitted afterwards. The scores are kept and reported rather than discarded.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(rf_model, X_train, y_train, cv=cv, scoring='accuracy')
print(f"Cross-validation accuracy: {cv_scores.mean():.3f} (+/- {cv_scores.std():.3f})")

# Train the final model, then check it once against the held-out rows.
rf_model.fit(X_train, y_train)
print(f"Held-out test accuracy: {rf_model.score(X_test, y_test):.3f}")


# Look up the soil pH range recorded for a location
def get_soil_ph_range(location, soil_data=None):
    """Return the ``(min_ph, max_ph)`` pair recorded for *location*.

    Column headers and location names are both compared after stripping
    surrounding whitespace and lower-casing, so a query of ``" Kandy "``
    matches a sheet row spelled ``"kandy"`` or ``"Kandy "``.

    Args:
        location: Name of the location to look up.
        soil_data: Optional DataFrame holding ``location``, ``min ph`` and
            ``max ph`` columns. When omitted, the table is read from the
            default soil pH workbook path.

    Returns:
        ``(min_ph, max_ph)`` taken from the first matching row, or
        ``(None, None)`` when no row matches or either pH cell is blank.
    """
    if soil_data is None:
        soil_file_path = "/content/drive/MyDrive/ph.xlsx"
        soil_data = pd.read_excel(soil_file_path)

    # rename() returns a new frame, so a caller-supplied table is not mutated.
    df = soil_data.rename(columns=lambda name: str(name).strip().lower())
    wanted = location.strip().lower()

    # Normalise the sheet's names the same way as the query; non-string cells
    # (e.g. blanks) are left untouched and therefore never match.
    known = df['location'].map(
        lambda value: value.strip().lower() if isinstance(value, str) else value
    )
    matches = df[known == wanted]
    if matches.empty:
        return None, None

    row = matches.iloc[0]
    min_ph, max_ph = row['min ph'], row['max ph']
    # A blank cell is read as NaN; report it as missing instead of passing it on.
    if pd.isna(min_ph) or pd.isna(max_ph):
        return None, None
    return min_ph, max_ph


# Look up the weather readings recorded for a location
def get_weather_data(location, weather_data=None):
    """Return ``(temperature, humidity, rainfall)`` recorded for *location*.

    Column headers and location names are both compared after stripping
    surrounding whitespace and lower-casing, so differences in case or stray
    spaces in the sheet do not prevent a match.

    Args:
        location: Name of the location to look up.
        weather_data: Optional DataFrame holding ``location``,
            ``temperature``, ``humidity`` and ``rainfall`` columns. When
            omitted, the table is read from the default weather workbook path.

    Returns:
        ``(temperature, humidity, rainfall)`` taken from the first matching
        row, or ``(None, None, None)`` when no row matches or any of the
        three cells is blank.
    """
    if weather_data is None:
        weather_file_path = "/content/drive/MyDrive/weather data1.xlsx"
        weather_data = pd.read_excel(weather_file_path)

    # rename() returns a new frame, so a caller-supplied table is not mutated.
    df = weather_data.rename(columns=lambda name: str(name).strip().lower())
    wanted = location.strip().lower()

    # Normalise the sheet's names the same way as the query; non-string cells
    # (e.g. blanks) are left untouched and therefore never match.
    known = df['location'].map(
        lambda value: value.strip().lower() if isinstance(value, str) else value
    )
    matches = df[known == wanted]
    if matches.empty:
        return None, None, None

    row = matches.iloc[0]
    readings = (row['temperature'], row['humidity'], row['rainfall'])
    # A blank cell is read as NaN; report it as missing instead of passing it on.
    if any(pd.isna(value) for value in readings):
        return None, None, None
    return readings


# Recommend the most probable crops for a location
def predict_top_crops(location, top_n=10):
    """Return the ``top_n`` crops the trained model rates most likely.

    The soil pH range and weather readings for *location* are looked up, the
    midpoint of the pH range is used as the ``ph`` feature, and the crops are
    ranked by the random forest's predicted class probability.

    Args:
        location: Name of the location to get recommendations for.
        top_n: Maximum number of crops to return. Zero or a negative value
            yields an empty list.

    Returns:
        ``{"plants": [{"name": crop}, ...]}`` ordered from most to least
        probable (ties keep the model's class order), or
        ``{"error": message}`` when soil or weather data is missing.
    """
    min_ph, max_ph = get_soil_ph_range(location)
    # pd.isna(None) is True, so this covers both "no row found" and a blank cell.
    if pd.isna(min_ph) or pd.isna(max_ph):
        return {"error": f"Soil pH data not found for {location}."}

    temperature, humidity, rainfall = get_weather_data(location)
    if pd.isna(temperature) or pd.isna(humidity) or pd.isna(rainfall):
        return {"error": f"Weather data not found for {location}."}

    # Single-row frame with the same column names the model was fitted on.
    input_features = pd.DataFrame(
        [[temperature, humidity, (min_ph + max_ph) / 2, rainfall]],
        columns=['temperature', 'humidity', 'ph', 'rainfall'],
    )

    probabilities = rf_model.predict_proba(input_features)[0]

    # Sort descending with a stable sort so equal probabilities come out in a
    # deterministic order. Slicing from the front (instead of [-top_n:]) makes
    # top_n == 0 return nothing rather than every crop.
    ranked = np.argsort(-probabilities, kind="stable")
    top_indices = ranked[:max(top_n, 0)]

    # predict_proba columns follow rf_model.classes_ (the encoded labels), so
    # translate column position -> encoded label -> crop name.
    top_crops = [label_encoder.classes_[label] for label in rf_model.classes_[top_indices]]

    return {"plants": [{"name": crop} for crop in top_crops]}

# Ask which location to produce recommendations for
location = input("Enter the location: ")

# Run the recommender and report either the ranked crops or the lookup error
result = predict_top_crops(location)
if 'error' not in result:
    print(f"Top recommended crops for {location}:")
    for crop_name in (entry['name'] for entry in result['plants']):
        print(crop_name)
else:
    print(result['error'])


# Example session (recorded notebook output):
# Enter the location: nuwara eliya
# Top recommended crops for nuwara eliya:
# rice
# grapes
# jute
# pigeonpeas
# orange
# maize
# watermelon
# banana
# blackgram
# chickpea
