In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.multioutput import MultiOutputClassifier
import pickle

# Load Dataset
data = pd.read_csv('Largescale_plants.csv', encoding='utf-8')

# Define Features (X) and Targets (y)
features = ['Average Size', 'Climate', 'Soil Type', 'Experience', 'Labour Requirement', 'Irrigation System']
targets = ['Recommended Plant','Crop Yield Potential','Pest and Disease Resistance']

# Work on an explicit copy: `data[features]` is a slice of `data`, and writing
# into it triggers pandas' SettingWithCopyWarning (the write may or may not
# reach the intended object depending on the pandas version).
X = data[features].copy()
y = data[targets]

# Encode categorical features using LabelEncoder.
# One encoder is kept per column so the prediction cell can apply the exact
# same string -> integer mapping to user input.
label_encoders = {}
for column in X.columns:
    # Encode only text columns. Besides the classic `object` dtype this also
    # accepts pandas' dedicated string dtype, which `== 'object'` would skip.
    if pd.api.types.is_object_dtype(X[column]) or pd.api.types.is_string_dtype(X[column]):
        le = LabelEncoder()
        # Replace the whole column (instead of `X.loc[:, column] = ...`) so the
        # column takes the integer dtype of the encoded values.
        X[column] = le.fit_transform(X[column])
        label_encoders[column] = le

# Split data into training and testing sets.
# NOTE(review): X_test / y_test are not used anywhere in this cell, so the
# saved model is never scored on the held-out 20%.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Use MultiOutputClassifier for multi-output prediction: one random forest is
# fitted per target column, in the order given by `targets`.
rf_classifier = MultiOutputClassifier(RandomForestClassifier(n_estimators=100, random_state=42))
rf_classifier.fit(X_train, y_train)

# Save the trained model and label encoders using pickle
with open('plant_recommendation_model_largescale.pkl', 'wb') as model_file:
    pickle.dump(rf_classifier, model_file)

with open('label_encoders_largescale.pkl', 'wb') as encoder_file:
    pickle.dump(label_encoders, encoder_file)

print("Model and label encoders saved successfully!")


Model and label encoders saved successfully!


In [None]:
import pandas as pd
import pickle

# Load the trained model and label encoders.
# NOTE(security): pickle.load runs arbitrary code embedded in the file. Only
# load pickles that were produced by the training cell of this notebook.
try:
    with open('plant_recommendation_model_largescale.pkl', 'rb') as model_file:
        loaded_model = pickle.load(model_file)

    with open('label_encoders_largescale.pkl', 'rb') as encoder_file:
        loaded_encoders = pickle.load(encoder_file)
    print("Loaded encoder keys:", loaded_encoders.keys())  # Debugging: Print encoder keys
except FileNotFoundError as err:
    # Raise instead of calling exit(): inside a notebook exit() asks the kernel
    # to shut down, which discards all state; an exception just stops the cell
    # (and a "Run All") while still reporting what went wrong.
    raise FileNotFoundError(
        "Model or encoder files not found. Please train the model first."
    ) from err

def get_user_input():
    """Prompt for the six plant features and return them as a one-row DataFrame.

    Each answer is stripped and compared case-insensitively with the options
    shown in its prompt; on a match the option is stored exactly as prompted.
    This matters because ``str.capitalize`` lower-cases everything after the
    first character, so an answer such as "New for Farming" or
    "Fully Irrigated" typed verbatim would otherwise be rewritten to a
    spelling that differs from the prompted one.
    Answers that match none of the prompted options keep the previous
    normalisation (``title`` for 'Soil Type', ``capitalize`` for the rest).

    Returns:
        pandas.DataFrame: a single row whose columns are, in order,
        'Average Size', 'Climate', 'Soil Type', 'Experience',
        'Labour Requirement' and 'Irrigation System'.
    """
    # (column name, options exactly as prompted, fallback normaliser)
    # NOTE(review): assumes the prompted spellings are the ones present in the
    # training data - confirm against the dataset.
    questions = [
        ('Average Size', ('Less than 10 acres', '10 to 100 acres', 'More than 100 acres'), str.capitalize),
        ('Climate', ('Tropical', 'Temperate', 'Arid'), str.capitalize),
        ('Soil Type', ('Loamy Soil', 'Clayey Soil', 'Sandy Soil'), str.title),
        ('Experience', ('New for Farming', 'Successful'), str.capitalize),
        ('Labour Requirement', ('Medium', 'High'), str.capitalize),
        ('Irrigation System', ('Fully Irrigated', 'Partial Irrigation', 'Rain-fed'), str.capitalize),
    ]

    features = {}
    for column, options, normalise in questions:
        answer = input(f"Enter {column} ({' / '.join(options)}): ").strip()
        canonical = {option.casefold(): option for option in options}
        features[column] = canonical.get(answer.casefold(), normalise(answer))
    return pd.DataFrame([features])

def _match_known_label(value, encoder, column):
    """Map one raw value onto a label stored in ``encoder.classes_``.

    An exact match wins; otherwise the value is compared ignoring case and
    surrounding whitespace. If nothing matches, a warning is printed and the
    first entry of ``encoder.classes_`` is returned.
    """
    known = list(encoder.classes_)
    if value in known:
        return value

    folded = {}
    for label in known:
        # setdefault: if two labels differ only by case, keep the first one.
        folded.setdefault(str(label).strip().casefold(), label)
    match = folded.get(str(value).strip().casefold())
    if match is not None:
        return match

    # classes_[0] is simply the first stored label (scikit-learn's LabelEncoder
    # keeps them sorted), not the most frequent one, so the message names the
    # label actually used instead of claiming a frequency-based default.
    fallback = known[0]
    print(f"Warning: Unseen label '{value}' for column '{column}'. Defaulting to '{fallback}'.")
    return fallback


def predict_plant(user_data, model, encoders):
    """Encode the user's answers and return the model's three predictions.

    Args:
        user_data: pandas.DataFrame of raw feature values. Only the prediction
            for its first row is returned.
        model: fitted estimator whose ``predict`` returns a 2-D array with
            three columns.
        encoders: dict mapping a column name to a fitted encoder exposing
            ``classes_`` and ``transform``. Columns without an encoder are
            passed to the model unchanged.

    Returns:
        A 3-tuple ``(prediction[0][0], prediction[0][1], prediction[0][2])``,
        i.e. the first row's outputs in the model's own target order, or
        ``(None, None, None)`` when the prediction does not have three columns
        or a KeyError / ValueError occurs while encoding or predicting.
    """
    try:
        encoded_data = user_data.copy()
        for column in encoded_data.columns:
            if column not in encoders:
                continue
            le = encoders[column]
            # Iterating the values (rather than `user_data[column][0]`) avoids
            # a label lookup that fails when the index does not contain 0.
            labels = [_match_known_label(value, le, column) for value in user_data[column]]
            # Assign the whole column so it takes the encoder's integer dtype.
            encoded_data[column] = le.transform(labels)

        prediction = model.predict(encoded_data)

        print(f"Prediction shape: {prediction.shape}")  # Debugging: Print the shape of the prediction

        # Checking the rank first keeps a 1-D result from raising IndexError.
        if len(prediction.shape) == 2 and prediction.shape[1] == 3:
            first_row = prediction[0]
            return first_row[0], first_row[1], first_row[2]

        print("Unexpected model output format.")
        return None, None, None
    except KeyError as e:
        print(f"Error: Invalid feature input. Please check your inputs. {e}")
        return None, None, None
    except ValueError as e:
        print(f"Error: Input value not in training data. Please check your inputs. {e}")
        return None, None, None

# Main execution
user_input_data = get_user_input()

# predict_plant returns the model outputs in column order. The training cell
# fits on targets ordered ['Recommended Plant', 'Crop Yield Potential',
# 'Pest and Disease Resistance'], so the tuple must be unpacked in that order;
# unpacking yield and pest resistance the other way round swaps the two values
# under the printed labels.
recommended_plant, crop_yield_potential, pest_disease_resistance = predict_plant(
    user_input_data, loaded_model, loaded_encoders
)

# predict_plant signals failure with (None, None, None); print nothing then.
if recommended_plant is not None:
    print("\n--- Plant Recommendation ---")
    print(f"Recommended Plant: {recommended_plant}")
    print(f"Crop Yield Potential: {crop_yield_potential}")
    print(f"Pest and Disease Resistance: {pest_disease_resistance}")


Loaded encoder keys: dict_keys(['Average Size', 'Climate', 'Soil Type', 'Experience', 'Labour Requirement', 'Irrigation System'])
