In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load dataset
df = pd.read_csv("plant_recommendation_dataset.csv")

# Feature selection
features_to_exclude = [
    'Disease Resistance', 'Pruning Requirement', 'Companion Planting Suitability',
    'Water Requirement', 'Irrigation Frequency', 'Recommended Organic Fertilizers',
    'Climate Suitability', 'Sunlight Requirement', 'Edible'
]
all_features = [col for col in df.columns if col not in features_to_exclude]
df = df[all_features]

# Label encoding for non-numeric columns (except target 'Common Name')
non_numeric_columns = df.select_dtypes(exclude=['int64', 'float64']).columns
non_numeric_columns = [col for col in non_numeric_columns if col != 'Common Name']

label_encoders = {}
for col in non_numeric_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Map 'Common Name' to unique IDs
common_name_dict = {name: idx for idx, name in enumerate(df['Common Name'].unique(), start=1)}
reverse_common_name_dict = {v: k for k, v in common_name_dict.items()}
df['Common Name'] = df['Common Name'].map(common_name_dict)

# Split dataset
X = df.drop('Common Name', axis=1)
y = df['Common Name']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train RandomForest model
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
y_pred = rf_model.predict(X_test)
print("RandomForest Accuracy:", accuracy_score(y_test, y_pred))

# Prediction function
def predict_plant(features):
    input_df = pd.DataFrame([features], columns=X.columns)
    input_scaled = scaler.transform(input_df)
    prediction = rf_model.predict(input_scaled)
    return reverse_common_name_dict.get(prediction[0], "Unknown")

# User input for features
def get_user_input():
    user_input = {}
    print("\nEnter feature values for plant recommendation:")
    for feature in X.columns:
        if feature in label_encoders:
            encoder = label_encoders[feature]
            unique_values = encoder.classes_
            print(f"Possible values for '{feature}': {list(unique_values)}")
            while True:
                value = input(f"Enter value for '{feature}': ")
                if value in unique_values:
                    user_input[feature] = encoder.transform([value])[0]
                    break
                else:
                    print(f"Invalid input! Please choose from: {list(unique_values)}")
        else:
            while True:
                try:
                    value = float(input(f"Enter numeric value for '{feature}': "))
                    user_input[feature] = value
                    break
                except ValueError:
                    print(f"Invalid input! Please enter a numeric value.")
    return user_input

# Get user input and predict
user_input = get_user_input()
prediction = predict_plant(list(user_input.values()))
print(f"\nRecommended Plant: {prediction}")


RandomForest Accuracy: 0.9777777777777777

Enter feature values for plant recommendation:
Possible values for 'Scientific Name': ['Capsicum annuum', 'Coriandrum sativum', 'Cucurbita pepo', 'Daucus carota', 'Lactuca sativa', 'Mentha', 'Ocimum basilicum', 'Raphanus sativus', 'Solanum lycopersicum', 'Spinacia oleracea']


Enter value for 'Scientific Name':  Mentha


Possible values for 'Growth Type': ['Herb', 'Shrub', 'Vine']


Enter value for 'Growth Type':  Herb


Possible values for 'Medicinal Use': ['No', 'Yes']


Enter value for 'Medicinal Use':  No
Enter numeric value for 'Optimal Temperature (°C)':  15
Enter numeric value for 'Humidity Preference (%)':  65


Possible values for 'Preferred Soil Type': ['Clay', 'Loam', 'Peaty', 'Sandy', 'Silt']


Enter value for 'Preferred Soil Type':  Loam
Enter numeric value for 'Soil pH Range':  7.8
Enter numeric value for 'Germination Time (Days)':  6
Enter numeric value for 'Time to Harvest (Days)':  6


Possible values for 'Common Pests': ['Aphids', 'Bacterial', 'Caterpillars', 'Fungal', nan]


Enter value for 'Common Pests':  Fungal


Possible values for 'Nitrogen Need': ['High', 'Low', 'Medium']


Enter value for 'Nitrogen Need':  Low


Possible values for 'Phosphorus Need': ['High', 'Low', 'Medium']


Enter value for 'Phosphorus Need':  Low


Possible values for 'Potassium Need': ['High', 'Low', 'Medium']


Enter value for 'Potassium Need':  Low


Possible values for 'Suitable for Region': ['Bangalore', 'Chennai', 'Delhi', 'Hyderabad', 'Mumbai']


Enter value for 'Suitable for Region':  Delhi


Possible values for 'Suitable for Season': ['Monsoon', 'Summer', 'Winter']


Enter value for 'Suitable for Season':  Summer



Recommended Plant: Lettuce


In [3]:
import pickle

# Assuming you already trained your model and preprocessing steps
# `rf_model`, `scaler`, `label_encoders`, `common_name_dict` should be defined

# Save the trained RandomForest model
with open('plant_model.pkl', 'wb') as f:
    pickle.dump(rf_model, f)

# Save the feature scaler
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

# Save label encoders for categorical features
with open('label_encoders.pkl', 'wb') as f:
    pickle.dump(label_encoders, f)

# Save common name mapping dictionary
with open('common_name_dict.pkl', 'wb') as f:
    pickle.dump(common_name_dict, f)

print("Model and preprocessing files saved successfully!")


Model and preprocessing files saved successfully!
