In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [7]:
df = pd.read_csv("plant_recommendation_dataset.csv")

In [9]:
df.head()

Unnamed: 0,Common Name,Scientific Name,Growth Type,Edible,Medicinal Use,Optimal Temperature (°C),Humidity Preference (%),Sunlight Requirement,Climate Suitability,Preferred Soil Type,...,Companion Planting Suitability,Pruning Requirement,Common Pests,Disease Resistance,Nitrogen Need,Phosphorus Need,Potassium Need,Recommended Organic Fertilizers,Suitable for Region,Suitable for Season
0,Mint,Mentha,Herb,Yes,Yes,23,81,Partial Shade,Temperate,Sandy,...,Bad,No,Bacterial,,Medium,Medium,High,Compost,Bangalore,Monsoon
1,Carrot,Daucus carota,Herb,Yes,No,15,53,Partial Shade,Arid,Sandy,...,Good,No,Caterpillars,Viral,Medium,Low,Medium,Vermicompost,Chennai,Monsoon
2,Coriander,Coriandrum sativum,Herb,Yes,Yes,15,65,Shade,Subtropical,Silt,...,Good,Yes,Caterpillars,,Low,High,High,Compost,Delhi,Summer
3,Coriander,Coriandrum sativum,Herb,Yes,Yes,31,47,Full Sun,Tropical,Peaty,...,Bad,No,Bacterial,Bacterial,High,Low,Medium,Compost,Hyderabad,Winter
4,Radish,Raphanus sativus,Herb,Yes,No,26,60,Shade,Arid,Loam,...,Good,No,Bacterial,Fungal,Medium,High,Low,Compost,Delhi,Winter


In [11]:
# Remove 'Disease Resistance' and 'Pruning Requirement' while keeping all other features
all_features = [col for col in df.columns if col not in ['Disease Resistance', 'Pruning Requirement','Companion Planting Suitability', 'Water Requirement', 'Irrigation Frequency','Recommended Organic Fertilizers','Climate Suitability','Sunlight Requirement','Edible' ]]

# Keep only selected features
df = df[all_features]

# Identify non-numeric columns except 'Common Name'
non_numeric_columns = df.select_dtypes(exclude=['int64', 'float64']).columns
non_numeric_columns = [col for col in non_numeric_columns if col != 'Common Name']

# Apply Label Encoding only to categorical columns (excluding 'Common Name')
label_encoders = {}
for col in non_numeric_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le  # Store encoder for future use

# Ensure 'Common Name' remains a string
df['Common Name'] = df['Common Name'].astype(str)

# Print updated features
print("All Features:", all_features)
print(df.dtypes)

All Features: ['Common Name', 'Scientific Name', 'Growth Type', 'Medicinal Use', 'Optimal Temperature (°C)', 'Humidity Preference (%)', 'Preferred Soil Type', 'Soil pH Range', 'Germination Time (Days)', 'Time to Harvest (Days)', 'Common Pests', 'Nitrogen Need', 'Phosphorus Need', 'Potassium Need', 'Suitable for Region', 'Suitable for Season']
Common Name                  object
Scientific Name               int32
Growth Type                   int32
Medicinal Use                 int32
Optimal Temperature (°C)      int64
Humidity Preference (%)       int64
Preferred Soil Type           int32
Soil pH Range               float64
Germination Time (Days)       int64
Time to Harvest (Days)        int64
Common Pests                  int32
Nitrogen Need                 int32
Phosphorus Need               int32
Potassium Need                int32
Suitable for Region           int32
Suitable for Season           int32
dtype: object


In [13]:
df['Common Name'].value_counts()


Common Name
Coriander    50
Tomato       49
Mint         48
Carrot       47
Basil        47
Spinach      44
Pumpkin      43
Lettuce      43
Chili        40
Radish       39
Name: count, dtype: int64

In [15]:
df['Common Name'].unique().size

10

In [17]:
df['Common Name'].unique()

array(['Mint', 'Carrot', 'Coriander', 'Radish', 'Spinach', 'Pumpkin',
       'Chili', 'Tomato', 'Lettuce', 'Basil'], dtype=object)

In [19]:
# Generate a unique mapping for Common Name
common_name_dict = {name: idx for idx, name in enumerate(df['Common Name'].unique(), start=1)}

# Apply the mapping
df['Common Name'] = df['Common Name'].map(common_name_dict)

# Verify the changes
print(common_name_dict)  # Print the dictionary mapping
print(df.head())  # Check if Common Name is now numerical


{'Mint': 1, 'Carrot': 2, 'Coriander': 3, 'Radish': 4, 'Spinach': 5, 'Pumpkin': 6, 'Chili': 7, 'Tomato': 8, 'Lettuce': 9, 'Basil': 10}
   Common Name  Scientific Name  Growth Type  Medicinal Use  \
0            1                5            0              1   
1            2                3            0              0   
2            3                1            0              1   
3            3                1            0              1   
4            4                7            0              0   

   Optimal Temperature (°C)  Humidity Preference (%)  Preferred Soil Type  \
0                        23                       81                    3   
1                        15                       53                    3   
2                        15                       65                    4   
3                        31                       47                    2   
4                        26                       60                    1   

   Soil pH Range  Germin

In [21]:
X=df.drop('Common Name', axis = 1)
y=df['Common Name']

In [23]:
X.head()

Unnamed: 0,Scientific Name,Growth Type,Medicinal Use,Optimal Temperature (°C),Humidity Preference (%),Preferred Soil Type,Soil pH Range,Germination Time (Days),Time to Harvest (Days),Common Pests,Nitrogen Need,Phosphorus Need,Potassium Need,Suitable for Region,Suitable for Season
0,5,0,1,23,81,3,6.1,20,63,1,2,2,0,0,0
1,3,0,0,15,53,3,5.5,20,104,2,2,1,2,1,0
2,1,0,1,15,65,4,7.4,6,41,2,1,0,0,2,1
3,1,0,1,31,47,2,5.7,6,45,1,0,1,2,3,2
4,7,0,0,26,60,1,6.8,11,118,1,2,0,1,2,2


In [25]:
y.head()

0    1
1    2
2    3
3    3
4    4
Name: Common Name, dtype: int64

In [27]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=42)

In [29]:
from sklearn.preprocessing import MinMaxScaler
mx = MinMaxScaler()
X_train = mx.fit_transform(X_train)
X_test = mx.transform(X_test)

In [31]:
X_train

array([[0.        , 0.5       , 0.        , ..., 0.        , 0.25      ,
        0.        ],
       [0.88888889, 0.        , 0.        , ..., 0.        , 0.5       ,
        0.        ],
       [0.33333333, 0.        , 0.        , ..., 0.5       , 0.        ,
        0.5       ],
       ...,
       [0.88888889, 0.        , 0.        , ..., 0.5       , 0.5       ,
        0.5       ],
       [0.77777778, 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.77777778, 0.        , 0.        , ..., 1.        , 0.        ,
        0.5       ]])

In [33]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test=sc.transform(X_test)

In [35]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier

In [37]:
models = {
    'LogisticRegression': LogisticRegression(),
    'SVC':SVC(),
    'KNeighborsClassifier':KNeighborsClassifier(),
    'ExtraTreeClassifier':ExtraTreeClassifier(),
    'RandomForestClassifier':RandomForestClassifier(),
    'AdaBoostClassifier':AdaBoostClassifier()
}

In [39]:
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    print(f"{name} model with accuracy: {score}")

LogisticRegression model with accuracy: 0.8333333333333334
SVC model with accuracy: 0.7222222222222222
KNeighborsClassifier model with accuracy: 0.4666666666666667
ExtraTreeClassifier model with accuracy: 0.6777777777777778
RandomForestClassifier model with accuracy: 0.9888888888888889
AdaBoostClassifier model with accuracy: 0.5111111111111111




In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load dataset
df = pd.read_csv("plant_recommendation_dataset.csv")

# Feature selection
all_features = [col for col in df.columns if col not in ['Disease Resistance', 'Pruning Requirement','Companion Planting Suitability', 'Water Requirement', 'Irrigation Frequency','Recommended Organic Fertilizers','Climate Suitability','Sunlight Requirement','Edible' ]]
df = df[all_features]

# Label encoding
non_numeric_columns = df.select_dtypes(exclude=['int64', 'float64']).columns
non_numeric_columns = [col for col in non_numeric_columns if col != 'Common Name']
label_encoders = {}
for col in non_numeric_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Map 'Common Name' to unique IDs
common_name_dict = {name: idx for idx, name in enumerate(df['Common Name'].unique(), start=1)}
df['Common Name'] = df['Common Name'].map(common_name_dict)

# Split dataset
X = df.drop('Common Name', axis=1)
y = df['Common Name']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scaling
mx = MinMaxScaler()
X_train = mx.fit_transform(X_train)
X_test = mx.transform(X_test)
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Train RandomForest model
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
y_pred = rf_model.predict(X_test)
print("RandomForest Accuracy:", accuracy_score(y_test, y_pred))

# Prediction function
def predict_plant(features):
    input_df = pd.DataFrame([features], columns=X.columns)
    input_df = mx.transform(input_df)
    input_df = sc.transform(input_df)
    prediction = rf_model.predict(input_df)
    common_name = [name for name, idx in common_name_dict.items() if idx == prediction[0]]
    return common_name[0] if common_name else "Unknown"

# User input based on dataset values
print("\nEnter the feature values for plant recommendation (based on dataset columns):")
user_input = {}
for feature in X.columns:
    unique_values = df[feature].unique()
    print(f"Possible values for {feature}: {unique_values}")
    value = float(input(f"Enter {feature}: "))
    user_input[feature] = value

# Predict and display result
prediction = predict_plant(list(user_input.values()))
print(f"\nRecommended Plant: {prediction}")


RandomForest Accuracy: 0.9777777777777777

Enter the feature values for plant recommendation (based on dataset columns):
Possible values for Scientific Name: [5 3 1 7 9 2 0 8 4 6]


Enter Scientific Name:  1


Possible values for Growth Type: [0 2 1]


Enter Growth Type:  0


Possible values for Medicinal Use: [1 0]


Enter Medicinal Use:  1


Possible values for Optimal Temperature (°C): [23 15 31 26 18 17 28 27 22 29 21 32 34 35 16 20 30 19 33 24 25]


Enter Optimal Temperature (°C):  15


Possible values for Humidity Preference (%): [81 53 65 47 60 76 51 79 87 84 63 82 74 42 80 49 44 69 45 54 83 57 88 72
 86 43 77 70 46 58 85 62 78 61 73 89 71 52 40 75 68 56 41 50 48 90 64 66
 67 59 55]


Enter Humidity Preference (%):  40


Possible values for Preferred Soil Type: [3 4 2 1 0]


Enter Preferred Soil Type:  2


Possible values for Soil pH Range: [6.1 5.5 7.4 5.7 6.8 6.  5.9 7.3 6.2 6.9 7.2 7.  6.5 5.6 6.6 7.1 7.5 5.8
 6.7 6.3 6.4]


Enter Soil pH Range:  5.5


Possible values for Germination Time (Days): [20  6 11 14  8 10  5 13 19 15 12  7 18 16 17  9]


Enter Germination Time (Days):  11


Possible values for Time to Harvest (Days): [ 63 104  41  45 118  56  82  61  99 117  51 116  75  31  70  48  84  38
 105 119  46  40  67  81  80  95  88  57  94  72 103 120 106  73  93  83
  54  34  98 109  32  97  62  39  53 102 114  77  78  44 100  33  74 110
  96  43  49  79  52  55 107  87 101  89  69  37 111  36  42 112 115  68
  86  90  30 113  60  91  85  47  66  59  76  35  92  65  64  50  58  71]


Enter Time to Harvest (Days):  41


Possible values for Common Pests: [1 2 4 3 0]


Enter Common Pests:  1


Possible values for Nitrogen Need: [2 1 0]


Enter Nitrogen Need:  2


Possible values for Phosphorus Need: [2 1 0]


Enter Phosphorus Need:  2


Possible values for Potassium Need: [0 2 1]


Enter Potassium Need:  2


Possible values for Suitable for Region: [0 1 2 3 4]


Enter Suitable for Region:  2


Possible values for Suitable for Season: [0 1 2]


Enter Suitable for Season:  2



Recommended Plant: Coriander
