In [1]:
# All required libraries are imported here 
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics

# Read the soil measurements into a DataFrame
crops = pd.read_csv(filepath_or_buffer="soil_measures.csv")

# Preview the first five rows
crops.head(n=5)

Unnamed: 0,N,P,K,ph,crop
0,90,42,43,6.502985,rice
1,85,58,41,7.038096,rice
2,60,55,44,7.840207,rice
3,74,35,40,6.980401,rice
4,78,42,42,7.628473,rice


Checking for missing values

In [114]:
# Per-column count of missing entries (isnull is the pandas alias of isna)
crops.isnull().sum()

N       0
P       0
K       0
ph      0
crop    0
dtype: int64

Checking the data types and class distribution

In [115]:
# Show the dtype pandas inferred for every column
crops.dtypes

N         int64
P         int64
K         int64
ph      float64
crop     object
dtype: object

In [116]:
# Count the rows per crop label to check how balanced the classes are
crops["crop"].value_counts()

rice           100
maize          100
jute           100
cotton         100
coconut        100
papaya         100
orange         100
apple          100
muskmelon      100
watermelon     100
grapes         100
mango          100
banana         100
pomegranate    100
lentil         100
blackgram      100
mungbean       100
mothbeans      100
pigeonpeas     100
kidneybeans    100
chickpea       100
coffee         100
Name: crop, dtype: int64

Create the feature and target variables, then use train_test_split to split the data 80/20 into training and test sets.

In [117]:
# Predictors: every column except the label, as a plain NumPy array
X = crops.drop(columns=["crop"]).to_numpy()

# Target: crop names encoded as integer category codes
y = crops["crop"].astype("category").cat.codes

# Hold out 20% of the rows, keeping the class proportions equal in both splits
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=12,
    stratify=y,
)

Build a logistic regression model for each feature individually and evaluate how well that single feature predicts the crop

In [118]:
# Evaluate each soil measure on its own: fit a one-feature logistic regression
# on the training split and record its accuracy on the held-out test split.
feature_performance = {}

# Derive the feature names from the same column selection that built X, so
# column i of X_train / X_test always matches the name the score is stored under.
feature_names = list(crops.drop("crop", axis=1).columns)

for i, feature in enumerate(feature_names):
    # `multi_class` is deprecated since scikit-learn 1.5. With the lbfgs solver
    # and more than two classes the model is already multinomial by default,
    # so the argument is omitted to avoid the FutureWarning / future removal.
    log_reg = LogisticRegression(solver="lbfgs", max_iter=200)

    # scikit-learn estimators expect 2-D input, hence the reshape to one column
    X_train_feature = X_train[:, i].reshape(-1, 1)
    X_test_feature = X_test[:, i].reshape(-1, 1)

    log_reg.fit(X_train_feature, y_train)
    y_pred = log_reg.predict(X_test_feature)

    # Share of test rows whose crop was predicted correctly from this feature alone
    accuracy = metrics.accuracy_score(y_test, y_pred)
    feature_performance[feature] = accuracy

Store the name of the best-performing feature as the key, with its model's evaluation score as the value.

In [119]:
# Find the feature whose single-feature model scored highest
# (on ties, the first one in insertion order wins)
best_feature = max(feature_performance, key=feature_performance.get)
best_accuracy = feature_performance[best_feature]

# Keep only the winning feature and its score in a one-entry dictionary
best_predictive_feature = {best_feature: best_accuracy}

print(best_predictive_feature)

{'K': 0.3181818181818182}
