In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
import numpy as np
import warnings
warnings.filterwarnings('ignore')
# Load the raw dataset from disk.
data = pd.read_csv('faulty_crop_dataset.csv')

# Quick look at the first rows (only rendered when it is the last expression of a cell).
data.head()

# The 'Link' column is removed before any encoding happens.
data = data.drop(columns=['Link'])

# One-hot encode the categorical feature columns.
categorical_columns = ['District_Name', 'Soil_color']
data = pd.get_dummies(data, columns=categorical_columns)

# Integer-encode both targets; the fitted encoders are kept so that predicted
# codes can be mapped back to the original labels with inverse_transform.
le_crop, le_fert = LabelEncoder(), LabelEncoder()
data['Crop'] = le_crop.fit_transform(data['Crop'])
data['Fertilizer'] = le_fert.fit_transform(data['Fertilizer'])

# Everything except the two targets is used as a feature.
y_crop = data['Crop']
y_fert = data['Fertilizer']
X = data.drop(columns=['Crop', 'Fertilizer'])

# One shared 75/25 split so that features and both targets stay row-aligned.
split_parts = train_test_split(X, y_crop, y_fert, test_size=0.25, random_state=42)
X_train, X_test, y_crop_train, y_crop_test, y_fert_train, y_fert_test = split_parts

# Metric calculation helpers
def _weighted_scores(y_true, y_pred):
    """Return ``(accuracy, precision, recall, f1)`` for one target.

    Precision, recall and F1 use ``average='weighted'``.
    """
    # zero_division=0 states explicitly what scikit-learn's default ('warn')
    # does anyway -- score an undefined class metric as 0 -- without relying on
    # a global warnings filter to hide the UndefinedMetricWarning.
    averaging = {'average': 'weighted', 'zero_division': 0}
    return (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, **averaging),
        recall_score(y_true, y_pred, **averaging),
        f1_score(y_true, y_pred, **averaging),
    )


def evaluate(model, X_train, X_test, y_crop_train, y_crop_test, y_fert_train, y_fert_test):
    """Fit ``model`` on each target in turn and summarise its test performance.

    The same estimator object is fitted first on the crop labels and then on
    the fertilizer labels, so on return ``model`` is left fitted on the
    fertilizer target.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features.
        X_test: Test features.
        y_crop_train: Crop labels for the training rows.
        y_crop_test: Crop labels for the test rows.
        y_fert_train: Fertilizer labels for the training rows.
        y_fert_test: Fertilizer labels for the test rows.

    Returns:
        dict with keys ``accuracy_crop``, ``accuracy_fert``,
        ``accuracy_overall`` (fraction of test rows where both targets are
        predicted correctly), and ``precision_overall``, ``recall_overall``,
        ``f1_overall`` (mean of the two per-target weighted scores).
    """
    # Crop prediction -- predictions must be taken before the estimator is
    # refitted below, because the refit overwrites the crop model.
    model.fit(X_train, y_crop_train)
    y_crop_pred = model.predict(X_test)

    # Fertilizer prediction
    model.fit(X_train, y_fert_train)
    y_fert_pred = model.predict(X_test)

    acc_crop, prec_crop, rec_crop, f1_crop = _weighted_scores(y_crop_test, y_crop_pred)
    acc_fert, prec_fert, rec_fert, f1_fert = _weighted_scores(y_fert_test, y_fert_pred)

    # Joint accuracy: compare as plain arrays so the match is strictly
    # positional and never subject to pandas index alignment.
    crop_hit = np.asarray(y_crop_pred) == np.asarray(y_crop_test)
    fert_hit = np.asarray(y_fert_pred) == np.asarray(y_fert_test)
    acc_overall = np.mean(crop_hit & fert_hit)

    return {
        'accuracy_crop': acc_crop,
        'accuracy_fert': acc_fert,
        'accuracy_overall': acc_overall,
        'precision_overall': np.mean([prec_crop, prec_fert]),
        'recall_overall': np.mean([rec_crop, rec_fert]),
        'f1_overall': np.mean([f1_crop, f1_fert])
    }

# Candidate classifiers, keyed by the display name used in the results table.
# NOTE(review): features are passed to these estimators unscaled; logistic
# regression, KNN and RBF-SVM are typically scale-sensitive -- consider
# standardising the features before comparing them against XGBoost.
models = {}
models["Logistic Regression"] = LogisticRegression(max_iter=1000)
models["KNN"] = KNeighborsClassifier()
models["SVM (RBF)"] = SVC(kernel='rbf')
models["XGBoost"] = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')

# Evaluate every model and collect one metrics row per model.
# Rows are accumulated in a list and converted to a DataFrame once, instead of
# calling pd.concat inside the loop (which recopies the whole frame on every
# iteration and starts from an empty DataFrame).
rows = []
for name, model in models.items():
    m = evaluate(model, X_train, X_test, y_crop_train, y_crop_test, y_fert_train, y_fert_test)
    m['Model'] = name
    rows.append(m)
results = pd.DataFrame(rows)

# Final table, indexed by model name
results.set_index('Model', inplace=True)
results

Model,accuracy_crop,accuracy_fert,accuracy_overall,precision_overall,recall_overall,f1_overall
Logistic Regression,0.648361,0.280779,0.234721,0.494079,0.46457,0.464539
KNN,0.749336,0.434898,0.387068,0.606478,0.592117,0.593892
SVM (RBF),0.464128,0.301151,0.160319,0.282073,0.38264,0.29839
XGBoost,0.845881,0.720106,0.697077,0.804189,0.782994,0.788281
