# Fertilizer Recommendation with Logistic Regression
An end-to-end data science workflow that trains a logistic regression model to rank the three most likely fertilizers for each sample, evaluated with MAP@3.

In [None]:
# Install packages if needed
%pip install pandas scikit-learn numpy --quiet

import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression

## 1. Load Data

In [None]:
# Read the labelled training table and the unlabelled test table from the
# current working directory, then preview the first training rows.
train = pd.read_csv(filepath_or_buffer='train.csv')
test = pd.read_csv(filepath_or_buffer='test.csv')
train.head(n=5)

## 2. Data Overview

In [None]:
# Report the dimensions of both tables and the per-column count of missing
# values in the training table.
print('Train shape:', train.shape)
print('Test shape:', test.shape)
# The leading "\n" must be an escape inside the literal: a raw line break in
# a single-quoted string is a SyntaxError.
print('\nMissing values in train:')
print(train.isnull().sum())

## 3. Preprocessing

In [None]:
# Replace the fertilizer names in the training table with integer class ids.
# `le` keeps the fitted mapping so ids can be decoded back to names.
target = 'Fertilizer Name'
le = LabelEncoder()
train[target] = le.fit(train[target]).transform(train[target])

# Columns that are one-hot encoded.
cat_features = ['Soil Type', 'Crop Type']
# Columns that are standardized.
# NOTE(review): 'Temparature' is kept exactly as written; presumably it
# matches the CSV header spelling -- confirm before "correcting" it.
num_features = [
    'Temparature',
    'Humidity',
    'Moisture',
    'Nitrogen',
    'Potassium',
    'Phosphorous',
]
preprocessor = ColumnTransformer(
    transformers=[
        # Categories not seen during fit are ignored rather than raising.
        ('cat', OneHotEncoder(handle_unknown='ignore'), cat_features),
        ('num', StandardScaler(), num_features),
    ]
)

## 4. MAP@3 Metric

In [None]:
def mapk(actual, predicted, k=3):
    """Return the mean average precision at ``k`` for single-label targets.

    Each sample has exactly one true label, so its average precision is
    ``1 / rank`` of that label within the first ``k`` predictions, or 0 when
    the label is not among them.

    Args:
        actual: Sized sequence holding the true label of every sample.
        predicted: Sequence of per-sample ranked predictions, best first.
            Rows may be lists, tuples or NumPy arrays.
        k: Number of leading predictions taken into account per sample.

    Returns:
        The per-sample scores summed and divided by ``len(actual)``;
        ``0.0`` when ``actual`` is empty.
    """
    n_samples = len(actual)
    if n_samples == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    score = 0.0
    for a, p in zip(actual, predicted):
        # Materialize the slice as a list so `.index` is available even when
        # the row is a NumPy array.
        top_k = list(p[:k])
        if a in top_k:
            score += 1.0 / (top_k.index(a) + 1)
    return score / n_samples

## 5. Cross-validation

In [None]:
# Features are every training column except the target and the row id.
X = train.drop(columns=[target, 'id'])
y = train[target]

# Stratified 3-fold split with a fixed seed so fold scores are reproducible.
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
cv_scores = []
for fold, (tr_idx, val_idx) in enumerate(skf.split(X, y), 1):
    X_train, X_val = X.iloc[tr_idx], X.iloc[val_idx]
    y_train, y_val = y.iloc[tr_idx], y.iloc[val_idx]
    # `multi_class` is deprecated in recent scikit-learn releases; with the
    # default solver a multiclass target is already fit as multinomial, so
    # the argument is omitted.
    clf = Pipeline([
        ('preprocess', preprocessor),
        ('model', LogisticRegression(max_iter=200)),
    ])
    clf.fit(X_train, y_train)
    proba = clf.predict_proba(X_val)
    # argsort yields column positions of `proba`; those columns follow
    # clf.classes_, so translate positions into the encoded labels before
    # comparing them with y_val.
    top3_cols = np.argsort(proba, axis=1)[:, ::-1][:, :3]
    top3 = clf.classes_[top3_cols]
    cv_score = mapk(list(y_val), top3.tolist(), k=3)
    cv_scores.append(cv_score)
    print(f'Fold {fold} MAP@3: {cv_score:.4f}')
print('Mean CV MAP@3:', np.mean(cv_scores))

## 6. Train Final Model

In [None]:
# Fit the same preprocessing + logistic regression pipeline on the full
# training set. `multi_class` is deprecated in recent scikit-learn releases;
# with the default solver a multiclass target is already fit as multinomial,
# so the argument is omitted.
final_clf = Pipeline([
    ('preprocess', preprocessor),
    ('model', LogisticRegression(max_iter=200)),
])
final_clf.fit(X, y)

## 7. Prediction and Submission

In [None]:
# Keep the ids aside; the remaining test columns are the model's features.
test_ids = test['id']
X_test = test.drop(columns=['id'])
proba = final_clf.predict_proba(X_test)
# argsort yields column positions of `proba`; those columns follow
# final_clf.classes_, so translate positions into encoded labels first.
top3_cols = np.argsort(proba, axis=1)[:, ::-1][:, :3]
top3 = final_clf.classes_[top3_cols]
# Decode every label in a single call instead of one inverse_transform call
# per cell, then restore the (n_rows, 3) layout as nested lists of names.
pred_labels = le.inverse_transform(top3.ravel()).reshape(top3.shape).tolist()
# One row per test id with the three predicted names joined by spaces,
# most probable first.
submission = pd.DataFrame({
    'id': test_ids,
    'Fertilizer Name': [' '.join(p) for p in pred_labels],
})
submission.to_csv('submission.csv', index=False)
submission.head()