In [4]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import RandomForestClassifier
# Load data
df = pd.read_csv('trainfertiliser.csv')

# Preprocessing
# Encode categorical variables
le_soil = LabelEncoder()
le_crop = LabelEncoder()
le_fertilizer = LabelEncoder()


df['Soil Type'] = le_soil.fit_transform(df['Soil Type'])
df['Crop Type'] = le_crop.fit_transform(df['Crop Type'])
df['Fertilizer Name'] = le_fertilizer.fit_transform(df['Fertilizer Name'])
# Split features and target
X = df[['id', 'Temparature','Humidity','Moisture','Soil Type','Crop Type','Nitrogen','Potassium', 'Phosphorous']]
y = df['Fertilizer Name']


# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
df.isnull().sum()

id                 0
Temparature        0
Humidity           0
Moisture           0
Soil Type          0
Crop Type          0
Nitrogen           0
Potassium          0
Phosphorous        0
Fertilizer Name    0
dtype: int64

In [6]:
# Create classifier
rf = RandomForestClassifier(
    n_estimators=100,  # number of trees
    criterion='gini',  # or 'entropy'
    max_depth=2,    # maximum depth of trees
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='sqrt',  # number of features to consider at each split
    random_state=42
)

# Train model
rf.fit(X_train, y_train)

# Make predictions
y_pred = rf.predict(X_test)



In [7]:
# Evaluate model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=le_fertilizer.classes_))

Accuracy: 0.15612

Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

    10-26-26       0.16      0.21      0.18     22841
    14-35-14       0.15      0.73      0.25     22639
    17-17-17       0.19      0.01      0.03     22764
       20-20       0.17      0.01      0.03     22010
       28-28       0.16      0.07      0.10     22384
         DAP       0.00      0.00      0.00     19148
        Urea       0.00      0.00      0.00     18214

    accuracy                           0.16    150000
   macro avg       0.12      0.15      0.08    150000
weighted avg       0.13      0.16      0.09    150000



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [28]:
test_data = pd.read_csv("testfertiliser.csv")
test_data.shape

from sklearn.preprocessing import LabelEncoder

# Initialize encoders
le_soil = LabelEncoder()
le_crop = LabelEncoder()

# Fit AND transform on training data (this creates the classes_ attribute)
df['Soil Type'] = le_soil.fit_transform(df['Soil Type'])
df['Crop Type'] = le_crop.fit_transform(df['Crop Type'])

# Now you can safely transform test data
test_data['Soil Type'] = test_data['Soil Type'].apply(
    lambda x: le_soil.transform([x])[0] if x in le_soil.classes_ else -1
)
test_data['Crop Type'] = test_data['Crop Type'].apply(
    lambda x: le_crop.transform([x])[0] if x in le_crop.classes_ else -1
)

In [29]:
test_data

Unnamed: 0,id,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous
0,750000,31,70,52,-1,-1,34,11,24
1,750001,27,62,45,-1,-1,30,14,15
2,750002,28,72,28,-1,-1,14,15,4
3,750003,37,53,57,-1,-1,18,17,36
4,750004,31,55,32,-1,-1,13,19,14
...,...,...,...,...,...,...,...,...,...
249995,999995,26,66,30,-1,-1,14,7,18
249996,999996,33,62,55,-1,-1,28,14,7
249997,999997,36,53,64,-1,-1,28,11,27
249998,999998,36,67,26,-1,-1,33,0,10


In [30]:
test_data

Unnamed: 0,id,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous
0,750000,31,70,52,-1,-1,34,11,24
1,750001,27,62,45,-1,-1,30,14,15
2,750002,28,72,28,-1,-1,14,15,4
3,750003,37,53,57,-1,-1,18,17,36
4,750004,31,55,32,-1,-1,13,19,14
...,...,...,...,...,...,...,...,...,...
249995,999995,26,66,30,-1,-1,14,7,18
249996,999996,33,62,55,-1,-1,28,14,7
249997,999997,36,53,64,-1,-1,28,11,27
249998,999998,36,67,26,-1,-1,33,0,10


In [31]:
y_test = rf.predict(test_data)

In [32]:
y_test

array([1, 1, 1, ..., 1, 1, 1])

In [33]:
# 1. First create and FIT an encoder instance
le = LabelEncoder()
le.fit(y_train)  # Or use fit_transform if you're encoding training data

# 2. Later, when you need to inverse transform
y_decoded = le.inverse_transform(y_test)  # Calling on instance

In [34]:
y_decoded

array([1, 1, 1, ..., 1, 1, 1])

In [35]:
# Assuming you previously did:
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
le.fit_transform(y_train) 
y_inverse = le.inverse_transform(y_decoded)  # Use the SAME 'le' instance

In [36]:
y_inverse
ID=test_data['id']
test_data.drop(['id'],axis=1,inplace=True)

In [37]:
result = pd.DataFrame({
    'ID': ID,
    'class': y_decoded
})

In [38]:
result

Unnamed: 0,ID,class
0,750000,1
1,750001,1
2,750002,1
3,750003,1
4,750004,0
...,...,...
249995,999995,1
249996,999996,1
249997,999997,1
249998,999998,1


In [39]:
result.to_csv("submission.csv", index=False)