# Crop Recommender - PyTorch Annual Hackathon

![](https://1.bp.blogspot.com/-HmqApqMJ_cc/XQvkp0_QUpI/AAAAAAAAAQc/cxCPibnJ5g4k_EjZWmTvr_wDAF7lCa8uQCLcBGAs/s1600/Tractor%2B1.JPG)

# Background

Smart farming is currently popular. Precision agriculture is a contemporary agricultural strategy that combines data from soil characteristics, soil types, crop production statistics, and meteorological variables to recommend the best crop to farmers for optimum yield and profit. This method can help farmers make more informed decisions about their agricultural strategy by reducing crop failures.

To ease the agricultural crisis in the current state of affairs, stronger recommendation systems are required to assist farmers in making educated decisions before beginning crop production.

# Objective
**To propose optimal crops for farmers to plant based on a variety of characteristics and to assist them in making an educated decision prior to cultivation.**

# Vision after Dataset 

The data used in this project was built by augmenting and combining several publicly available Indian datasets (weather, soil, etc.). You can access the dataset [here](https://www.kaggle.com/atharvaingle/crop-recommendation-dataset). The data is relatively simple, with only a few but useful features, unlike the many complicated factors that affect the yield of a crop.

The data contains the nitrogen, phosphorus, potassium and pH values of the soil. It also contains the humidity, temperature and rainfall required for a particular crop.

In [46]:
# Importing libraries

from __future__ import print_function
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report
from sklearn import metrics
from sklearn import tree
import warnings
warnings.filterwarnings('ignore')

# Reading and Visualizing the Dataset

In [47]:
# Location of the crop-recommendation CSV.
# NOTE(review): the '../input/...' layout looks like a Kaggle input mount — adjust when running elsewhere.
PATH = '../input/crop-recommendation-dataset/Crop_recommendation.csv'
# Load the whole file into a DataFrame.
df = pd.read_csv(PATH)

In [48]:
# First rows of the dataset.
df.head()

In [49]:
# Last rows of the dataset.
df.tail()

In [50]:
# Total number of cells (rows x columns).
df.size

In [51]:
# (rows, columns)
df.shape

In [52]:
# Column names.
df.columns

In [53]:
# Distinct values of the target column.
df['label'].unique()

In [54]:
# Per-column dtypes.
df.dtypes

In [55]:
# Number of rows per label, to check class balance.
df['label'].value_counts()

In [56]:
# Correlate only the numeric columns: the frame still contains the
# non-numeric 'label' column, and DataFrame.corr() raises on such columns
# with pandas >= 2.0 (older versions silently dropped them).
sns.heatmap(df.select_dtypes(include='number').corr(), annot=True)

## Preprocessing the data

In [57]:
from sklearn import preprocessing

# Turn the values of 'label' into integer class ids, stored in a new
# 'labelE' column next to the original one.
label_encoder = preprocessing.LabelEncoder()
df['labelE'] = label_encoder.fit_transform(df['label'].values)

# Keep one row per distinct label so the label <-> labelE pairing can be
# inspected; drop_duplicates returns a new frame, so df is left untouched.
df2 = df.drop_duplicates('label')

df2

In [58]:
# Distinct integer class ids produced by the label encoder.
df['labelE'].unique()

In [59]:
# splitting feature columns and target column
# The seven numeric input columns fed to the model.
features = df[['N', 'P','K','temperature', 'humidity', 'ph', 'rainfall']]
# Encoded class id of each row.
target = df['labelE']
# Same column as `target`, bound to a second name.
labels = df['labelE']

In [60]:
# Empty lists meant to collect accuracies and the corresponding model names.
# NOTE(review): `model` is rebound to the network instance when the PyTorch
# model is created further down, so this list is not used as a list afterwards.
acc = []
model = []

# PyTorch Model Setup and Training


In [61]:
# function to get the count of classes
def get_class_distribution(obj):
    """Count how many times each crop occurs in a sequence of encoded labels.

    Args:
        obj: Iterable of encoded class ids (0..21). Elements are matched with
            ``==``, so plain ints and NumPy integer scalars both work.

    Returns:
        dict mapping crop name -> count. The keys are inserted in class-id
        order (id 0 first, id 21 last), so ``list(result.values())[k]`` is the
        count for class id ``k``. Code that converts the values to a tensor
        and indexes it by encoded label depends on this ordering.

    Ids that match no known class are not counted; they are printed together
    with a "Check classes." notice.
    """
    # Crop names listed so that the tuple index equals the encoded class id.
    class_names = (
        'apple', 'banana', 'blackgram', 'chickpea', 'coconut', 'coffee',
        'cotton', 'grapes', 'jute', 'kidneybeans', 'lentil', 'maize',
        'mango', 'mothbeans', 'mungbean', 'muskmelon', 'orange', 'papaya',
        'pigeonpeas', 'pomegranate', 'rice', 'watermelon',
    )
    count_dict = dict.fromkeys(class_names, 0)

    for i in obj:
        # Match by equality rather than by indexing so scalar types that
        # compare equal to an int are accepted as well.
        name = next(
            (crop for class_id, crop in enumerate(class_names) if i == class_id),
            None,
        )
        if name is None:
            print(i)
            print("\nCheck classes.")
        else:
            count_dict[name] += 1

    return count_dict

In [62]:
# !Importing torch libraries
from tqdm.notebook import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

In [63]:
# Stratified three-way split: 20% of the rows are held out for testing,
# then 10% of what remains is held out for validation.
from sklearn.model_selection import train_test_split

X_trainval, X_test, y_trainval, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    stratify=target,
    random_state=69,
)

X_train, X_val, y_train, y_val = train_test_split(
    X_trainval,
    y_trainval,
    test_size=0.1,
    stratify=y_trainval,
    random_state=21,
)

# Convert every split to a plain NumPy array.
X_train, y_train = np.array(X_train), np.array(y_train)
X_val, y_val = np.array(X_val), np.array(y_val)
X_test, y_test = np.array(X_test), np.array(y_test)

In [64]:
# Sanity checks on the converted arrays: feature dtype, label values, label dtype.
print(X_train.dtype)

In [65]:
print(y_train)

In [66]:
print(y_train.dtype)

In [67]:
# Dataset wrapper that pairs feature rows with their labels so the train, validation and test splits can be fed to a DataLoader
class ClassifierDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Both containers are stored as given (no copy, no conversion) and are
    indexed with the same position.
    """

    def __init__(self, X_data, y_data):
        self.X_data, self.y_data = X_data, y_data

    def __len__(self):
        # The number of samples is defined by the feature container.
        return len(self.X_data)

    def __getitem__(self, index):
        sample = self.X_data[index]
        label = self.y_data[index]
        return sample, label


# Wrap each split as a dataset of tensors: features are cast with .float()
# and labels with .long(), the dtypes the network and the loss are fed with.
train_dataset = ClassifierDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train).long())
val_dataset = ClassifierDataset(torch.from_numpy(X_val).float(), torch.from_numpy(y_val).long())
test_dataset = ClassifierDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test).long())

In [68]:
# Labels of the training samples, in dataset order.
# The order must NOT be shuffled: these labels are turned into per-sample
# weights for WeightedRandomSampler, where weight i applies to dataset
# sample i. Permuting them would attach each weight to an unrelated sample.
target_list = torch.tensor([int(t) for _, t in train_dataset])

In [69]:
# Class weights are the reciprocal of each class's sample count in the
# training split, so rarer classes get larger weights.
# NOTE(review): class_weights is later indexed by encoded label, so the
# counts must come back in class-id order — verify get_class_distribution.
class_count = list(get_class_distribution(y_train).values())
class_weights = 1. / torch.tensor(class_count, dtype=torch.float)
print(class_weights)


In [70]:
# Per-sample weight: look up the class weight of every entry in target_list.
# NOTE(review): the sampler treats weight i as belonging to dataset sample i,
# so target_list has to be in the same order as train_dataset — confirm.
class_weights_all = class_weights[target_list]


In [71]:
# Sampler that draws training indices with probability proportional to their
# weight (with replacement), so samples of rarer classes are drawn more often.
# One epoch draws as many indices as there are weights.
weighted_sampler = WeightedRandomSampler(
    weights=class_weights_all,
    num_samples=len(class_weights_all),
    replacement=True
)

In [72]:
# Training hyper-parameters and model dimensions.
EPOCHS = 300  # number of passes of the training loop
BATCH_SIZE = 16  # mini-batch size of the training loader
LEARNING_RATE = 0.0007  # learning rate handed to the Adam optimizer
NUM_FEATURES = len(features.columns)  # one network input per feature column
NUM_CLASSES = 22  # size of the output layer; hard-coded, must match the number of encoded labels

In [73]:
# Data loaders for the three splits.
# Training batches are drawn through the weighted sampler (the sampler decides
# the order, so no shuffle flag is passed).
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          sampler=weighted_sampler
)
# Validation and test data are served one sample at a time, in dataset order.
val_loader = DataLoader(dataset=val_dataset, batch_size=1)

test_loader = DataLoader(dataset=test_dataset, batch_size=1)

In [74]:
# Defining classifier torch class
class CropClassification(nn.Module):
    """Fully connected classifier: num_feature -> 512 -> 128 -> 64 -> num_class.

    Every hidden stage is Linear -> BatchNorm1d -> ReLU; the second and third
    stages are additionally followed by dropout (p=0.2). The output layer
    returns raw, un-normalised class scores.

    Args:
        num_feature: Number of input features per sample.
        num_class: Number of output classes.
    """

    def __init__(self, num_feature, num_class):
        super().__init__()

        # Linear layers (created in the same order as before so parameter
        # initialisation consumes the RNG identically).
        self.layer_1 = nn.Linear(num_feature, 512)
        self.layer_2 = nn.Linear(512, 128)
        self.layer_3 = nn.Linear(128, 64)
        self.layer_out = nn.Linear(64, num_class)

        # Shared activation / regularisation modules.
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)

        # One batch-norm per hidden width.
        self.batchnorm1 = nn.BatchNorm1d(512)
        self.batchnorm2 = nn.BatchNorm1d(128)
        self.batchnorm3 = nn.BatchNorm1d(64)

    def forward(self, x):
        """Return the class scores for a batch ``x`` of feature rows."""
        # Stage 1: no dropout.
        hidden = self.relu(self.batchnorm1(self.layer_1(x)))
        # Stages 2 and 3: dropout after the activation.
        hidden = self.dropout(self.relu(self.batchnorm2(self.layer_2(hidden))))
        hidden = self.dropout(self.relu(self.batchnorm3(self.layer_3(hidden))))
        return self.layer_out(hidden)

In [75]:
# Pick the device to train on: the first CUDA GPU when one is available,
# otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [76]:
# Instantiate the network and move it to the selected device.
# This rebinds the name `model`, which was an empty list earlier in the file.
model = CropClassification(num_feature = NUM_FEATURES, num_class=NUM_CLASSES)
model.to(device)

# Cross-entropy loss with per-class weights (entry k weights class id k).
criterion = nn.CrossEntropyLoss(weight=class_weights.to(device))
# Adam over all model parameters.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
print(model)

In [77]:
# defining the model accuracy function during train
def multi_acc(y_pred, y_test):
    """Return the batch accuracy as a whole-number percentage.

    Args:
        y_pred: Tensor of raw class scores, one row per sample (classes
            along dim 1).
        y_test: Tensor of true class ids, one per sample.

    Returns:
        0-d tensor: share of rows whose highest-scoring class equals the true
        class, multiplied by 100 and rounded to the nearest integer.
    """
    log_probs = torch.log_softmax(y_pred, dim=1)
    predicted = torch.max(log_probs, dim=1).indices

    hits = (predicted == y_test).float()
    fraction = hits.sum() / len(hits)

    return torch.round(fraction * 100)

In [78]:
# Per-epoch history of accuracy and loss; the training loop appends one value
# per epoch to each 'train' and 'val' list.
accuracy_stats = {
    'train': [],
    "val": []
}
loss_stats = {
    'train': [],
    "val": []
}

In [79]:
# Train for EPOCHS epochs. Each epoch runs one pass over the training loader
# (with weight updates) and one pass over the validation loader (without),
# then records and prints the per-batch averages of loss and accuracy.
print("Begin training.")
for e in tqdm(range(1, EPOCHS+1)):
    
    # TRAINING
    # Running sums over this epoch's batches; divided by the batch count below.
    train_epoch_loss = 0
    train_epoch_acc = 0
    model.train()  # training mode for the dropout / batch-norm layers
    for X_train_batch, y_train_batch in train_loader:
        X_train_batch, y_train_batch = X_train_batch.to(device), y_train_batch.to(device)
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        
        y_train_pred = model(X_train_batch)
        
        train_loss = criterion(y_train_pred, y_train_batch)
        # Batch accuracy as a rounded percentage.
        train_acc = multi_acc(y_train_pred, y_train_batch)
        
        # Back-propagate and update the weights.
        train_loss.backward()
        optimizer.step()
        
        train_epoch_loss += train_loss.item()
        train_epoch_acc += train_acc.item()
        
        
    # VALIDATION    
    # No gradients are tracked while evaluating.
    with torch.no_grad():
        
        val_epoch_loss = 0
        val_epoch_acc = 0
        
        model.eval()  # evaluation mode for the dropout / batch-norm layers
        for X_val_batch, y_val_batch in val_loader:
            X_val_batch, y_val_batch = X_val_batch.to(device), y_val_batch.to(device)
            
            y_val_pred = model(X_val_batch)
                        
            val_loss = criterion(y_val_pred, y_val_batch)
            val_acc = multi_acc(y_val_pred, y_val_batch)
            
            val_epoch_loss += val_loss.item()
            val_epoch_acc += val_acc.item()
    # Store the epoch averages (sum over batches / number of batches).
    loss_stats['train'].append(train_epoch_loss/len(train_loader))
    loss_stats['val'].append(val_epoch_loss/len(val_loader))
    accuracy_stats['train'].append(train_epoch_acc/len(train_loader))
    accuracy_stats['val'].append(val_epoch_acc/len(val_loader))
                              
    
    # One summary line per epoch.
    print(f'Epoch {e+0:03}: | Train Loss: {train_epoch_loss/len(train_loader):.5f} | Val Loss: {val_epoch_loss/len(val_loader):.5f} | Train Acc: {train_epoch_acc/len(train_loader):.3f}| Val Acc: {val_epoch_acc/len(val_loader):.3f}')


In [80]:
# Visualizing the loss curve: per-epoch training and validation loss on one plot.
fig = plt.figure(figsize=(20,10))
plt.title("Loss Curve-Train vs Validation")
plt.plot( loss_stats['train'], label='train')
plt.plot( loss_stats['val'], label='validation')
plt.xlabel('num_epochs', fontsize=12)
plt.ylabel('loss', fontsize=12)
plt.legend(loc='best')

In [81]:
# Predict a class id for every test sample.
# Predictions are collected with extend(), so y_pred_list is a flat list of
# ints whatever batch size test_loader uses (squeezing each batch only gave
# a flat list when the batch size was exactly 1).
y_pred_list = []
with torch.no_grad():
    model.eval()
    for X_batch, _ in test_loader:
        X_batch = X_batch.to(device)
        y_test_pred = model(X_batch)
        # Index of the highest score in each row is the predicted class.
        _, y_pred_tags = torch.max(y_test_pred, dim = 1)
        y_pred_list.extend(y_pred_tags.cpu().numpy().tolist())

In [82]:
# Per-class precision / recall / F1 of the trained model on the test split.
print(classification_report(y_test, y_pred_list))


## Saving and Loading the torch model

In [83]:
PATH_OP = 'cropRecommender.pth'

print("!Begin Saving")


# Persist only the learned parameters (the state_dict), not the module object.
torch.save(model.state_dict(), PATH_OP)

print('!Model Saved')
# Loading a state_dict needs a freshly built model of the same architecture.
model2 = CropClassification(num_feature = NUM_FEATURES, num_class=NUM_CLASSES)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model2.to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
model2.load_state_dict(torch.load(PATH_OP, map_location=device))

# Switch to evaluation mode before using the reloaded model for inference.
model2.eval()


In [84]:
# Repeat the test-set prediction with the reloaded model to confirm the saved
# weights reproduce the results.
# Predictions are collected with extend(), so y_pred_list is a flat list of
# ints whatever batch size test_loader uses (squeezing each batch only gave
# a flat list when the batch size was exactly 1).
y_pred_list = []
with torch.no_grad():
    model2.eval()
    for X_batch, _ in test_loader:
        X_batch = X_batch.to(device)
        y_test_pred = model2(X_batch)
        # Index of the highest score in each row is the predicted class.
        _, y_pred_tags = torch.max(y_test_pred, dim = 1)
        y_pred_list.extend(y_pred_tags.cpu().numpy().tolist())

In [85]:
# Per-class precision / recall / F1 of the reloaded model on the test split.
print(classification_report(y_test, y_pred_list))
