In [1]:
# import libraries
import boto3, re, sys, math, json, os, sagemaker, urllib.request
from sagemaker import get_execution_role
import numpy as np                                
import pandas as pd                               
import matplotlib.pyplot as plt                   
from IPython.display import Image                 
from IPython.display import display               
from time import gmtime, strftime                 
from sagemaker.predictor import csv_serializer   

# Define IAM role
role = get_execution_role()
prefix = 'sagemaker/DEMO-xgboost-dm'
containers = {'us-west-2': '433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest',
              'us-east-1': '811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest',
              'us-east-2': '825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost:latest',
              'eu-west-1': '685385470294.dkr.ecr.eu-west-1.amazonaws.com/xgboost:latest',
              'ap-south-1': '720646828776.dkr.ecr.ap-south-1.amazonaws.com/xgboost:latest'} # each region has its XGBoost container
my_region = boto3.session.Session().region_name # set the region of the instance
print("Success - the MySageMakerInstance is in the " + my_region + " region. You will use the " + containers[my_region] + " container for your SageMaker endpoint.")

Success - the MySageMakerInstance is in the us-east-2 region. You will use the 825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost:latest container for your SageMaker endpoint.


In [2]:
bucket_name = 'crop-recommender-armageddon-v' # <--- CHANGE THIS VARIABLE TO A UNIQUE NAME FOR YOUR BUCKET
s3 = boto3.resource('s3')
try:
    if  my_region == 'us-east-1':
      s3.create_bucket(Bucket=bucket_name)
    else: 
      s3.create_bucket(Bucket=bucket_name, CreateBucketConfiguration={ 'LocationConstraint': my_region })
    print('S3 bucket created successfully')
except Exception as e:
    print('S3 error: ',e)

S3 error:  An error occurred (BucketAlreadyOwnedByYou) when calling the CreateBucket operation: Your previous request to create the named bucket succeeded and you already own it.


Upload data to the Bucket first!

In [116]:
try:
  model_data = pd.read_csv('./Hackathon_data_30_Oct.csv',index_col=None)
  model_data1 = pd.read_csv('./Hackathon_data_30_Oct.csv',index_col=None)
  print('Success: Data loaded into dataframe.')
except Exception as e:
    print('Data load error: ',e)

Success: Data loaded into dataframe.


In [4]:
# set an output path where the trained model will be saved
prefix = 'xgboost-as-a-built-in-algo'
output_path ='s3://{}/{}/output'.format(bucket_name, prefix)
print(output_path)

s3://crop-recommender-armageddon-v/xgboost-as-a-built-in-algo/output


In [5]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy='median')
model_data[['Production']] = imputer.fit_transform(model_data[['Production']])

In [6]:
model_data = model_data.fillna(0)
model_data = model_data.drop(['TOTAL'],axis = 1)
model_data = model_data.drop(['Crop_Year'],axis = 1)

In [7]:
model_data = model_data[model_data.Crop != 'Other Vegetables']
model_data = model_data[model_data.Crop != 'Oilseeds total']
model_data = model_data[model_data.Crop != 'Other  Rabi pulses']
model_data = model_data[model_data.Crop != 'Other Fresh Fruits']
model_data = model_data[model_data.Crop != 'Other Kharif pulses']
model_data = model_data[model_data.Crop != 'other oilseeds']
model_data = model_data[model_data.Crop != 'Other Cereals & Millets']

In [124]:
model_data1 = model_data1[model_data.Crop != 'Other Vegetables']
model_data1 = model_data1[model_data.Crop != 'Oilseeds total']
model_data1 = model_data1[model_data.Crop != 'Other  Rabi pulses']
model_data1 = model_data1[model_data.Crop != 'Other Fresh Fruits']
model_data1 = model_data1[model_data.Crop != 'Other Kharif pulses']
model_data1 = model_data1[model_data.Crop != 'other oilseeds']
model_data1 = model_data1[model_data.Crop != 'Other Cereals & Millets']

  from ipykernel import kernelapp as app
  app.launch_new_instance()


In [8]:
len(model_data.Crop.unique())

54

In [10]:
model_data

Unnamed: 0,State_Name,District_Name,Season,Crop,Area,Production,Jan-Feb,Mar-May,Jun-Sep,Oct-Dec,...,DIESEL PUMP,WIND MILLS,SOLAR PUMPS,MANUAL/ANIMAL,OTHERS,Soil Type,N,OC,P,K
0,Andaman and Nicobar Islands,NICOBARS,Whole Year,Arecanut,896.00,477.60,106.13,531.89,1794.44,650.92,...,0.00000,0.0,0.0,1.0,26.333333,Sandy,0.9911,0.6596,0.7816,0.8989
1,Andaman and Nicobar Islands,NICOBARS,Whole Year,Banana,198.00,976.00,106.13,531.89,1794.44,650.92,...,0.00000,0.0,0.0,1.0,26.333333,Sandy,0.9911,0.6596,0.7816,0.8989
2,Andaman and Nicobar Islands,NICOBARS,Whole Year,Black pepper,22.16,0.68,106.13,531.89,1794.44,650.92,...,0.00000,0.0,0.0,1.0,26.333333,Sandy,0.9911,0.6596,0.7816,0.8989
3,Andaman and Nicobar Islands,NICOBARS,Whole Year,Cashewnut,450.00,85.13,106.13,531.89,1794.44,650.92,...,0.00000,0.0,0.0,1.0,26.333333,Sandy,0.9911,0.6596,0.7816,0.8989
4,Andaman and Nicobar Islands,NICOBARS,Whole Year,Coconut,14358.00,59370000.00,106.13,531.89,1794.44,650.92,...,0.00000,0.0,0.0,1.0,26.333333,Sandy,0.9911,0.6596,0.7816,0.8989
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123229,West Bengal,PURULIA,Summer,Rice,306.00,801.00,27.53,183.07,1188.54,105.57,...,22092.66667,0.0,0.0,0.0,710.388889,Alluvial,0.5027,0.5029,0.1125,0.0259
123230,West Bengal,PURULIA,Summer,Sesamum,627.00,463.00,27.53,183.07,1188.54,105.57,...,22092.66667,0.0,0.0,0.0,710.388889,Alluvial,0.5027,0.5029,0.1125,0.0259
123231,West Bengal,PURULIA,Whole Year,Sugarcane,324.00,16250.00,27.53,183.07,1188.54,105.57,...,22092.66667,0.0,0.0,0.0,710.388889,Alluvial,0.5027,0.5029,0.1125,0.0259
123232,West Bengal,PURULIA,Winter,Rice,279151.00,597899.00,27.53,183.07,1188.54,105.57,...,22092.66667,0.0,0.0,0.0,710.388889,Alluvial,0.5027,0.5029,0.1125,0.0259


In [11]:
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
model_data['State_Name'] = le.fit_transform(model_data['State_Name'])
le1 = preprocessing.LabelEncoder()
model_data['District_Name'] = le1.fit_transform(model_data['District_Name'])
le2 = preprocessing.LabelEncoder()
model_data['Soil Type'] = le2.fit_transform(model_data['Soil Type'])
le3 = preprocessing.LabelEncoder()
model_data['Crop'] = le3.fit_transform(model_data['Crop'])
le4 = preprocessing.LabelEncoder()
model_data['Season'] = le4.fit_transform(model_data['Season'])

In [12]:
le_name_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
le_name_mapping1 = dict(zip(le1.classes_, le1.transform(le1.classes_)))
le_name_mapping2 = dict(zip(le2.classes_, le2.transform(le2.classes_)))
le_name_mapping3 = dict(zip(le3.classes_, le3.transform(le3.classes_)))
le_name_mapping4 = dict(zip(le4.classes_, le4.transform(le4.classes_)))

In [188]:
le_name_mapping2

{'Alluvial': 0, 'Black': 1, 'Laterite': 2, 'Red': 3, 'Sandy': 4}

In [187]:
len(model_data.columns)

28

In [14]:
'''from sklearn.preprocessing import StandardScaler
# numerical features
num_cols = ['Crop_Year','State_Name','District_Name', 'Season','Soil Type','Area','Production','Jan-Feb','Mar-May','Jun-Sep','Oct-Dec','0-20 mts','20-40 mts','40-60 mts','60-70 mts','GREATER THAN 70 mts','ELECTRIC PUMP','DIESEL PUMP','WIND MILLS','SOLAR PUMPS','MANUAL/ANIMAL','OTHERS','TOTAL']
for i in num_cols:    
    scale = StandardScaler().fit(model_data[[i]])
    model_data[i] = scale.transform(model_data[[i]])'''

"from sklearn.preprocessing import StandardScaler\n# numerical features\nnum_cols = ['Crop_Year','State_Name','District_Name', 'Season','Soil Type','Area','Production','Jan-Feb','Mar-May','Jun-Sep','Oct-Dec','0-20 mts','20-40 mts','40-60 mts','60-70 mts','GREATER THAN 70 mts','ELECTRIC PUMP','DIESEL PUMP','WIND MILLS','SOLAR PUMPS','MANUAL/ANIMAL','OTHERS','TOTAL']\nfor i in num_cols:    \n    scale = StandardScaler().fit(model_data[[i]])\n    model_data[i] = scale.transform(model_data[[i]])"

In [189]:
model_data1.Crop.unique()

array(['Arecanut', 'Banana', 'Black pepper', 'Cashewnut', 'Coconut ',
       'Dry chillies', 'Dry ginger', 'Sugarcane', 'Sweet potato',
       'Tapioca', 'Turmeric', 'Rice', 'Maize', 'Moong(Green Gram)',
       'Urad', 'Arhar/Tur', 'Groundnut', 'Sunflower', 'Bajra',
       'Castor seed', 'Cotton(lint)', 'Horse-gram', 'Jowar', 'Onion',
       'Ragi', 'Small millets', 'Gram', 'Linseed', 'Safflower', 'Wheat',
       'Coriander', 'Soyabean', 'Tobacco', 'Rapeseed &Mustard', 'Mesta',
       'Potato', 'Cowpea(Lobia)', 'Brinjal', 'Mango', 'Papaya', 'Tomato',
       'Sesamum', 'Niger seed', 'Sannhamp', 'Garlic', 'Jute', 'Masoor',
       'Peas & beans (Pulses)', 'Paddy', 'Barley', 'Khesari', 'Guar seed',
       'Moth', 'Cardamom'], dtype=object)

In [15]:
len(model_data.Crop.unique())

54

In [16]:
train_data, test_data = np.split(model_data.sample(frac=1, random_state=1729), [int(0.8 * len(model_data))])
print(train_data.shape, test_data.shape)

(95417, 26) (23855, 26)


In [157]:
test_data.Crop.value_counts()

39    1535
25    1520
29    1187
52    1018
42     950
18     922
45     814
53     811
38     775
1      761
32     757
17     748
36     705
21     660
14     652
27     525
2      505
46     503
35     499
37     446
12     439
20     435
43     429
51     419
24     413
47     410
4      372
3      359
9      351
16     346
11     332
15     297
44     295
49     257
41     232
10     212
31     201
28     195
19     173
0      167
22     166
23     159
48     156
5      138
8      116
40     114
13     111
33      66
30      63
7       48
34      36
6       25
26      15
50      15
Name: Crop, dtype: int64

In [17]:
len(test_data.columns)

26

In [18]:
pd.concat([train_data['Crop'], train_data.drop(['Crop'], axis=1)], axis=1).to_csv('train.csv', index=False, header=False)
boto3.Session().resource('s3').Bucket(bucket_name).Object(os.path.join(prefix, 'train/train.csv')).upload_file('train.csv')
s3_input_train = sagemaker.s3_input(s3_data='s3://{}/{}/train'.format(bucket_name, prefix), content_type='csv')

's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.


In [19]:
pd.concat([test_data['Crop'], test_data.drop(['Crop'], axis=1)], axis=1).to_csv('test.csv', index=False, header=False)
boto3.Session().resource('s3').Bucket(bucket_name).Object(os.path.join(prefix, 'test/test.csv')).upload_file('test.csv')
s3_input_test = sagemaker.s3_input(s3_data='s3://{}/{}/test'.format(bucket_name, prefix), content_type='csv')

's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.


In [20]:
from sagemaker.amazon.amazon_estimator import get_image_uri 
container = get_image_uri(boto3.Session().region_name,
                          'xgboost', 
                          repo_version='1.0-1')

'get_image_uri' method will be deprecated in favor of 'ImageURIProvider' class in SageMaker Python SDK v2.


In [21]:
sess = sagemaker.Session()
xgb = sagemaker.estimator.Estimator(containers[my_region],role, train_instance_count=1, 
                                        train_instance_type='ml.m4.xlarge',
                                        output_path='s3://{}/{}/output'.format(bucket_name, prefix),
                                        sagemaker_session=sess,train_use_spot_instances=True,
                                        train_max_run=300,
                                        train_max_wait=2000)
xgb.set_hyperparameters(learning_rate = 0.13, num_round=25, objective='multi:softprob',
                        nthread=1,max_depth=7,eta=0.2,gamma=0.3,min_child_weight=1,
                        subsample=0.7,silent=0,num_class=54,scale_pos_weight = 1,n_estimators = 750)

Parameter image_name will be renamed to image_uri in SageMaker Python SDK v2.


In [22]:
xgb.fit({'train': s3_input_train,'validation': s3_input_test})

2020-10-31 09:17:31 Starting - Starting the training job...
2020-10-31 09:17:33 Starting - Launching requested ML instances......
2020-10-31 09:18:35 Starting - Preparing the instances for training......
2020-10-31 09:19:59 Downloading - Downloading input data...
2020-10-31 09:20:25 Training - Training image download completed. Training in progress.[34mArguments: train[0m
[34m[2020-10-31:09:20:26:INFO] Running standalone xgboost training.[0m
[34m[2020-10-31:09:20:26:INFO] File size need to be processed in the node: 22.5mb. Available memory size in the node: 8472.18mb[0m
[34m[2020-10-31:09:20:26:INFO] Determined delimiter of CSV input is ','[0m
[34m[09:20:26] S3DistributionType set as FullyReplicated[0m
[34m[09:20:26] 95417x25 matrix with 2385425 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,[0m
[34m[2020-10-31:09:20:26:INFO] Determined delimiter of CSV input is ','[0m
[34m[09:20:26] S3DistributionType set as FullyReplicated[0m
[34m[0

In [23]:
xgb_predictor = xgb.deploy(initial_instance_count=1,instance_type='ml.m4.xlarge')

Parameter image will be renamed to image_uri in SageMaker Python SDK v2.


-------------!

In [24]:
test_data_array = test_data.drop(['Crop'], axis=1).values #load the data into an array
xgb_predictor.content_type = 'text/csv' # set the data type for an inference
xgb_predictor.serializer = csv_serializer # set the serializer type

In [107]:
test_data_array[1]

array([3.20000000e+01, 1.27000000e+02, 3.00000000e+00, 1.85000000e+02,
       1.56000000e+02, 2.75300000e+01, 1.83070000e+02, 1.18854000e+03,
       1.05570000e+02, 1.01076111e+04, 1.25443889e+04, 2.72455556e+03,
       1.44805556e+03, 1.05183333e+03, 5.07338889e+03, 2.20926667e+04,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 7.10388889e+02,
       0.00000000e+00, 5.54200000e-01, 3.39300000e-01, 2.38000000e-02,
       5.29800000e-01])

In [26]:
test_data_array_top50 = test_data_array[:50]

In [108]:
test_data_array

array([[2.100e+01, 6.380e+02, 2.000e+00, ..., 4.140e-02, 8.598e-01,
        2.415e-01],
       [3.200e+01, 1.270e+02, 3.000e+00, ..., 3.393e-01, 2.380e-02,
        5.298e-01],
       [2.000e+00, 5.910e+02, 4.000e+00, ..., 0.000e+00, 3.900e-01,
        1.867e-01],
       ...,
       [1.700e+01, 4.890e+02, 1.000e+00, ..., 4.300e-03, 6.007e-01,
        2.487e-01],
       [1.900e+01, 1.750e+02, 4.000e+00, ..., 3.710e-02, 3.951e-01,
        2.667e-01],
       [1.000e+00, 2.730e+02, 2.000e+00, ..., 0.000e+00, 0.000e+00,
        0.000e+00]])

In [162]:
predictions = xgb_predictor.predict(test_data_array[3])
predictions_array = np.fromstring(predictions[1:], sep=',') # and turn the prediction into an array
print(predictions_array.shape)

(54,)


  from ipykernel import kernelapp as app


In [161]:
test_data_array[3]

array([1.40000000e+01, 3.18000000e+02, 2.00000000e+00, 3.50000000e+01,
       1.13000000e+02, 7.82333333e+00, 1.53436667e+02, 1.46625667e+03,
       2.02410000e+02, 1.17544444e+03, 3.60629630e+02, 1.87833333e+03,
       8.01403704e+03, 1.57281111e+04, 2.60960741e+04, 1.39592593e+02,
       1.48888889e+01, 6.96296296e+00, 1.01851852e+01, 8.88851852e+02,
       3.00000000e+00, 5.90000000e-02, 1.37500000e-01, 4.75300000e-01,
       2.95700000e-01])

In [163]:
predictions_array

array([0.00230869, 0.00179185, 0.00389081, 0.00149491, 0.00150536,
       0.00252819, 0.00148839, 0.00160957, 0.00171915, 0.00175507,
       0.00150768, 0.00198936, 0.00864594, 0.01181125, 0.25863007,
       0.00197079, 0.00320297, 0.04251517, 0.0509109 , 0.00150684,
       0.02064265, 0.0210575 , 0.00149964, 0.00187044, 0.01508354,
       0.10104819, 0.00155443, 0.00177081, 0.00174031, 0.0125432 ,
       0.00150217, 0.00150629, 0.03274722, 0.00149017, 0.00148877,
       0.02757967, 0.09584876, 0.09302676, 0.02403227, 0.04915063,
       0.01425589, 0.00259164, 0.00396421, 0.00201148, 0.00152871,
       0.00155392, 0.01837688, 0.00365363, 0.00333777, 0.00338595,
       0.00148806, 0.0022618 , 0.0159847 , 0.015639  ])

In [164]:
predictions_array = np.array([predictions_array])

In [165]:
predictions_array

array([[0.00230869, 0.00179185, 0.00389081, 0.00149491, 0.00150536,
        0.00252819, 0.00148839, 0.00160957, 0.00171915, 0.00175507,
        0.00150768, 0.00198936, 0.00864594, 0.01181125, 0.25863007,
        0.00197079, 0.00320297, 0.04251517, 0.0509109 , 0.00150684,
        0.02064265, 0.0210575 , 0.00149964, 0.00187044, 0.01508354,
        0.10104819, 0.00155443, 0.00177081, 0.00174031, 0.0125432 ,
        0.00150217, 0.00150629, 0.03274722, 0.00149017, 0.00148877,
        0.02757967, 0.09584876, 0.09302676, 0.02403227, 0.04915063,
        0.01425589, 0.00259164, 0.00396421, 0.00201148, 0.00152871,
        0.00155392, 0.01837688, 0.00365363, 0.00333777, 0.00338595,
        0.00148806, 0.0022618 , 0.0159847 , 0.015639  ]])

In [166]:
n = 54
top_n_pred = np.argsort(predictions_array, axis=1)[:,-n :]

In [167]:
class_labels = model_data.Crop.unique()

In [168]:
class_labels

array(['Arecanut', 'Banana', 'Black pepper', 'Cashewnut', 'Coconut ',
       'Dry chillies', 'Dry ginger', 'Sugarcane', 'Sweet potato',
       'Tapioca', 'Turmeric', 'Rice', 'Maize', 'Moong(Green Gram)',
       'Urad', 'other oilseeds', 'Arhar/Tur', 'Groundnut', 'Sunflower',
       'Bajra', 'Castor seed', 'Cotton(lint)', 'Horse-gram', 'Jowar',
       'Onion', 'Other Kharif pulses', 'Ragi', 'Small millets', 'Gram',
       'Linseed', 'Safflower', 'Wheat', 'Coriander', 'Soyabean',
       'Tobacco', 'Rapeseed &Mustard', 'Mesta', 'Potato', 'Cowpea(Lobia)',
       'Brinjal', 'Mango', 'Papaya', 'Tomato', 'Other  Rabi pulses',
       'Sesamum', 'Niger seed', 'Sannhamp', 'Garlic', 'Oilseeds total',
       'Jute', 'Masoor', 'Peas & beans (Pulses)', 'Paddy', 'Barley',
       'Khesari', 'Guar seed', 'Other Fresh Fruits', 'Other Vegetables',
       'Moth', 'Other Cereals & Millets', 'Cardamom'], dtype=object)

In [169]:
top_n_pred

array([[50,  6, 34, 33,  3, 22, 30,  4, 31, 19, 10, 44, 45, 26,  7,  8,
        28,  9, 27,  1, 23, 15, 11, 43, 51,  0,  5, 41, 16, 48, 49, 47,
         2, 42, 12, 13, 29, 40, 24, 53, 52, 46, 20, 21, 38, 35, 32, 17,
        39, 18, 37, 36, 25, 14]])

In [170]:
class_labels[top_n_pred]

array([['Masoor', 'Dry ginger', 'Tobacco', 'Soyabean', 'Cashewnut',
        'Horse-gram', 'Safflower', 'Coconut ', 'Wheat', 'Bajra',
        'Turmeric', 'Sesamum', 'Niger seed', 'Ragi', 'Sugarcane',
        'Sweet potato', 'Gram', 'Tapioca', 'Small millets', 'Banana',
        'Jowar', 'other oilseeds', 'Rice', 'Other  Rabi pulses',
        'Peas & beans (Pulses)', 'Arecanut', 'Dry chillies', 'Papaya',
        'Arhar/Tur', 'Oilseeds total', 'Jute', 'Garlic', 'Black pepper',
        'Tomato', 'Maize', 'Moong(Green Gram)', 'Linseed', 'Mango',
        'Onion', 'Barley', 'Paddy', 'Sannhamp', 'Castor seed',
        'Cotton(lint)', 'Cowpea(Lobia)', 'Rapeseed &Mustard',
        'Coriander', 'Groundnut', 'Brinjal', 'Sunflower', 'Potato',
        'Mesta', 'Other Kharif pulses', 'Urad']], dtype=object)

In [171]:
sorted( zip(model_data1.Crop.unique(), predictions_array[0] ),key=lambda x:x[1])[-n:]

[('Khesari', 0.0014880596427246928),
 ('Dry ginger', 0.0014883875846862793),
 ('Mesta', 0.0014887652359902859),
 ('Rapeseed &Mustard', 0.0014901658287271857),
 ('Cashewnut', 0.0014949063770473003),
 ('Jowar', 0.0014996420359238982),
 ('Coriander', 0.0015021741855889559),
 ('Coconut ', 0.0015053563984110951),
 ('Soyabean', 0.001506292843259871),
 ('Castor seed', 0.0015068387147039175),
 ('Turmeric', 0.0015076824929565191),
 ('Garlic', 0.0015287109417840838),
 ('Jute', 0.0015539187006652355),
 ('Gram', 0.0015544277848675847),
 ('Sugarcane', 0.0016095747705549002),
 ('Sweet potato', 0.0017191546503454447),
 ('Safflower', 0.0017403105739504099),
 ('Tapioca', 0.0017550685442984104),
 ('Linseed', 0.001770805916748941),
 ('Banana', 0.0017918547382578254),
 ('Onion', 0.0018704435788094997),
 ('Arhar/Tur', 0.0019707882311195135),
 ('Rice', 0.0019893620628863573),
 ('Sannhamp', 0.002011484932154417),
 ('Guar seed', 0.0022618041839450598),
 ('Arecanut', 0.002308690920472145),
 ('Dry chillies', 0.

In [159]:
test_data.columns

Index(['State_Name', 'District_Name', 'Season', 'Crop', 'Area', 'Production',
       'Jan-Feb', 'Mar-May', 'Jun-Sep', 'Oct-Dec', '0-20 mts', '20-40 mts',
       '40-60 mts', '60-70 mts', 'GREATER THAN 70 mts', 'ELECTRIC PUMP',
       'DIESEL PUMP', 'WIND MILLS', 'SOLAR PUMPS', 'MANUAL/ANIMAL', 'OTHERS',
       'Soil Type', 'N', 'OC', 'P', 'K'],
      dtype='object')

In [160]:
le_name_mapping3

{'Arecanut': 0,
 'Arhar/Tur': 1,
 'Bajra': 2,
 'Banana': 3,
 'Barley': 4,
 'Black pepper': 5,
 'Brinjal': 6,
 'Cardamom': 7,
 'Cashewnut': 8,
 'Castor seed': 9,
 'Coconut ': 10,
 'Coriander': 11,
 'Cotton(lint)': 12,
 'Cowpea(Lobia)': 13,
 'Dry chillies': 14,
 'Dry ginger': 15,
 'Garlic': 16,
 'Gram': 17,
 'Groundnut': 18,
 'Guar seed': 19,
 'Horse-gram': 20,
 'Jowar': 21,
 'Jute': 22,
 'Khesari': 23,
 'Linseed': 24,
 'Maize': 25,
 'Mango': 26,
 'Masoor': 27,
 'Mesta': 28,
 'Moong(Green Gram)': 29,
 'Moth': 30,
 'Niger seed': 31,
 'Onion': 32,
 'Paddy': 33,
 'Papaya': 34,
 'Peas & beans (Pulses)': 35,
 'Potato': 36,
 'Ragi': 37,
 'Rapeseed &Mustard': 38,
 'Rice': 39,
 'Safflower': 40,
 'Sannhamp': 41,
 'Sesamum': 42,
 'Small millets': 43,
 'Soyabean': 44,
 'Sugarcane': 45,
 'Sunflower': 46,
 'Sweet potato': 47,
 'Tapioca': 48,
 'Tobacco': 49,
 'Tomato': 50,
 'Turmeric': 51,
 'Urad': 52,
 'Wheat': 53}

In [175]:
model_data1.Crop.unique()

array(['Arecanut', 'Banana', 'Black pepper', 'Cashewnut', 'Coconut ',
       'Dry chillies', 'Dry ginger', 'Sugarcane', 'Sweet potato',
       'Tapioca', 'Turmeric', 'Rice', 'Maize', 'Moong(Green Gram)',
       'Urad', 'Arhar/Tur', 'Groundnut', 'Sunflower', 'Bajra',
       'Castor seed', 'Cotton(lint)', 'Horse-gram', 'Jowar', 'Onion',
       'Ragi', 'Small millets', 'Gram', 'Linseed', 'Safflower', 'Wheat',
       'Coriander', 'Soyabean', 'Tobacco', 'Rapeseed &Mustard', 'Mesta',
       'Potato', 'Cowpea(Lobia)', 'Brinjal', 'Mango', 'Papaya', 'Tomato',
       'Sesamum', 'Niger seed', 'Sannhamp', 'Garlic', 'Jute', 'Masoor',
       'Peas & beans (Pulses)', 'Paddy', 'Barley', 'Khesari', 'Guar seed',
       'Moth', 'Cardamom'], dtype=object)

In [None]:
'''sagemaker.Session().delete_endpoint(xgb_predictor.endpoint)
bucket_to_delete = boto3.resource('s3').Bucket(bucket_name)
bucket_to_delete.objects.all().delete()'''