In [1]:
# Importing the Required Packages
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import ExtraTreesRegressor,RandomForestRegressor,VotingRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor

In [2]:
# Load the crop-yield dataset from a CSV expected in the notebook's working directory,
# then display it (pandas abbreviates the rendering to the first and last rows).
crop = pd.read_csv("crop_yield.csv")
crop

Unnamed: 0,Crop,Crop_Year,Season,State,Area,Production,Annual_Rainfall,Fertilizer,Pesticide,Yield
0,Arecanut,1997,Whole Year,Assam,73814.0,56708,2051.4,7024878.38,22882.34,0.796087
1,Arhar/Tur,1997,Kharif,Assam,6637.0,4685,2051.4,631643.29,2057.47,0.710435
2,Castor seed,1997,Kharif,Assam,796.0,22,2051.4,75755.32,246.76,0.238333
3,Coconut,1997,Whole Year,Assam,19656.0,126905000,2051.4,1870661.52,6093.36,5238.051739
4,Cotton(lint),1997,Kharif,Assam,1739.0,794,2051.4,165500.63,539.09,0.420909
...,...,...,...,...,...,...,...,...,...,...
19684,Small millets,1998,Kharif,Nagaland,4000.0,2000,1498.0,395200.00,1160.00,0.500000
19685,Wheat,1998,Rabi,Nagaland,1000.0,3000,1498.0,98800.00,290.00,3.000000
19686,Maize,1997,Kharif,Jammu and Kashmir,310883.0,440900,1356.2,29586735.11,96373.73,1.285000
19687,Rice,1997,Kharif,Jammu and Kashmir,275746.0,5488,1356.2,26242746.82,85481.26,0.016667


In [3]:
# Check the size of the dataset as (rows, columns)
crop.shape

(19689, 10)

In [4]:
# Count missing (null) values per column; all zeros means nothing needs imputing
crop.isnull().sum()

Crop               0
Crop_Year          0
Season             0
State              0
Area               0
Production         0
Annual_Rainfall    0
Fertilizer         0
Pesticide          0
Yield              0
dtype: int64

In [5]:
# List the distinct crop names. Some raw labels carry stray whitespace
# (e.g. 'Coconut ' and 'Other  Rabi pulses'), which matters when matching user input.
crop["Crop"].unique()

array(['Arecanut', 'Arhar/Tur', 'Castor seed', 'Coconut ', 'Cotton(lint)',
       'Dry chillies', 'Gram', 'Jute', 'Linseed', 'Maize', 'Mesta',
       'Niger seed', 'Onion', 'Other  Rabi pulses', 'Potato',
       'Rapeseed &Mustard', 'Rice', 'Sesamum', 'Small millets',
       'Sugarcane', 'Sweet potato', 'Tapioca', 'Tobacco', 'Turmeric',
       'Wheat', 'Bajra', 'Black pepper', 'Cardamom', 'Coriander',
       'Garlic', 'Ginger', 'Groundnut', 'Horse-gram', 'Jowar', 'Ragi',
       'Cashewnut', 'Banana', 'Soyabean', 'Barley', 'Khesari', 'Masoor',
       'Moong(Green Gram)', 'Other Kharif pulses', 'Safflower',
       'Sannhamp', 'Sunflower', 'Urad', 'Peas & beans (Pulses)',
       'other oilseeds', 'Other Cereals', 'Cowpea(Lobia)',
       'Oilseeds total', 'Guar seed', 'Other Summer Pulses', 'Moth'],
      dtype=object)

In [6]:
# List the distinct seasons; the raw labels are space-padded (e.g. 'Kharif     ')
crop["Season"].unique()

array(['Whole Year ', 'Kharif     ', 'Rabi       ', 'Autumn     ',
       'Summer     ', 'Winter     '], dtype=object)

In [7]:
# List the distinct states/territories present in the dataset
crop["State"].unique()

array(['Assam', 'Karnataka', 'Kerala', 'Meghalaya', 'West Bengal',
       'Puducherry', 'Goa', 'Andhra Pradesh', 'Tamil Nadu', 'Odisha',
       'Bihar', 'Gujarat', 'Madhya Pradesh', 'Maharashtra', 'Mizoram',
       'Punjab', 'Uttar Pradesh', 'Haryana', 'Himachal Pradesh',
       'Tripura', 'Nagaland', 'Chhattisgarh', 'Uttarakhand', 'Jharkhand',
       'Delhi', 'Manipur', 'Jammu and Kashmir', 'Telangana',
       'Arunachal Pradesh', 'Sikkim'], dtype=object)

In [8]:
# Create a LabelEncoder to map string categories to integer codes.
# This single instance is reused for every categorical column in the next cell.
encode = LabelEncoder()

In [9]:
# Data preprocessing: replace each string-valued column with integer codes.
# `encode` is refit on every pass, so afterwards it only holds the classes of the
# last column ("State"); the Crop and Season mappings are not retained on it.
for categorical_column in ("Crop", "Season", "State"):
    crop[categorical_column] = encode.fit_transform(crop[categorical_column])

In [10]:
# Confirm the crop names have been replaced by integer codes
crop["Crop"].unique()

array([ 0,  1,  8,  9, 11, 13, 16, 21, 23, 24, 26, 29, 31, 32, 37, 39, 40,
       43, 44, 46, 48, 49, 50, 51, 53,  2,  5,  6, 10, 14, 15, 17, 19, 20,
       38,  7,  3, 45,  4, 22, 25, 27, 34, 41, 42, 47, 52, 36, 54, 33, 12,
       30, 18, 35, 28])

In [11]:
# Confirm the seasons have been replaced by integer codes
crop["Season"].unique()

array([4, 1, 2, 0, 3, 5])

In [12]:
# Confirm the states have been replaced by integer codes
crop["State"].unique()

array([ 2, 12, 13, 17, 29, 21,  6,  0, 24, 20,  3,  7, 14, 15, 18, 22, 27,
        8,  9, 26, 19,  4, 28, 11,  5, 16, 10, 25,  1, 23])

In [13]:
# Separate features from target: every column except the last is an input,
# and the last column (Yield) is the value to predict.
# NOTE(review): the inputs include Area and Production, from which Yield can
# presumably be derived — confirm this is not target leakage.
x, y = crop.iloc[:, :-1], crop.iloc[:, -1]

In [14]:
# Preview the feature matrix. A bare last expression uses pandas' rich table
# display instead of the wrapped plain-text dump that print() produces.
x

       Crop  Crop_Year  Season  State      Area  Production  Annual_Rainfall  \
0         0       1997       4      2   73814.0       56708           2051.4   
1         1       1997       1      2    6637.0        4685           2051.4   
2         8       1997       1      2     796.0          22           2051.4   
3         9       1997       4      2   19656.0   126905000           2051.4   
4        11       1997       1      2    1739.0         794           2051.4   
...     ...        ...     ...    ...       ...         ...              ...   
19684    44       1998       1     19    4000.0        2000           1498.0   
19685    53       1998       2     19    1000.0        3000           1498.0   
19686    24       1997       1     10  310883.0      440900           1356.2   
19687    40       1997       1     10  275746.0        5488           1356.2   
19688    53       1997       2     10  239344.0      392160           1356.2   

        Fertilizer  Pesticide  
0      

In [15]:
# Preview the target series (Yield) via the notebook's display rather than print()
y

0           0.796087
1           0.710435
2           0.238333
3        5238.051739
4           0.420909
            ...     
19684       0.500000
19685       3.000000
19686       1.285000
19687       0.016667
19688       1.261818
Name: Yield, Length: 19689, dtype: float64


In [16]:
# Hold out 20% of the rows for testing. A fixed random_state makes the split —
# and therefore every score reported below — reproducible across re-runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [17]:
# Fitting several regression algorithms on the same split and comparing their predictions and scores.

In [18]:
# Baseline model: ordinary least-squares linear regression
Regg = LinearRegression()

In [19]:
# Fit the baseline on the training split
Regg.fit(x_train,y_train)

In [20]:
# Predict yields for the held-out rows
Regg.predict(x_test)

array([237.78064817,  68.28164128,  32.33918287, ..., -65.89851249,
        58.45220317,  49.65671222])

In [21]:
# Training-set R^2 (coefficient of determination) as a percentage; a regressor's
# `score` is R^2, not classification accuracy.
Regg.score(x_train,y_train)*100

35.14840864757819

In [22]:
# Single decision tree. random_state pins the tree's internal feature shuffling
# so that the fitted model and its scores are repeatable.
Dec = DecisionTreeRegressor(random_state=42)
Dec.fit(x_train,y_train)
Dec.predict(x_test)

array([0.31111111, 0.35357143, 6.55857143, ..., 1.36      , 0.37166667,
       0.80727273])

In [23]:
# Training-set R^2 as a percentage (R^2, not classification accuracy)
Dec.score(x_train,y_train)*100

100.0

In [24]:
# Held-out (test-set) R^2 as a percentage
Dec.score(x_test,y_test)*100

99.058118025413

In [25]:
# Random forest of 60 trees. random_state fixes the bootstrap sampling and
# feature shuffling so re-running the notebook reproduces the same model.
Rand = RandomForestRegressor(n_estimators=60, random_state=42)
Rand.fit(x_train,y_train)
Rand.predict(x_test)

array([0.27403567, 0.35143528, 6.57259732, ..., 1.45908706, 0.37897893,
       0.79841867])

In [26]:
# Training-set R^2 as a percentage (R^2, not classification accuracy)
Rand.score(x_train,y_train)*100

99.12959970284643

In [27]:
# Held-out (test-set) R^2 as a percentage
Rand.score(x_test,y_test)*100

98.24262557984471

In [28]:
# Extra-trees ensemble of 50 trees; this is the model CropYieldPrediction() uses.
# random_state makes its randomised splits — and hence its predictions — repeatable.
Etr = ExtraTreesRegressor(n_estimators=50, random_state=42)
Etr.fit(x_train,y_train)
Etr.predict(x_test)

array([0.36945848, 0.39429937, 6.72608756, ..., 1.43410519, 1.34800468,
       0.83719612])

In [29]:
# Training-set R^2 as a percentage (R^2, not classification accuracy)
Etr.score(x_train,y_train)*100

100.0

In [30]:
# Held-out (test-set) R^2 as a percentage
Etr.score(x_test,y_test)*100

99.58722208876772

In [31]:
# k-nearest-neighbours regressor with the library's default settings.
# fit() returns the estimator itself, so construction and training are chained.
k = KNeighborsRegressor().fit(x_train, y_train)
k.predict(x_test)

array([0.39955238, 0.29548386, 5.49938236, ..., 1.93178803, 0.78278182,
       0.87047273])

In [32]:
# Training-set R^2 as a percentage (R^2, not classification accuracy)
k.score(x_train,y_train)*100

94.72715275825443

In [33]:
# Held-out (test-set) R^2 as a percentage
k.score(x_test,y_test)*100

91.79506905652036

In [34]:
# Gradient-boosted trees (XGBoost) with default hyper-parameters.
# fit() returns the estimator itself, so construction and training are chained.
xgb = XGBRegressor().fit(x_train, y_train)
xgb.predict(x_test)

array([-0.4074052 ,  0.12183831,  6.6034055 , ...,  1.071834  ,
        0.52750033,  0.9339147 ], dtype=float32)

In [35]:
# Training-set R^2 as a percentage (R^2, not classification accuracy)
xgb.score(x_train,y_train)*100

99.99977673155087

In [36]:
# Held-out (test-set) R^2 as a percentage
xgb.score(x_test,y_test)*100

96.8469941962726

In [37]:
# Voting ensemble that combines the extra-trees and decision-tree regressors
# by averaging their predictions; construction and training are chained because
# fit() returns the estimator itself.
vc = VotingRegressor(estimators=[("Etr", Etr), ("Dec", Dec)]).fit(x_train, y_train)
vc.predict(x_test)

array([0.27852377, 0.40175581, 6.69057619, ..., 1.43218897, 0.82570214,
       1.06971051])

In [38]:
# Training-set R^2 as a percentage (R^2, not classification accuracy)
vc.score(x_train,y_train)*100

100.0

In [39]:
# Held-out (test-set) R^2 as a percentage
vc.score(x_test,y_test)*100

99.49005478542814

In [40]:
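# Interactive predictor built on the ExtraTrees model (Etr), which scored roughly 99.6% R^2 on the held-out split in the recorded run (an R^2 score, not classification accuracy).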
def _normalise_label(label):
    """Return `label` case-folded with all whitespace removed, for tolerant matching."""
    return "".join(str(label).split()).casefold()


def _encode_label(value, raw_labels, field):
    """Map a user-typed label to the integer code it received during training.

    LabelEncoder numbers classes by their sorted order, so the code of a label
    is its index in the sorted list of raw labels. Matching ignores case and
    whitespace because the raw dataset labels are padded (e.g. 'Kharif     ').

    Raises:
        ValueError: if `value` does not match any label in `raw_labels`.
    """
    ordered = sorted(raw_labels)
    codes = {_normalise_label(raw): code for code, raw in enumerate(ordered)}
    key = _normalise_label(value)
    if key not in codes:
        valid = ", ".join(str(raw).strip() for raw in ordered)
        raise ValueError(f"Unknown {field} {value!r}; expected one of: {valid}")
    return codes[key]


def CropYieldPrediction(data_path="crop_yield.csv"):
    """Prompt for one observation and print the yield predicted by `Etr`.

    The previous implementation called `encode.fit_transform([value])`, which
    refits the encoder on a single label and therefore always produced code 0,
    so the chosen crop, season and state never influenced the prediction.
    The codes are now rebuilt from the categorical columns of the training CSV.

    Args:
        data_path: CSV the models were trained on; only its Crop, Season and
            State columns are read, to recover the label-to-code mapping.

    Raises:
        ValueError: if a crop/season/state is not present in the dataset, or a
            numeric field cannot be parsed.
    """
    # Read the raw labels first so a missing file fails before any prompting.
    labels = pd.read_csv(data_path, usecols=["Crop", "Season", "State"])

    # Taking user input; categorical answers are validated as soon as they are given
    Crop = _encode_label(
        input("Enter Crop from the following (Arecanut, Arhar/Tur, Castor seed, Coconut, Cotton(lint), Dry chillies, Gram, Jute, Linseed, Maize, Mesta, Niger seed, Onion, Other Rabi pulses, Potato, Rapeseed & Mustard, Rice, Sesamum, Small millets, Sugarcane, Sweet potato, Tapioca, Tobacco, Turmeric, Wheat, Bajra, Black pepper, Cardamom, Coriander, Garlic, Ginger, Groundnut, Horse-gram, Jowar, Ragi, Cashewnut, Banana, Soyabean, Barley, Khesari, Masoor, Moong(Green Gram), Other Kharif pulses, Safflower, Sannhamp, Sunflower, Urad, Peas & beans (Pulses), other oilseeds, Other Cereals, Cowpea(Lobia), Oilseeds total, Guar seed, Other Summer Pulses, Moth): "),
        labels["Crop"].unique(),
        "Crop",
    )
    Crop_Year = int(input("Enter Crop Year: "))
    Season = _encode_label(
        input("Enter Season from (Whole Year, Kharif, Rabi, Autumn, Summer, Winter): "),
        labels["Season"].unique(),
        "Season",
    )
    State = _encode_label(
        input("Enter State from (Assam, Karnataka, Kerala, Meghalaya, West Bengal, Puducherry, Goa, Andhra Pradesh, Tamil Nadu, Odisha, Bihar, Gujarat, Madhya Pradesh, Maharashtra, Mizoram, Punjab, Uttar Pradesh, Haryana, Himachal Pradesh, Tripura, Nagaland, Chhattisgarh, Uttarakhand, Jharkhand, Delhi, Manipur, Jammu and Kashmir, Telangana, Arunachal Pradesh, Sikkim): "),
        labels["State"].unique(),
        "State",
    )
    # Prompt corrected: one hectare is 10,000 square metres (it previously said 1000).
    Area = float(input("Enter Area(in ha)(also 1 ha = 10000 sq meters): "))
    Production = float(input("Enter Production(in kg): "))
    Annual_Rainfall = float(input("Enter Annual Rainfall(in mm): "))
    Fertilizer = float(input("Enter Fertilizer(in kg): "))
    Pesticide = float(input("Enter Pesticide(in kg): "))

    # Use the training column names and order so the model sees the same layout it was fitted on.
    feature_names = ["Crop", "Crop_Year", "Season", "State", "Area",
                     "Production", "Annual_Rainfall", "Fertilizer", "Pesticide"]
    input_data = pd.DataFrame(
        [[Crop, Crop_Year, Season, State, Area, Production, Annual_Rainfall, Fertilizer, Pesticide]],
        columns=feature_names,
    )
    result = Etr.predict(input_data)
    # predict() returns one value per row; print the scalar rather than the array.
    print(f"{result[0]} kg/ha is type crop yield.")

In [41]:
# Sample run of the interactive prediction function (it prompts for each feature)
CropYieldPrediction()

Enter Crop from the following (Arecanut, Arhar/Tur, Castor seed, Coconut, Cotton(lint), Dry chillies, Gram, Jute, Linseed, Maize, Mesta, Niger seed, Onion, Other Rabi pulses, Potato, Rapeseed & Mustard, Rice, Sesamum, Small millets, Sugarcane, Sweet potato, Tapioca, Tobacco, Turmeric, Wheat, Bajra, Black pepper, Cardamom, Coriander, Garlic, Ginger, Groundnut, Horse-gram, Jowar, Ragi, Cashewnut, Banana, Soyabean, Barley, Khesari, Masoor, Moong(Green Gram), Other Kharif pulses, Safflower, Sannhamp, Sunflower, Urad, Peas & beans (Pulses), other oilseeds, Other Cereals, Cowpea(Lobia), Oilseeds total, Guar seed, Other Summer Pulses, Moth): Urad
Enter Crop Year: 2024
Enter Season from (Whole Year, Kharif, Rabi, Autumn, Summer, Winter): Kharif
Enter State from (Assam, Karnataka, Kerala, Meghalaya, West Bengal, Puducherry, Goa, Andhra Pradesh, Tamil Nadu, Odisha, Bihar, Gujarat, Madhya Pradesh, Maharashtra, Mizoram, Punjab, Uttar Pradesh, Haryana, Himachal Pradesh, Tripura, Nagaland, Chhattisg



In [42]:
# Second interactive sample run
CropYieldPrediction()

Enter Crop from the following (Arecanut, Arhar/Tur, Castor seed, Coconut, Cotton(lint), Dry chillies, Gram, Jute, Linseed, Maize, Mesta, Niger seed, Onion, Other Rabi pulses, Potato, Rapeseed & Mustard, Rice, Sesamum, Small millets, Sugarcane, Sweet potato, Tapioca, Tobacco, Turmeric, Wheat, Bajra, Black pepper, Cardamom, Coriander, Garlic, Ginger, Groundnut, Horse-gram, Jowar, Ragi, Cashewnut, Banana, Soyabean, Barley, Khesari, Masoor, Moong(Green Gram), Other Kharif pulses, Safflower, Sannhamp, Sunflower, Urad, Peas & beans (Pulses), other oilseeds, Other Cereals, Cowpea(Lobia), Oilseeds total, Guar seed, Other Summer Pulses, Moth): Cashewnut
Enter Crop Year: 2020
Enter Season from (Whole Year, Kharif, Rabi, Autumn, Summer, Winter): Rabi
Enter State from (Assam, Karnataka, Kerala, Meghalaya, West Bengal, Puducherry, Goa, Andhra Pradesh, Tamil Nadu, Odisha, Bihar, Gujarat, Madhya Pradesh, Maharashtra, Mizoram, Punjab, Uttar Pradesh, Haryana, Himachal Pradesh, Tripura, Nagaland, Chhatt



In [43]:
# Third interactive sample run
CropYieldPrediction()

Enter Crop from the following (Arecanut, Arhar/Tur, Castor seed, Coconut, Cotton(lint), Dry chillies, Gram, Jute, Linseed, Maize, Mesta, Niger seed, Onion, Other Rabi pulses, Potato, Rapeseed & Mustard, Rice, Sesamum, Small millets, Sugarcane, Sweet potato, Tapioca, Tobacco, Turmeric, Wheat, Bajra, Black pepper, Cardamom, Coriander, Garlic, Ginger, Groundnut, Horse-gram, Jowar, Ragi, Cashewnut, Banana, Soyabean, Barley, Khesari, Masoor, Moong(Green Gram), Other Kharif pulses, Safflower, Sannhamp, Sunflower, Urad, Peas & beans (Pulses), other oilseeds, Other Cereals, Cowpea(Lobia), Oilseeds total, Guar seed, Other Summer Pulses, Moth): Rice
Enter Crop Year: 2025
Enter Season from (Whole Year, Kharif, Rabi, Autumn, Summer, Winter): Kharif
Enter State from (Assam, Karnataka, Kerala, Meghalaya, West Bengal, Puducherry, Goa, Andhra Pradesh, Tamil Nadu, Odisha, Bihar, Gujarat, Madhya Pradesh, Maharashtra, Mizoram, Punjab, Uttar Pradesh, Haryana, Himachal Pradesh, Tripura, Nagaland, Chhattisg



In [44]:
# Fourth interactive sample run
CropYieldPrediction()

Enter Crop from the following (Arecanut, Arhar/Tur, Castor seed, Coconut, Cotton(lint), Dry chillies, Gram, Jute, Linseed, Maize, Mesta, Niger seed, Onion, Other Rabi pulses, Potato, Rapeseed & Mustard, Rice, Sesamum, Small millets, Sugarcane, Sweet potato, Tapioca, Tobacco, Turmeric, Wheat, Bajra, Black pepper, Cardamom, Coriander, Garlic, Ginger, Groundnut, Horse-gram, Jowar, Ragi, Cashewnut, Banana, Soyabean, Barley, Khesari, Masoor, Moong(Green Gram), Other Kharif pulses, Safflower, Sannhamp, Sunflower, Urad, Peas & beans (Pulses), other oilseeds, Other Cereals, Cowpea(Lobia), Oilseeds total, Guar seed, Other Summer Pulses, Moth): Maize
Enter Crop Year: 2025
Enter Season from (Whole Year, Kharif, Rabi, Autumn, Summer, Winter): Kharif
Enter State from (Assam, Karnataka, Kerala, Meghalaya, West Bengal, Puducherry, Goa, Andhra Pradesh, Tamil Nadu, Odisha, Bihar, Gujarat, Madhya Pradesh, Maharashtra, Mizoram, Punjab, Uttar Pradesh, Haryana, Himachal Pradesh, Tripura, Nagaland, Chhattis

