In [1]:
import pandas as pd 
import numpy as np
pd.set_option("display.max_columns",None)

from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

# ML models
#  minmaxscaler -> 0-1 
#  standardization -> mean = 0, std dev=> 1, -5.23, 5.56




In [2]:
data = pd.read_csv('Smart_Farming_Crop_Yield_2024.csv')

In [3]:
data.head(5)

Unnamed: 0,farm_id,region,crop_type,soil_moisture_%,soil_pH,temperature_C,rainfall_mm,humidity_%,sunlight_hours,irrigation_type,fertilizer_type,pesticide_usage_ml,sowing_date,harvest_date,total_days,yield_kg_per_hectare,sensor_id,timestamp,latitude,longitude,NDVI_index,crop_disease_status
0,FARM0001,North India,Wheat,35.95,5.99,17.79,75.62,77.03,7.27,,Organic,6.34,2024-01-08,2024-05-09,122,4408.07,SENS0001,2024-03-19,14.970941,82.997689,0.63,Mild
1,FARM0002,South USA,Soybean,19.74,7.24,30.18,89.91,61.13,5.67,Sprinkler,Inorganic,9.6,2024-02-04,2024-05-26,112,5389.98,SENS0002,2024-04-21,16.613022,70.869009,0.58,
2,FARM0003,South USA,Wheat,29.32,7.16,27.37,265.43,68.87,8.23,Drip,Mixed,15.26,2024-02-03,2024-06-26,144,2931.16,SENS0003,2024-02-28,19.503156,79.068206,0.8,Mild
3,FARM0004,Central USA,Maize,17.33,6.03,33.73,212.01,70.46,5.03,Sprinkler,Organic,25.8,2024-02-21,2024-07-04,134,4227.8,SENS0004,2024-05-14,31.071298,85.519998,0.44,
4,FARM0005,Central USA,Cotton,19.37,5.92,33.86,269.09,55.73,7.93,,Mixed,25.65,2024-02-05,2024-05-20,105,4979.96,SENS0005,2024-04-13,16.56854,81.69172,0.84,Severe


- Object : `farm_id`,`region`,`crop_type`,`irrigation_type`,`fertilizer_type`,`sowing_date`,`harvest_date`,`sensor_id`,`timestamp`,`crop_disease_satus`
- float : 

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 22 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   farm_id               500 non-null    object 
 1   region                500 non-null    object 
 2   crop_type             500 non-null    object 
 3   soil_moisture_%       500 non-null    float64
 4   soil_pH               500 non-null    float64
 5   temperature_C         500 non-null    float64
 6   rainfall_mm           500 non-null    float64
 7   humidity_%            500 non-null    float64
 8   sunlight_hours        500 non-null    float64
 9   irrigation_type       350 non-null    object 
 10  fertilizer_type       500 non-null    object 
 11  pesticide_usage_ml    500 non-null    float64
 12  sowing_date           500 non-null    object 
 13  harvest_date          500 non-null    object 
 14  total_days            500 non-null    int64  
 15  yield_kg_per_hectare  5

In [5]:
for i in data.columns:
    print(data[i].unique())

['FARM0001' 'FARM0002' 'FARM0003' 'FARM0004' 'FARM0005' 'FARM0006'
 'FARM0007' 'FARM0008' 'FARM0009' 'FARM0010' 'FARM0011' 'FARM0012'
 'FARM0013' 'FARM0014' 'FARM0015' 'FARM0016' 'FARM0017' 'FARM0018'
 'FARM0019' 'FARM0020' 'FARM0021' 'FARM0022' 'FARM0023' 'FARM0024'
 'FARM0025' 'FARM0026' 'FARM0027' 'FARM0028' 'FARM0029' 'FARM0030'
 'FARM0031' 'FARM0032' 'FARM0033' 'FARM0034' 'FARM0035' 'FARM0036'
 'FARM0037' 'FARM0038' 'FARM0039' 'FARM0040' 'FARM0041' 'FARM0042'
 'FARM0043' 'FARM0044' 'FARM0045' 'FARM0046' 'FARM0047' 'FARM0048'
 'FARM0049' 'FARM0050' 'FARM0051' 'FARM0052' 'FARM0053' 'FARM0054'
 'FARM0055' 'FARM0056' 'FARM0057' 'FARM0058' 'FARM0059' 'FARM0060'
 'FARM0061' 'FARM0062' 'FARM0063' 'FARM0064' 'FARM0065' 'FARM0066'
 'FARM0067' 'FARM0068' 'FARM0069' 'FARM0070' 'FARM0071' 'FARM0072'
 'FARM0073' 'FARM0074' 'FARM0075' 'FARM0076' 'FARM0077' 'FARM0078'
 'FARM0079' 'FARM0080' 'FARM0081' 'FARM0082' 'FARM0083' 'FARM0084'
 'FARM0085' 'FARM0086' 'FARM0087' 'FARM0088' 'FARM0089' 'FARM0

In [6]:
data['timestamp'].nunique()

172

- Columns to drop : `farm_id` , `Sensor_ID`
- columns to categorize : `region`, `crop_type`,`timestamp`,`irrigation_type`,`fertilizer_type`,`crop_disease_status`

In [7]:
# dropping columns
data.drop(
    columns = ['farm_id' , 'sensor_id'],
    axis=1,
    inplace=True
)

In [8]:
data.head()

Unnamed: 0,region,crop_type,soil_moisture_%,soil_pH,temperature_C,rainfall_mm,humidity_%,sunlight_hours,irrigation_type,fertilizer_type,pesticide_usage_ml,sowing_date,harvest_date,total_days,yield_kg_per_hectare,timestamp,latitude,longitude,NDVI_index,crop_disease_status
0,North India,Wheat,35.95,5.99,17.79,75.62,77.03,7.27,,Organic,6.34,2024-01-08,2024-05-09,122,4408.07,2024-03-19,14.970941,82.997689,0.63,Mild
1,South USA,Soybean,19.74,7.24,30.18,89.91,61.13,5.67,Sprinkler,Inorganic,9.6,2024-02-04,2024-05-26,112,5389.98,2024-04-21,16.613022,70.869009,0.58,
2,South USA,Wheat,29.32,7.16,27.37,265.43,68.87,8.23,Drip,Mixed,15.26,2024-02-03,2024-06-26,144,2931.16,2024-02-28,19.503156,79.068206,0.8,Mild
3,Central USA,Maize,17.33,6.03,33.73,212.01,70.46,5.03,Sprinkler,Organic,25.8,2024-02-21,2024-07-04,134,4227.8,2024-05-14,31.071298,85.519998,0.44,
4,Central USA,Cotton,19.37,5.92,33.86,269.09,55.73,7.93,,Mixed,25.65,2024-02-05,2024-05-20,105,4979.96,2024-04-13,16.56854,81.69172,0.84,Severe


#### Categorizing Regioins

In [9]:
# region categorizing
region_labels = LabelEncoder()
data['region'] = region_labels.fit_transform(data['region']) 


In [10]:
data.head()

Unnamed: 0,region,crop_type,soil_moisture_%,soil_pH,temperature_C,rainfall_mm,humidity_%,sunlight_hours,irrigation_type,fertilizer_type,pesticide_usage_ml,sowing_date,harvest_date,total_days,yield_kg_per_hectare,timestamp,latitude,longitude,NDVI_index,crop_disease_status
0,2,Wheat,35.95,5.99,17.79,75.62,77.03,7.27,,Organic,6.34,2024-01-08,2024-05-09,122,4408.07,2024-03-19,14.970941,82.997689,0.63,Mild
1,4,Soybean,19.74,7.24,30.18,89.91,61.13,5.67,Sprinkler,Inorganic,9.6,2024-02-04,2024-05-26,112,5389.98,2024-04-21,16.613022,70.869009,0.58,
2,4,Wheat,29.32,7.16,27.37,265.43,68.87,8.23,Drip,Mixed,15.26,2024-02-03,2024-06-26,144,2931.16,2024-02-28,19.503156,79.068206,0.8,Mild
3,0,Maize,17.33,6.03,33.73,212.01,70.46,5.03,Sprinkler,Organic,25.8,2024-02-21,2024-07-04,134,4227.8,2024-05-14,31.071298,85.519998,0.44,
4,0,Cotton,19.37,5.92,33.86,269.09,55.73,7.93,,Mixed,25.65,2024-02-05,2024-05-20,105,4979.96,2024-04-13,16.56854,81.69172,0.84,Severe


In [11]:
data['crop_disease_status'].isna().sum()

130

In [12]:
500-130

370

In [13]:
data[data['crop_disease_status']=='Severe']['crop_disease_status'].value_counts()

crop_disease_status
Severe    133
Name: count, dtype: int64

In [14]:
370-133

237

In [15]:
data[data['crop_disease_status']=='Mild']['crop_disease_status'].value_counts()

crop_disease_status
Mild    125
Name: count, dtype: int64

In [16]:
237-125

112

In [17]:
data['crop_disease_status'].fillna('uncertain',inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['crop_disease_status'].fillna('uncertain',inplace=True)


In [18]:
data.head()

Unnamed: 0,region,crop_type,soil_moisture_%,soil_pH,temperature_C,rainfall_mm,humidity_%,sunlight_hours,irrigation_type,fertilizer_type,pesticide_usage_ml,sowing_date,harvest_date,total_days,yield_kg_per_hectare,timestamp,latitude,longitude,NDVI_index,crop_disease_status
0,2,Wheat,35.95,5.99,17.79,75.62,77.03,7.27,,Organic,6.34,2024-01-08,2024-05-09,122,4408.07,2024-03-19,14.970941,82.997689,0.63,Mild
1,4,Soybean,19.74,7.24,30.18,89.91,61.13,5.67,Sprinkler,Inorganic,9.6,2024-02-04,2024-05-26,112,5389.98,2024-04-21,16.613022,70.869009,0.58,uncertain
2,4,Wheat,29.32,7.16,27.37,265.43,68.87,8.23,Drip,Mixed,15.26,2024-02-03,2024-06-26,144,2931.16,2024-02-28,19.503156,79.068206,0.8,Mild
3,0,Maize,17.33,6.03,33.73,212.01,70.46,5.03,Sprinkler,Organic,25.8,2024-02-21,2024-07-04,134,4227.8,2024-05-14,31.071298,85.519998,0.44,uncertain
4,0,Cotton,19.37,5.92,33.86,269.09,55.73,7.93,,Mixed,25.65,2024-02-05,2024-05-20,105,4979.96,2024-04-13,16.56854,81.69172,0.84,Severe


In [19]:
data['irrigation_type'].isna().sum()

150

In [20]:
data['irrigation_type'].fillna('Unknown',inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['irrigation_type'].fillna('Unknown',inplace=True)


In [21]:
data.head()

Unnamed: 0,region,crop_type,soil_moisture_%,soil_pH,temperature_C,rainfall_mm,humidity_%,sunlight_hours,irrigation_type,fertilizer_type,pesticide_usage_ml,sowing_date,harvest_date,total_days,yield_kg_per_hectare,timestamp,latitude,longitude,NDVI_index,crop_disease_status
0,2,Wheat,35.95,5.99,17.79,75.62,77.03,7.27,Unknown,Organic,6.34,2024-01-08,2024-05-09,122,4408.07,2024-03-19,14.970941,82.997689,0.63,Mild
1,4,Soybean,19.74,7.24,30.18,89.91,61.13,5.67,Sprinkler,Inorganic,9.6,2024-02-04,2024-05-26,112,5389.98,2024-04-21,16.613022,70.869009,0.58,uncertain
2,4,Wheat,29.32,7.16,27.37,265.43,68.87,8.23,Drip,Mixed,15.26,2024-02-03,2024-06-26,144,2931.16,2024-02-28,19.503156,79.068206,0.8,Mild
3,0,Maize,17.33,6.03,33.73,212.01,70.46,5.03,Sprinkler,Organic,25.8,2024-02-21,2024-07-04,134,4227.8,2024-05-14,31.071298,85.519998,0.44,uncertain
4,0,Cotton,19.37,5.92,33.86,269.09,55.73,7.93,Unknown,Mixed,25.65,2024-02-05,2024-05-20,105,4979.96,2024-04-13,16.56854,81.69172,0.84,Severe


In [22]:
# irrigation_type categorizing
irrigation_labels = LabelEncoder()
data['irrigation_type'] = irrigation_labels.fit_transform(data['irrigation_type']) 


In [23]:
data.head()

Unnamed: 0,region,crop_type,soil_moisture_%,soil_pH,temperature_C,rainfall_mm,humidity_%,sunlight_hours,irrigation_type,fertilizer_type,pesticide_usage_ml,sowing_date,harvest_date,total_days,yield_kg_per_hectare,timestamp,latitude,longitude,NDVI_index,crop_disease_status
0,2,Wheat,35.95,5.99,17.79,75.62,77.03,7.27,3,Organic,6.34,2024-01-08,2024-05-09,122,4408.07,2024-03-19,14.970941,82.997689,0.63,Mild
1,4,Soybean,19.74,7.24,30.18,89.91,61.13,5.67,2,Inorganic,9.6,2024-02-04,2024-05-26,112,5389.98,2024-04-21,16.613022,70.869009,0.58,uncertain
2,4,Wheat,29.32,7.16,27.37,265.43,68.87,8.23,0,Mixed,15.26,2024-02-03,2024-06-26,144,2931.16,2024-02-28,19.503156,79.068206,0.8,Mild
3,0,Maize,17.33,6.03,33.73,212.01,70.46,5.03,2,Organic,25.8,2024-02-21,2024-07-04,134,4227.8,2024-05-14,31.071298,85.519998,0.44,uncertain
4,0,Cotton,19.37,5.92,33.86,269.09,55.73,7.93,3,Mixed,25.65,2024-02-05,2024-05-20,105,4979.96,2024-04-13,16.56854,81.69172,0.84,Severe


In [24]:
# fertilizer_type	 categorizing
fertilizer_labels = LabelEncoder()
data['fertilizer_type'] = fertilizer_labels.fit_transform(data['fertilizer_type']) 


In [25]:
data.head()

Unnamed: 0,region,crop_type,soil_moisture_%,soil_pH,temperature_C,rainfall_mm,humidity_%,sunlight_hours,irrigation_type,fertilizer_type,pesticide_usage_ml,sowing_date,harvest_date,total_days,yield_kg_per_hectare,timestamp,latitude,longitude,NDVI_index,crop_disease_status
0,2,Wheat,35.95,5.99,17.79,75.62,77.03,7.27,3,2,6.34,2024-01-08,2024-05-09,122,4408.07,2024-03-19,14.970941,82.997689,0.63,Mild
1,4,Soybean,19.74,7.24,30.18,89.91,61.13,5.67,2,0,9.6,2024-02-04,2024-05-26,112,5389.98,2024-04-21,16.613022,70.869009,0.58,uncertain
2,4,Wheat,29.32,7.16,27.37,265.43,68.87,8.23,0,1,15.26,2024-02-03,2024-06-26,144,2931.16,2024-02-28,19.503156,79.068206,0.8,Mild
3,0,Maize,17.33,6.03,33.73,212.01,70.46,5.03,2,2,25.8,2024-02-21,2024-07-04,134,4227.8,2024-05-14,31.071298,85.519998,0.44,uncertain
4,0,Cotton,19.37,5.92,33.86,269.09,55.73,7.93,3,1,25.65,2024-02-05,2024-05-20,105,4979.96,2024-04-13,16.56854,81.69172,0.84,Severe


In [26]:
# crop_disease_status categorizing
disease_labels = LabelEncoder()
data['crop_disease_status'] = disease_labels.fit_transform(data['crop_disease_status']) 


In [27]:
data.head()

Unnamed: 0,region,crop_type,soil_moisture_%,soil_pH,temperature_C,rainfall_mm,humidity_%,sunlight_hours,irrigation_type,fertilizer_type,pesticide_usage_ml,sowing_date,harvest_date,total_days,yield_kg_per_hectare,timestamp,latitude,longitude,NDVI_index,crop_disease_status
0,2,Wheat,35.95,5.99,17.79,75.62,77.03,7.27,3,2,6.34,2024-01-08,2024-05-09,122,4408.07,2024-03-19,14.970941,82.997689,0.63,0
1,4,Soybean,19.74,7.24,30.18,89.91,61.13,5.67,2,0,9.6,2024-02-04,2024-05-26,112,5389.98,2024-04-21,16.613022,70.869009,0.58,3
2,4,Wheat,29.32,7.16,27.37,265.43,68.87,8.23,0,1,15.26,2024-02-03,2024-06-26,144,2931.16,2024-02-28,19.503156,79.068206,0.8,0
3,0,Maize,17.33,6.03,33.73,212.01,70.46,5.03,2,2,25.8,2024-02-21,2024-07-04,134,4227.8,2024-05-14,31.071298,85.519998,0.44,3
4,0,Cotton,19.37,5.92,33.86,269.09,55.73,7.93,3,1,25.65,2024-02-05,2024-05-20,105,4979.96,2024-04-13,16.56854,81.69172,0.84,2


In [28]:
# crop_type categorizing
crop_labels = LabelEncoder()
data['crop_type'] = crop_labels.fit_transform(data['crop_type']) 


In [29]:
data.head()

Unnamed: 0,region,crop_type,soil_moisture_%,soil_pH,temperature_C,rainfall_mm,humidity_%,sunlight_hours,irrigation_type,fertilizer_type,pesticide_usage_ml,sowing_date,harvest_date,total_days,yield_kg_per_hectare,timestamp,latitude,longitude,NDVI_index,crop_disease_status
0,2,4,35.95,5.99,17.79,75.62,77.03,7.27,3,2,6.34,2024-01-08,2024-05-09,122,4408.07,2024-03-19,14.970941,82.997689,0.63,0
1,4,3,19.74,7.24,30.18,89.91,61.13,5.67,2,0,9.6,2024-02-04,2024-05-26,112,5389.98,2024-04-21,16.613022,70.869009,0.58,3
2,4,4,29.32,7.16,27.37,265.43,68.87,8.23,0,1,15.26,2024-02-03,2024-06-26,144,2931.16,2024-02-28,19.503156,79.068206,0.8,0
3,0,1,17.33,6.03,33.73,212.01,70.46,5.03,2,2,25.8,2024-02-21,2024-07-04,134,4227.8,2024-05-14,31.071298,85.519998,0.44,3
4,0,0,19.37,5.92,33.86,269.09,55.73,7.93,3,1,25.65,2024-02-05,2024-05-20,105,4979.96,2024-04-13,16.56854,81.69172,0.84,2


In [30]:
# timestamp categorizing
timestamp_labels = LabelEncoder()
data['timestamp'] = timestamp_labels.fit_transform(data['timestamp']) 


In [31]:
data.head()

Unnamed: 0,region,crop_type,soil_moisture_%,soil_pH,temperature_C,rainfall_mm,humidity_%,sunlight_hours,irrigation_type,fertilizer_type,pesticide_usage_ml,sowing_date,harvest_date,total_days,yield_kg_per_hectare,timestamp,latitude,longitude,NDVI_index,crop_disease_status
0,2,4,35.95,5.99,17.79,75.62,77.03,7.27,3,2,6.34,2024-01-08,2024-05-09,122,4408.07,61,14.970941,82.997689,0.63,0
1,4,3,19.74,7.24,30.18,89.91,61.13,5.67,2,0,9.6,2024-02-04,2024-05-26,112,5389.98,94,16.613022,70.869009,0.58,3
2,4,4,29.32,7.16,27.37,265.43,68.87,8.23,0,1,15.26,2024-02-03,2024-06-26,144,2931.16,43,19.503156,79.068206,0.8,0
3,0,1,17.33,6.03,33.73,212.01,70.46,5.03,2,2,25.8,2024-02-21,2024-07-04,134,4227.8,115,31.071298,85.519998,0.44,3
4,0,0,19.37,5.92,33.86,269.09,55.73,7.93,3,1,25.65,2024-02-05,2024-05-20,105,4979.96,86,16.56854,81.69172,0.84,2


In [32]:
# dropping columns
data.drop(
    columns = ['sowing_date' , 'harvest_date'],
    axis=1,
    inplace=True
)

In [33]:
data.head(20)

Unnamed: 0,region,crop_type,soil_moisture_%,soil_pH,temperature_C,rainfall_mm,humidity_%,sunlight_hours,irrigation_type,fertilizer_type,pesticide_usage_ml,total_days,yield_kg_per_hectare,timestamp,latitude,longitude,NDVI_index,crop_disease_status
0,2,4,35.95,5.99,17.79,75.62,77.03,7.27,3,2,6.34,122,4408.07,61,14.970941,82.997689,0.63,0
1,4,3,19.74,7.24,30.18,89.91,61.13,5.67,2,0,9.6,112,5389.98,94,16.613022,70.869009,0.58,3
2,4,4,29.32,7.16,27.37,265.43,68.87,8.23,0,1,15.26,144,2931.16,43,19.503156,79.068206,0.8,0
3,0,1,17.33,6.03,33.73,212.01,70.46,5.03,2,2,25.8,134,4227.8,115,31.071298,85.519998,0.44,3
4,0,0,19.37,5.92,33.86,269.09,55.73,7.93,3,1,25.65,105,4979.96,86,16.56854,81.69172,0.84,2
5,0,2,44.91,5.78,24.87,238.95,83.06,4.92,2,1,24.0,114,4383.55,55,23.227859,89.421568,0.82,3
6,2,3,36.28,7.04,21.8,123.38,47.91,4.02,1,1,39.29,145,4501.2,163,25.224255,73.056785,0.76,3
7,1,1,27.1,5.72,22.26,296.33,80.34,5.44,2,1,47.61,121,5264.09,102,23.317654,72.51521,0.7,0
8,0,3,40.54,6.35,19.24,184.82,76.5,5.21,1,0,49.78,118,5598.46,109,13.025105,74.493947,0.5,0
9,1,2,10.25,6.92,16.18,66.85,41.57,5.98,2,0,35.1,98,4893.41,73,24.405291,74.859945,0.58,2


Data Spliting Train/Test

In [34]:
X = data.drop(columns="crop_disease_status").copy()
# y = 
X.head(5)

Unnamed: 0,region,crop_type,soil_moisture_%,soil_pH,temperature_C,rainfall_mm,humidity_%,sunlight_hours,irrigation_type,fertilizer_type,pesticide_usage_ml,total_days,yield_kg_per_hectare,timestamp,latitude,longitude,NDVI_index
0,2,4,35.95,5.99,17.79,75.62,77.03,7.27,3,2,6.34,122,4408.07,61,14.970941,82.997689,0.63
1,4,3,19.74,7.24,30.18,89.91,61.13,5.67,2,0,9.6,112,5389.98,94,16.613022,70.869009,0.58
2,4,4,29.32,7.16,27.37,265.43,68.87,8.23,0,1,15.26,144,2931.16,43,19.503156,79.068206,0.8
3,0,1,17.33,6.03,33.73,212.01,70.46,5.03,2,2,25.8,134,4227.8,115,31.071298,85.519998,0.44
4,0,0,19.37,5.92,33.86,269.09,55.73,7.93,3,1,25.65,105,4979.96,86,16.56854,81.69172,0.84


In [35]:
y = data['crop_disease_status'].copy()

In [36]:
from sklearn.model_selection import train_test_split

In [37]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25  , random_state=169506 )

In [38]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)



(375, 17)
(375,)
(125, 17)
(125,)


In [39]:
print(X_train.ndim)
print(y_train.ndim)
print(X_test.ndim)
print(y_test.ndim)



2
1
2
1


In [40]:
print(type(X_train))
print(type(y_train))
print(type(X_test))
print(type(y_test))



<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>


In [41]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [42]:
log_reg_model = LogisticRegression()
dct = DecisionTreeClassifier()
rfc = RandomForestClassifier()
svc_model = SVC()

In [43]:
log =  log_reg_model.fit(X_train, y_train)
dct = dct.fit(X_train, y_train)
rfc = rfc.fit(X_train, y_train)
svc = svc_model.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [44]:
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    confusion_matrix,
    roc_auc_score
)


In [45]:
models = {
    "Logistic Regression": log,
    "Decision Tree": dct,
    "Random Forest": rfc,
    "SVC": svc
}


In [46]:
for name, model in models.items():
    y_pred = model.predict(X_test)

    print(f"\n{name}")
    print("-" * 40)
    print("Accuracy :", accuracy_score(y_test, y_pred))
    print("Precision:", precision_score(y_test, y_pred, average="weighted"))
    print("Recall   :", recall_score(y_test, y_pred, average="weighted"))
    print("F1 Score :", f1_score(y_test, y_pred, average="weighted"))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))



Logistic Regression
----------------------------------------
Accuracy : 0.2
Precision: 0.168623973727422
Recall   : 0.2
F1 Score : 0.17594836627672447

Confusion Matrix:
[[ 8  0 11 19]
 [ 5  0 12 11]
 [ 9  0  8 15]
 [ 7  0 11  9]]

Classification Report:
              precision    recall  f1-score   support

           0       0.28      0.21      0.24        38
           1       0.00      0.00      0.00        28
           2       0.19      0.25      0.22        32
           3       0.17      0.33      0.22        27

    accuracy                           0.20       125
   macro avg       0.16      0.20      0.17       125
weighted avg       0.17      0.20      0.18       125


Decision Tree
----------------------------------------
Accuracy : 0.264
Precision: 0.27604303030303035
Recall   : 0.264
F1 Score : 0.26499372115643305

Confusion Matrix:
[[ 9 11 10  8]
 [ 6  7 10  5]
 [ 8  8  7  9]
 [ 2  9  6 10]]

Classification Report:
              precision    recall  f1-score   support

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


X - region,crop_type
Y - crop_disease_status

Data Normallisation/Standardization

Model Choosing