In [1]:
import pandas as pd
import numpy as np

import seaborn as sns


from sklearn.metrics import accuracy_score, classification_report, mean_absolute_error
from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression


Dataset consist of 30000 rows and 14 features 

Description of each feature is as follows 

1. Random: An identifier for each record, likely indicating a random sample or batch (object type).
2. Average of chlorophyll in the plant (ACHP): The average chlorophyll content in the plant (float type).
3. Plant height rate (PHR): The rate of plant height growth (float type).
4. Average wet weight of the growth vegetative (AWWGV): The average wet weight of vegetative growth (float type).
5. Average leaf area of the plant (ALAP): The average leaf area of the plant (float type).
6. Average number of plant leaves (ANPL): The average number of leaves per plant (float type).
7. Average root diameter (ARD): The average diameter of the plant's roots (float type).
8. Average dry weight of the root (ADWR): The average dry weight of the plant's roots (float type).
9. Percentage of dry matter for vegetative growth (PDMVG): The percentage of dry matter in vegetative growth (float type).
10. Average root length (ARL): The average length of the plant's roots (float type).
11. Average wet weight of the root (AWWR): The average wet weight of the plant's roots (float type).
12. Average dry weight of vegetative plants (ADWV): The average dry weight of vegetative parts of the plant (float type).
13. Percentage of dry matter for root growth (PDMRG): The percentage of dry matter in root growth (float type).
14. Class: The class or category to which the plant record belongs (object type). 

## Data Loading

In [2]:
df = pd.read_csv("/kaggle/input/advanced-iot-agriculture-2024/Advanced_IoT_Dataset.csv")

In [3]:
df.head()

Unnamed: 0,Random,Average of chlorophyll in the plant (ACHP),Plant height rate (PHR),Average wet weight of the growth vegetative (AWWGV),Average leaf area of the plant (ALAP),Average number of plant leaves (ANPL),Average root diameter (ARD),Average dry weight of the root (ADWR),Percentage of dry matter for vegetative growth (PDMVG),Average root length (ARL),Average wet weight of the root (AWWR),Average dry weight of vegetative plants (ADWV),Percentage of dry matter for root growth (PDMRG),Class
0,R1,34.533468,54.566983,1.147449,1284.229549,4.999713,16.274918,1.70681,18.399982,19.739037,2.94924,0.209251,57.633906,SA
1,R1,34.489028,54.567692,1.14953,1284.247744,5.024259,16.269452,1.70093,18.398289,19.758836,2.943137,0.216154,57.633697,SA
2,R2,33.100405,67.067344,1.104647,1009.208996,5.007652,15.98076,1.185391,19.398789,20.840822,2.861635,0.200113,41.289875,SA
3,R1,34.498319,54.559049,1.137759,1284.227623,4.991501,16.27671,1.716396,18.413613,19.736098,2.946784,0.223092,57.645661,SA
4,R3,36.297008,45.588894,1.363205,981.47031,4.003682,16.979894,0.777428,31.423772,17.331894,2.766242,0.424172,27.898619,SA


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Data columns (total 14 columns):
 #   Column                                                   Non-Null Count  Dtype  
---  ------                                                   --------------  -----  
 0   Random                                                   30000 non-null  object 
 1    Average  of chlorophyll in the plant (ACHP)             30000 non-null  float64
 2    Plant height rate (PHR)                                 30000 non-null  float64
 3   Average wet weight of the growth vegetative (AWWGV)      30000 non-null  float64
 4   Average leaf area of the plant (ALAP)                    30000 non-null  float64
 5   Average number of plant leaves (ANPL)                    30000 non-null  float64
 6   Average root diameter (ARD)                              30000 non-null  float64
 7    Average dry weight of the root (ADWR)                   30000 non-null  float64
 8    Percentage of dry matter 

In [5]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Average of chlorophyll in the plant (ACHP),30000.0,38.060575,4.441682,32.664004,34.399604,36.575753,42.300836,46.431668
Plant height rate (PHR),30000.0,59.020331,11.985213,37.020398,51.059436,55.912058,67.069465,77.04061
Average wet weight of the growth vegetative (AWWGV),30000.0,1.247257,0.245989,0.848351,1.064865,1.211328,1.501692,1.775383
Average leaf area of the plant (ALAP),30000.0,1063.639833,260.712823,658.488225,875.501356,1009.249081,1243.866525,1751.031654
Average number of plant leaves (ANPL),30000.0,3.944789,0.702537,2.959173,3.013099,3.998577,4.015829,5.037358
Average root diameter (ARD),30000.0,16.285792,2.882793,11.075737,13.93335,16.248496,18.202577,23.32587
Average dry weight of the root (ADWR),30000.0,1.000981,0.456292,0.241972,0.675892,0.833799,1.313504,2.196556
Percentage of dry matter for vegetative growth (PDMVG),30000.0,22.674553,9.571091,8.021505,14.980983,21.954006,27.711471,43.660373
Average root length (ARL),30000.0,18.071061,2.93283,12.359924,15.600294,18.477019,20.365252,23.253049
Average wet weight of the root (AWWR),30000.0,2.907011,1.361928,1.126715,1.911408,2.818611,3.254133,6.908923


In [6]:
df.isnull().sum()

Random                                                     0
 Average  of chlorophyll in the plant (ACHP)               0
 Plant height rate (PHR)                                   0
Average wet weight of the growth vegetative (AWWGV)        0
Average leaf area of the plant (ALAP)                      0
Average number of plant leaves (ANPL)                      0
Average root diameter (ARD)                                0
 Average dry weight of the root (ADWR)                     0
 Percentage of dry matter for vegetative growth (PDMVG)    0
Average root length (ARL)                                  0
Average wet weight of the root (AWWR)                      0
 Average dry weight of vegetative plants (ADWV)            0
Percentage of dry matter for root growth (PDMRG)           0
Class                                                      0
dtype: int64

In [7]:
num_cols=df.select_dtypes(exclude=["object"]).columns
print(num_cols)

Index([' Average  of chlorophyll in the plant (ACHP)',
       ' Plant height rate (PHR)',
       'Average wet weight of the growth vegetative (AWWGV)',
       'Average leaf area of the plant (ALAP)',
       'Average number of plant leaves (ANPL)', 'Average root diameter (ARD)',
       ' Average dry weight of the root (ADWR)',
       ' Percentage of dry matter for vegetative growth (PDMVG)',
       'Average root length (ARL)', 'Average wet weight of the root (AWWR)',
       ' Average dry weight of vegetative plants (ADWV)',
       'Percentage of dry matter for root growth (PDMRG)'],
      dtype='object')


In [8]:
df[num_cols].corr()

Unnamed: 0,Average of chlorophyll in the plant (ACHP),Plant height rate (PHR),Average wet weight of the growth vegetative (AWWGV),Average leaf area of the plant (ALAP),Average number of plant leaves (ANPL),Average root diameter (ARD),Average dry weight of the root (ADWR),Percentage of dry matter for vegetative growth (PDMVG),Average root length (ARL),Average wet weight of the root (AWWR),Average dry weight of vegetative plants (ADWV),Percentage of dry matter for root growth (PDMRG)
Average of chlorophyll in the plant (ACHP),1.0,-0.013622,0.42746,-0.401833,-0.385251,-0.415854,-0.379075,-0.120607,-0.592119,-0.376681,0.115942,0.000107
Plant height rate (PHR),-0.013622,1.0,0.49305,0.635731,0.090495,-0.087562,-0.052952,0.514732,0.202433,0.006415,0.57355,-0.088112
Average wet weight of the growth vegetative (AWWGV),0.42746,0.49305,1.0,0.26628,-0.104903,-0.166862,-0.096338,0.39387,-0.215712,-0.059155,0.70124,0.000532
Average leaf area of the plant (ALAP),-0.401833,0.635731,0.26628,1.0,0.404359,0.326802,0.399542,0.391506,0.604169,0.419564,0.366951,-0.007883
Average number of plant leaves (ANPL),-0.385251,0.090495,-0.104903,0.404359,1.0,0.512514,0.778143,0.141591,0.537721,0.528448,-0.00244,0.555486
Average root diameter (ARD),-0.415854,-0.087562,-0.166862,0.326802,0.512514,1.0,0.833025,-0.116574,0.722004,0.963054,-0.168133,-0.144971
Average dry weight of the root (ADWR),-0.379075,-0.052952,-0.096338,0.399542,0.778143,0.833025,1.0,-0.18751,0.705628,0.876905,-0.218848,0.344392
Percentage of dry matter for vegetative growth (PDMVG),-0.120607,0.514732,0.39387,0.391506,0.141591,-0.116574,-0.18751,1.0,-0.1022,-0.195834,0.913545,0.032238
Average root length (ARL),-0.592119,0.202433,-0.215712,0.604169,0.537721,0.722004,0.705628,-0.1022,1.0,0.771567,-0.183255,-0.110905
Average wet weight of the root (AWWR),-0.376681,0.006415,-0.059155,0.419564,0.528448,0.963054,0.876905,-0.195834,0.771567,1.0,-0.196339,-0.131328


In [9]:
# sns.heatmap(df[num_cols].corr(), annot=True)

In [10]:
from sklearn.preprocessing import LabelEncoder

In [11]:
encoder=LabelEncoder()

In [12]:
df["Class"]=encoder.fit_transform(df["Class"])

In [13]:
df["Class"]

0        0
1        0
2        0
3        0
4        0
        ..
29995    5
29996    5
29997    5
29998    5
29999    5
Name: Class, Length: 30000, dtype: int64

In [14]:
#define dependent and independent variable
x=df.drop(["Class","Random"],axis=1)
y=df["Class"]

In [15]:
# split the data into train and test 
X_train, X_test, y_train, y_test=train_test_split(x,y,train_size=0.8,stratify=y,random_state=45)

In [16]:
LR_model=LogisticRegression(multi_class="auto")

In [18]:
LR_model.fit(X_train,y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [19]:
# Evalution of training data

yp_train=LR_model.predict(X_train)

In [20]:
train_accuracy=accuracy_score(y_train,yp_train)
print(f"{train_accuracy=}")

train_accuracy=1.0


In [21]:
yp_test=LR_model.predict(X_test)
test_accuracy=accuracy_score(y_test,yp_test)
print(f"{test_accuracy=}")

test_accuracy=1.0
