In [1]:
# importing necessary libraries

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import MinMaxScaler
import joblib

In [2]:
# Read the smart-irrigation CSV into a DataFrame (change the path below if the file lives elsewhere)
df = pd.read_csv(filepath_or_buffer="smart_irrigation_dataset.csv")

In [3]:
# Preview the top of the table: the first 5 rows
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,sensor_0,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,...,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,parcel_0,parcel_1,parcel_2
0,0,1,2,1,7,0,1,1,4,0,...,8,1,0,2,1,9,2,0,1,0
1,1,5,1,3,5,2,2,1,2,3,...,4,5,5,2,2,2,7,0,0,0
2,2,3,1,4,3,4,0,1,6,0,...,3,3,1,0,3,1,0,1,1,0
3,3,2,2,4,3,5,0,3,2,2,...,4,1,1,4,1,3,2,0,0,0
4,4,4,3,3,2,5,1,3,1,1,...,1,3,2,2,1,1,0,1,1,0


In [4]:
# Preview the bottom of the table: the last 5 rows
df.tail(n=5)

Unnamed: 0.1,Unnamed: 0,sensor_0,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,...,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,parcel_0,parcel_1,parcel_2
1995,1995,4,1,2,2,1,1,1,2,1,...,1,2,3,2,1,1,0,0,0,0
1996,1996,1,3,3,3,2,2,3,3,1,...,3,3,0,1,0,6,2,1,1,1
1997,1997,1,3,3,1,1,4,8,1,0,...,2,3,4,4,4,1,0,1,0,0
1998,1998,2,1,0,2,2,0,1,3,0,...,2,4,0,2,0,3,0,0,1,0
1999,1999,0,1,4,1,2,2,6,8,5,...,2,1,1,0,5,2,1,1,1,1


In [5]:
# Print a concise summary of df: index range, one line per column with its
# non-null count and dtype, plus overall memory usage.
# Useful here to confirm there are no missing values before modelling.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 24 columns):
 #   Column      Non-Null Count  Dtype
---  ------      --------------  -----
 0   Unnamed: 0  2000 non-null   int64
 1   sensor_0    2000 non-null   int64
 2   sensor_1    2000 non-null   int64
 3   sensor_2    2000 non-null   int64
 4   sensor_3    2000 non-null   int64
 5   sensor_4    2000 non-null   int64
 6   sensor_5    2000 non-null   int64
 7   sensor_6    2000 non-null   int64
 8   sensor_7    2000 non-null   int64
 9   sensor_8    2000 non-null   int64
 10  sensor_9    2000 non-null   int64
 11  sensor_10   2000 non-null   int64
 12  sensor_11   2000 non-null   int64
 13  sensor_12   2000 non-null   int64
 14  sensor_13   2000 non-null   int64
 15  sensor_14   2000 non-null   int64
 16  sensor_15   2000 non-null   int64
 17  sensor_16   2000 non-null   int64
 18  sensor_17   2000 non-null   int64
 19  sensor_18   2000 non-null   int64
 20  sensor_19   2000 non-null   in

In [6]:
# List every column label in df (returned as a pandas Index).
# Used to spot the leftover 'Unnamed: 0' column alongside the sensor_* and parcel_* columns.
df.columns

Index(['Unnamed: 0', 'sensor_0', 'sensor_1', 'sensor_2', 'sensor_3',
       'sensor_4', 'sensor_5', 'sensor_6', 'sensor_7', 'sensor_8', 'sensor_9',
       'sensor_10', 'sensor_11', 'sensor_12', 'sensor_13', 'sensor_14',
       'sensor_15', 'sensor_16', 'sensor_17', 'sensor_18', 'sensor_19',
       'parcel_0', 'parcel_1', 'parcel_2'],
      dtype='object')

In [7]:
# Drop the 'Unnamed: 0' column, which is a leftover row index written into the CSV.
# errors="ignore" keeps this cell idempotent: re-running it after the column is
# already gone is a no-op instead of raising KeyError.
df = df.drop(columns="Unnamed: 0", errors="ignore")

In [8]:
# Re-check the first 5 rows to confirm the 'Unnamed: 0' column is gone
df.head(n=5)

Unnamed: 0,sensor_0,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,sensor_9,...,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,parcel_0,parcel_1,parcel_2
0,1,2,1,7,0,1,1,4,0,3,...,8,1,0,2,1,9,2,0,1,0
1,5,1,3,5,2,2,1,2,3,1,...,4,5,5,2,2,2,7,0,0,0
2,3,1,4,3,4,0,1,6,0,2,...,3,3,1,0,3,1,0,1,1,0
3,2,2,4,3,5,0,3,2,2,5,...,4,1,1,4,1,3,2,0,0,0
4,4,3,3,2,5,1,3,1,1,2,...,1,3,2,2,1,1,0,1,1,0


In [9]:
# Summary statistics per numeric column: count, mean, std, min, quartiles (25%/50%/75%) and max.
# Gives a quick view of each sensor's value range and of how often each parcel label is 1 (its mean).
df.describe()

Unnamed: 0,sensor_0,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,sensor_9,...,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,parcel_0,parcel_1,parcel_2
count,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,...,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0
mean,1.437,1.659,2.6545,2.6745,2.8875,1.411,3.3155,4.2015,1.214,1.901,...,2.7315,3.416,1.2065,2.325,1.7295,2.2745,1.8135,0.6355,0.7305,0.212
std,1.321327,1.338512,1.699286,1.855875,1.816451,1.339394,2.206444,2.280241,1.386782,1.518668,...,1.774537,1.960578,1.258034,1.715181,1.561265,1.67169,1.469285,0.48141,0.443811,0.408827
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,1.0,1.0,1.0,2.0,0.0,2.0,3.0,0.0,1.0,...,1.0,2.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
50%,1.0,1.0,2.0,2.0,3.0,1.0,3.0,4.0,1.0,2.0,...,2.0,3.0,1.0,2.0,1.0,2.0,2.0,1.0,1.0,0.0
75%,2.0,2.0,4.0,4.0,4.0,2.0,5.0,6.0,2.0,3.0,...,4.0,5.0,2.0,3.0,3.0,3.0,3.0,1.0,1.0,0.0
max,8.0,9.0,10.0,11.0,12.0,7.0,13.0,12.0,8.0,9.0,...,11.0,11.0,6.0,10.0,11.0,10.0,7.0,1.0,1.0,1.0


In [10]:
# -------------------------------
# STEP 2: DEFINE FEATURES AND LABELS
# -------------------------------

# X holds the input features: every column named sensor_<n> (sensor_0 to sensor_19).
# Columns are selected by name rather than by position, so the result stays correct
# even if a stray index column is still present or the column order changes.
X = df.filter(regex=r"^sensor_\d+$")


In [11]:
# y holds the output/target labels: every column named parcel_<n> (parcel_0 to parcel_2).
# Columns are selected by name rather than by position, so the result does not depend
# on how many columns precede the labels in df.
y = df.filter(regex=r"^parcel_\d+$")

In [12]:
# Show a random sample of 10 rows from the input features.
# A fixed random_state makes the preview reproducible: the same rows are drawn
# on every Restart & Run All instead of a different set each time.
X.sample(n=10, random_state=42)

Unnamed: 0,sensor_0,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,sensor_9,sensor_10,sensor_11,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19
910,0,0,4,3,2,0,2,5,0,0,5,2,5,2,1,0,0,0,3,3
369,1,1,2,6,1,0,2,5,0,2,6,1,8,4,5,0,3,0,4,2
1794,2,3,3,2,1,1,2,2,2,5,4,1,0,3,5,2,5,4,5,4
1627,1,0,2,5,6,0,0,5,0,4,6,4,2,5,4,1,1,0,6,1
1282,0,2,2,5,0,0,1,7,1,5,4,2,8,4,6,2,0,0,3,4
978,2,1,5,8,3,1,4,1,1,3,5,4,1,3,5,1,3,1,2,0
392,1,3,2,4,5,2,4,2,1,2,2,3,4,5,4,0,3,2,2,2
1164,2,1,2,2,3,2,8,4,2,3,5,8,4,0,1,0,2,2,0,0
1344,1,1,2,4,5,0,8,3,0,0,3,4,5,3,3,2,4,1,2,1
648,4,3,4,2,2,2,3,2,3,1,7,2,1,4,1,0,2,2,1,3


In [13]:
# Show a random sample of 10 rows from the target labels.
# A fixed random_state makes the preview reproducible: the same rows are drawn
# on every Restart & Run All instead of a different set each time.
y.sample(n=10, random_state=42)

Unnamed: 0,parcel_0,parcel_1,parcel_2
1264,1,1,0
825,1,1,1
623,1,1,0
1317,0,1,0
529,1,1,0
1325,1,1,1
1929,1,1,1
410,0,0,0
1031,1,1,0
1421,0,0,0


In [14]:
# Print a concise summary of the feature frame X: one line per column with its
# non-null count and dtype, plus memory usage.
# Sanity check that only the sensor_* columns were selected and none contain nulls.
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 20 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   sensor_0   2000 non-null   int64
 1   sensor_1   2000 non-null   int64
 2   sensor_2   2000 non-null   int64
 3   sensor_3   2000 non-null   int64
 4   sensor_4   2000 non-null   int64
 5   sensor_5   2000 non-null   int64
 6   sensor_6   2000 non-null   int64
 7   sensor_7   2000 non-null   int64
 8   sensor_8   2000 non-null   int64
 9   sensor_9   2000 non-null   int64
 10  sensor_10  2000 non-null   int64
 11  sensor_11  2000 non-null   int64
 12  sensor_12  2000 non-null   int64
 13  sensor_13  2000 non-null   int64
 14  sensor_14  2000 non-null   int64
 15  sensor_15  2000 non-null   int64
 16  sensor_16  2000 non-null   int64
 17  sensor_17  2000 non-null   int64
 18  sensor_18  2000 non-null   int64
 19  sensor_19  2000 non-null   int64
dtypes: int64(20)
memory usage: 312.6 KB


In [15]:
# Print a concise summary of the label frame y: one line per column with its
# non-null count and dtype, plus memory usage.
# Sanity check that only the parcel_* columns were selected and none contain nulls.
y.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   parcel_0  2000 non-null   int64
 1   parcel_1  2000 non-null   int64
 2   parcel_2  2000 non-null   int64
dtypes: int64(3)
memory usage: 47.0 KB


In [16]:
# Display the input feature frame X as the cell output.
# Note: the notebook does not render all rows of a long frame; pandas abbreviates it
# to the leading and trailing rows with a '...' row in between.
X

Unnamed: 0,sensor_0,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,sensor_9,sensor_10,sensor_11,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19
0,1,2,1,7,0,1,1,4,0,3,1,3,6,8,1,0,2,1,9,2
1,5,1,3,5,2,2,1,2,3,1,3,2,2,4,5,5,2,2,2,7
2,3,1,4,3,4,0,1,6,0,2,3,2,4,3,3,1,0,3,1,0
3,2,2,4,3,5,0,3,2,2,5,3,1,2,4,1,1,4,1,3,2
4,4,3,3,2,5,1,3,1,1,2,4,5,3,1,3,2,2,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,4,1,2,2,1,1,1,2,1,2,4,3,3,1,2,3,2,1,1,0
1996,1,3,3,3,2,2,3,3,1,5,2,2,4,3,3,0,1,0,6,2
1997,1,3,3,1,1,4,8,1,0,0,3,2,4,2,3,4,4,4,1,0
1998,2,1,0,2,2,0,1,3,0,0,0,5,2,2,4,0,2,0,3,0


In [17]:
# Show (rows, columns) for X and y side by side; the row counts should match
(X.shape, y.shape)

((2000, 20), (2000, 3))