In [None]:
#import required libraries
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn import metrics

from sklearn.preprocessing import LabelEncoder  # normalize labels- used to transform non-numerical labels to numerical labels.
from sklearn.naive_bayes import GaussianNB     #Gassian algorithm - classifier assumes that the data from each label is drawn from a simple Gaussian distribution.

# Load the Dataset

In [None]:
# Mount Google Drive in the Colab runtime; the CSV read later lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Location of the raw data sheet on the mounted Drive
DATA_PATH = '/content/drive/MyDrive/FYP Project/Water Schedule/Data Sheet.csv'
data = pd.read_csv(DATA_PATH)

In [None]:
# Preview the loaded table (pandas elides the middle rows in the display).
data

Unnamed: 0,Week No,Date,Time,Temperature (0C),Humidity(%),Soil Moisture level1,Soil Moisture level2,Motor On/Off,Unnamed: 8
0,1st Week,3/24/2023,08.30 a.m.,30,72,0,0,On,
1,,,06.00 p.m.,29,68,1,1,Off,
2,,3/25/2023,08.00 a.m.,31,69,0,1,On,
3,,,06.45 p.m.,32,71,1,1,Off,
4,,3/26/2023,07.30 a.m.,30,72,1,1,On,
...,...,...,...,...,...,...,...,...,...
171,,,06.00 p.m.,34,76,1,1,Off,
172,,6/18/2023,08.30 a.m.,31,76,1,0,On,
173,,,06.15 p.m.,34,78,0,1,On,
174,,6/19/2023,08.15 a.m.,32,78,0,1,On,


In [None]:
# (rows, columns) of the loaded sheet.
data.shape

(176, 9)

# Data Analysis

In [None]:
# Column dtypes and non-null counts, to spot text columns and mostly-empty columns.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 176 entries, 0 to 175
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Week No               12 non-null     object 
 1   Date                  88 non-null     object 
 2   Time                  176 non-null    object 
 3   Temperature (0C)      176 non-null    int64  
 4   Humidity(%)           176 non-null    int64  
 5   Soil Moisture level1  176 non-null    int64  
 6   Soil Moisture level2  176 non-null    int64  
 7   Motor On/Off          176 non-null    object 
 8   Unnamed: 8            0 non-null      float64
dtypes: float64(1), int64(4), object(4)
memory usage: 12.5+ KB


In [None]:
# Number of missing values in each column.
data.isnull().sum()

Week No                 164
Date                     88
Time                      0
Temperature (0C)          0
Humidity(%)               0
Soil Moisture level1      0
Soil Moisture level2      0
Motor On/Off              0
Unnamed: 8              176
dtype: int64

In [None]:
# Exact column labels (note the spacing and symbols), as used by the selections below.
data.columns

Index(['Week No', 'Date', 'Time', 'Temperature (0C)', 'Humidity(%)',
       'Soil Moisture level1', 'Soil Moisture level2', 'Motor On/Off',
       'Unnamed: 8'],
      dtype='object')

# Temperature Column

In [None]:
def set_temp(value, threshold=30):
  """Bucket a temperature reading into 'High' or 'Low'.

  Args:
    value: Numeric temperature reading.
    threshold: Smallest reading still labelled 'High'. Defaults to 30, the
      cut-off that was previously hard-coded in the comparison.

  Returns:
    'High' when value >= threshold, otherwise 'Low'. A value that fails the
    comparison (for example NaN) therefore also yields 'Low'.
  """
  if value >= threshold:
    return 'High'
  return 'Low'


# Derive the High/Low temperature label for every row
data['Temperature_New'] = data['Temperature (0C)'].map(set_temp)


In [None]:
# How many rows fall into each temperature label.
data['Temperature_New'].value_counts()

High    167
Low       9
Name: Temperature_New, dtype: int64

# Humidity Column

In [None]:
def set_humidity(percentage, threshold=70):
  """Bucket a relative-humidity reading into 'High' or 'Low'.

  Args:
    percentage: Numeric humidity reading.
    threshold: Smallest reading still labelled 'High'. Defaults to 70, the
      cut-off that was previously hard-coded in the comparison.

  Returns:
    'High' when percentage >= threshold, otherwise 'Low'. A value that fails
    the comparison (for example NaN) therefore also yields 'Low'.
  """
  if percentage >= threshold:
    return 'High'
  return 'Low'

# Derive the High/Low humidity label for every row
data['Humidity_New'] = data['Humidity(%)'].map(set_humidity)

In [None]:
# How many rows fall into each humidity label.
data['Humidity_New'].value_counts()

High    163
Low      13
Name: Humidity_New, dtype: int64

# Soil Moisture Level Columns

In [None]:
# Distribution of the 0/1 readings in the first soil-moisture column.
data['Soil Moisture level1'].value_counts()

1    94
0    82
Name: Soil Moisture level1, dtype: int64

In [None]:
# Distribution of the 0/1 readings in the second soil-moisture column.
data['Soil Moisture level2'].value_counts()

0    100
1     76
Name: Soil Moisture level2, dtype: int64

In [None]:
# Check that Temperature_New and Humidity_New were appended to the table.
data

Unnamed: 0,Week No,Date,Time,Temperature (0C),Humidity(%),Soil Moisture level1,Soil Moisture level2,Motor On/Off,Unnamed: 8,Temperature_New,Humidity_New
0,1st Week,3/24/2023,08.30 a.m.,30,72,0,0,On,,High,High
1,,,06.00 p.m.,29,68,1,1,Off,,Low,Low
2,,3/25/2023,08.00 a.m.,31,69,0,1,On,,High,Low
3,,,06.45 p.m.,32,71,1,1,Off,,High,High
4,,3/26/2023,07.30 a.m.,30,72,1,1,On,,High,High
...,...,...,...,...,...,...,...,...,...,...,...
171,,,06.00 p.m.,34,76,1,1,Off,,High,High
172,,6/18/2023,08.30 a.m.,31,76,1,0,On,,High,High
173,,,06.15 p.m.,34,78,0,1,On,,High,High
174,,6/19/2023,08.15 a.m.,32,78,0,1,On,,High,High


# Drop unnecessary columns

In [None]:
# Keep only the soil-moisture flags, the motor state and the derived labels
data = data.drop(
    columns=[
        'Week No', 'Date', 'Time', 'Unnamed: 8',  # calendar/time fields and the empty trailing column
        'Temperature (0C)', 'Humidity(%)',        # raw readings, superseded by the High/Low labels
    ]
)

In [None]:
# Confirm which columns remain after the drops.
data

Unnamed: 0,Soil Moisture level1,Soil Moisture level2,Motor On/Off,Temperature_New,Humidity_New
0,0,0,On,High,High
1,1,1,Off,Low,Low
2,0,1,On,High,Low
3,1,1,Off,High,High
4,1,1,On,High,High
...,...,...,...,...,...
171,1,1,Off,High,High
172,1,0,On,High,High
173,0,1,On,High,High
174,0,1,On,High,High


# Train/Test Split and Label Encoding

In [None]:
# Separate the predictors (X) from the target column (y).
X = data.drop(['Motor On/Off'], axis=1)
y = data['Motor On/Off']

# Hold out 25% of the rows. random_state pins the shuffle so the same rows land
# in each split on every run (train_test_split is imported in the first cell).
# NOTE(review): this split is taken before the High/Low text columns are
# encoded, and the classifiers further down are fitted on the full X_n / y
# rather than on X_train / y_train.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)

In [None]:
def model_acc(model, train_X=None, train_y=None, test_X=None, test_y=None):
    """Fit `model` on a training split and print its score on a test split.

    The previous body fitted on `inputs` / `target`, names that are not
    defined anywhere in this notebook, so every call raised NameError. The
    model is now fitted on the training split.

    Args:
        model: Estimator exposing `fit(X, y)` and `score(X, y)`.
        train_X, train_y: Training features and labels. Each defaults to the
            notebook-level `X_train` / `y_train` when omitted.
        test_X, test_y: Evaluation features and labels. Each defaults to the
            notebook-level `X_test` / `y_test` when omitted.

    Returns:
        None. The line '<model> --> <score>' is printed.

    NOTE(review): the notebook-level X_train / X_test are split off before the
    'Temperature_New' / 'Humidity_New' text columns are encoded, so numeric-only
    estimators presumably need encoded splits passed in explicitly.
    """
    # Fall back to the notebook-level split only for the pieces not supplied.
    train_X = X_train if train_X is None else train_X
    train_y = y_train if train_y is None else train_y
    test_X = X_test if test_X is None else test_X
    test_y = y_test if test_y is None else test_y

    model.fit(train_X, train_y)
    acc = model.score(test_X, test_y)
    print(str(model)+ ' --> ' +str(acc))

In [None]:
# Encode the text labels as integers.
# One LabelEncoder per text column, so each keeps its own label-to-integer mapping.
Temperature_at=LabelEncoder()
Humidity_at=LabelEncoder()


In [None]:
# Append integer-coded versions of the two High/Low columns to the feature table
X = X.assign(
    Temperature_n=Temperature_at.fit_transform(X['Temperature_New']),
    Humidity_n=Humidity_at.fit_transform(X['Humidity_New']),
)
X

Unnamed: 0,Soil Moisture level1,Soil Moisture level2,Temperature_New,Humidity_New,Temperature_n,Humidity_n
0,0,0,High,High,0,0
1,1,1,Low,Low,1,1
2,0,1,High,Low,0,1
3,1,1,High,High,0,0
4,1,1,High,High,0,0
...,...,...,...,...,...,...
171,1,1,High,High,0,0
172,1,0,High,High,0,0
173,0,1,High,High,0,0
174,0,1,High,High,0,0


In [None]:
# Discard the text labels now that their integer codes exist; X_n is all-numeric
X_n = X.drop(['Temperature_New', 'Humidity_New'], axis=1)
X_n

Unnamed: 0,Soil Moisture level1,Soil Moisture level2,Temperature_n,Humidity_n
0,0,0,0,0
1,1,1,1,1
2,0,1,0,1
3,1,1,0,0
4,1,1,0,0
...,...,...,...,...
171,1,1,0,0
172,1,0,0,0
173,0,1,0,0
174,0,1,0,0


In [None]:
# Gaussian naive Bayes, fitted on every row of the encoded feature table
classifier1 = GaussianNB()
classifier1.fit(X=X_n, y=y)

In [None]:
# Accuracy
# NOTE(review): X_n / y are the same rows classifier1 was fitted on, so this is
# training accuracy; the X_test / y_test split created earlier is not used here.
classifier1.score(X_n,y)

0.8920454545454546

In [None]:
# Prediction
# Query row: both soil-moisture flags 0, Temperature_n 0 and Humidity_n 0
# (code 0 corresponds to 'High' in the encoded table). Wrapping the row in a
# DataFrame that carries the fitted column labels makes the feature order
# explicit and avoids scikit-learn's "X does not have valid feature names" warning.
classifier1.predict(pd.DataFrame([[0,0,0,0]], columns=X_n.columns))



array(['On'], dtype='<U3')

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Create a decision tree classifier. random_state is fixed (same seed as the
# random forest in this notebook) so ties between equally good splits resolve
# the same way on every run.
classifier2 = DecisionTreeClassifier(random_state=42)

# Fit on every row of the encoded feature table (no rows are held out here)
classifier2.fit(X_n,y)


In [None]:
# Accuracy
# NOTE(review): X_n / y are the same rows classifier2 was fitted on, so this is
# training accuracy; the X_test / y_test split created earlier is not used here.
classifier2.score(X_n,y)



0.9318181818181818

In [None]:
# Prediction
# Query row: both soil-moisture flags 0, Temperature_n 0 and Humidity_n 0
# (code 0 corresponds to 'High' in the encoded table). Wrapping the row in a
# DataFrame that carries the fitted column labels makes the feature order
# explicit and avoids scikit-learn's "X does not have valid feature names" warning.
classifier2.predict(pd.DataFrame([[0,0,0,0]], columns=X_n.columns))



array(['On'], dtype=object)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Forest of 100 trees; the fixed seed makes repeated runs build the same forest
classifier3 = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)

# Fit on every row of the encoded feature table (no rows are held out here)
classifier3.fit(X=X_n, y=y)


In [None]:
# Accuracy
# NOTE(review): X_n / y are the same rows classifier3 was fitted on, so this is
# training accuracy; the X_test / y_test split created earlier is not used here.
classifier3.score(X_n,y)



0.9318181818181818

In [None]:
# Feature order the classifiers were fitted with; rows passed to predict() must follow it.
X_n.columns

Index(['Soil Moisture level1', 'Soil Moisture level2', 'Temperature_n',
       'Humidity_n'],
      dtype='object')

In [None]:
# Prediction
# Query row: both soil-moisture flags 0, Temperature_n 0 and Humidity_n 0
# (code 0 corresponds to 'High' in the encoded table). Wrapping the row in a
# DataFrame that carries the fitted column labels makes the feature order
# explicit and avoids scikit-learn's "X does not have valid feature names" warning.
classifier3.predict(pd.DataFrame([[0,0,0,0]], columns=X_n.columns))



array(['On'], dtype=object)

In [None]:
import pickle

# Persist the fitted random forest to the working directory.
# Only the classifier is stored; the label encoders are not part of the file.
MODEL_FILE = 'WaterModel.pickle'
with open(MODEL_FILE, 'wb') as model_out:
    model_out.write(pickle.dumps(classifier3))