In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import time
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

# Load the dataset
# header=0 discards the CSV's own header row; columns are renamed via `names`.
file_path = 'TARP[1].csv'
names = ['Soil Moisture', 'Temperature', 'Soil Humidity', 'Time', 'Air temperature (C)', 'Wind speed (Km/h)',
         'Air humidity (%)', 'Wind gust (Km/h)', 'Pressure (KPa)', 'ph', 'rainfall', 'N', 'P', 'K', 'status']
data = pd.read_csv(file_path, names=names, header=0)

# Data preprocessing
# Drop the columns that are not used as model features.
data = data.drop(['Pressure (KPa)', 'ph', 'rainfall', 'N', 'P', 'K'], axis=1)
print(data.describe())
print(data.head())
print(data.tail())

# Forward-fill gaps of at most 3 consecutive rows.
data = data.ffill(limit=3)
# Linearly interpolate whatever is still missing, restricted to the numeric
# columns: the textual 'status' column cannot be interpolated, and recent
# pandas versions deprecate passing object-dtype columns to interpolate().
numeric_cols = data.select_dtypes(include='number').columns
data[numeric_cols] = data[numeric_cols].interpolate(method='linear', limit_direction='both')

# Encode the target as a single float 0/1 column named 'status_1'.
# For a binary label, one-hot encoding with drop='first' yields exactly this
# indicator column, so the mapping alone is sufficient. It also avoids the
# OneHotEncoder `sparse` keyword, which newer scikit-learn releases no longer
# accept.
status_numeric = data['status'].map({'ON': 1, 'OFF': 0})
if status_numeric.isna().any():
    # Fail loudly here instead of feeding NaN labels to the models below.
    unexpected = data.loc[status_numeric.isna(), 'status'].unique().tolist()
    raise ValueError("Unexpected or missing 'status' values: {}".format(unexpected))
# 'status_1' is appended last, so later cells can keep slicing features with iloc[:, :-1].
data = data.drop(columns='status')
data['status_1'] = status_numeric.astype(float)

# MinMax scaling for 'Time' feature to a range of 0 to 60 (currently disabled)
#time_scaler = MinMaxScaler(feature_range=(0, 60))
#data['Time'] = time_scaler.fit_transform(data[['Time']])
print(data.describe())
print(data.head())
print(data.tail())
# Correlation matrix
correlation_matrix = data.corr()
plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Correlation Matrix")
plt.show()

# Exploratory data analysis
# Bar plot for 'status' counts
# value_counts() orders by frequency, not by class value, so sort by class and
# derive each tick label from the value it belongs to. Hard-coding
# ['OFF', 'ON'] would swap the labels whenever ON is the majority class.
status_labels = {0: 'OFF', 1: 'ON'}
status_counts = data['status_1'].value_counts().sort_index()
plt.bar(status_counts.index, status_counts.values)
plt.xticks(status_counts.index,
           [status_labels.get(int(value), str(value)) for value in status_counts.index])
plt.title("Count of Status")
plt.xlabel("Status")
plt.ylabel("Count")
plt.show()

# Box plots
plt.figure(figsize=(15, 8))
sns.boxplot(x='status_1', y='Soil Moisture', data=data)
plt.title("Box Plot of Soil Moisture by Status")
plt.show()

plt.figure(figsize=(15, 8))
sns.boxplot(x='status_1', y='Temperature', data=data)
plt.title("Box Plot of Temperature by Status")
plt.show()

# Correlation with target variable
# Reuse the matrix computed above instead of recomputing data.corr().
correlation_with_target = correlation_matrix['status_1'].drop('status_1')
correlation_with_target.plot(kind='bar', figsize=(12, 6))
plt.title("Correlation of Features with Status")
plt.ylabel("Correlation")
plt.xlabel("Features")
plt.show()


In [None]:
# Model training and evaluation: random forest
start = time.time()
# Features are every column except the last one; the target is 'status_1'
# (assumes 'status_1' is the last column of `data`).
X = data.iloc[:, :-1].values
y = data["status_1"].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=11111)
# Seed the forest so the reported metrics are reproducible across runs.
rf_model = RandomForestClassifier(n_estimators=200, random_state=11111)
rf_model.fit(X_train, y_train)
y_pred_class_rf = rf_model.predict(X_test)
y_pred_prob_rf = rf_model.predict_proba(X_test)
accuracy = accuracy_score(y_test, y_pred_class_rf)
precision = precision_score(y_test, y_pred_class_rf)
recall = recall_score(y_test, y_pred_class_rf)
f1 = f1_score(y_test, y_pred_class_rf)
# Column 1 of predict_proba holds the probability of the positive class.
roc_auc = roc_auc_score(y_test, y_pred_prob_rf[:, 1])
end = time.time()
# Print performance metrics
print("Accuracy: {:.3f}".format(accuracy))
print("Precision: {:.3f}".format(precision))
print("Recall: {:.3f}".format(recall))
print("F1 Score: {:.3f}".format(f1))
print("AUC: {:.3f}".format(roc_auc))
print("Execution time: {:.3f}".format(end-start))
# Plot ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob_rf[:, 1])
# Label both lines: plt.legend() has nothing to show for unlabelled artists.
plt.plot(fpr, tpr, 'b-', label='Random Forest (AUC = {:.3f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], 'r--', label='Chance')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.show()

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Linear regression baseline: fit on the 70% training split and report
# regression metrics on the 30% hold-out, timing the whole step.
start = time.time()
X = data.iloc[:, :-1].to_numpy()
y = data["status_1"].to_numpy()

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=11111
)

# fit() returns the estimator itself, so construction and training chain.
lr_model = LinearRegression().fit(X_train, y_train)
y_pred = lr_model.predict(X_test)

# Evaluation metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
end = time.time()

# Print performance metrics, one line per (template, value) pair.
for _template, _value in (
    ("Mean Squared Error (MSE): {:.3f}", mse),
    ("Root Mean Squared Error (RMSE): {:.3f}", rmse),
    ("R^2 Score: {:.3f}", r2),
    ("Execution time: {:.3f}", end - start),
):
    print(_template.format(_value))

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve
import matplotlib.pyplot as plt

# Model training and evaluation: decision tree
start = time.time()
# Features are every column except the last one; the target is 'status_1'
# (assumes 'status_1' is the last column of `data`).
X = data.iloc[:, :-1].values
y = data["status_1"].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=11111)

# Seed the tree so the reported metrics are reproducible across runs.
dt_model = DecisionTreeClassifier(random_state=11111)
dt_model.fit(X_train, y_train)

y_pred_class_dt = dt_model.predict(X_test)
y_pred_prob_dt = dt_model.predict_proba(X_test)

accuracy = accuracy_score(y_test, y_pred_class_dt)
precision = precision_score(y_test, y_pred_class_dt)
recall = recall_score(y_test, y_pred_class_dt)
f1 = f1_score(y_test, y_pred_class_dt)
# Column 1 of predict_proba holds the probability of the positive class.
roc_auc = roc_auc_score(y_test, y_pred_prob_dt[:, 1])
end = time.time()
# Print performance metrics
print("Accuracy: {:.3f}".format(accuracy))
print("Precision: {:.3f}".format(precision))
print("Recall: {:.3f}".format(recall))
print("F1 Score: {:.3f}".format(f1))
print("AUC: {:.3f}".format(roc_auc))
print("Execution time: {:.3f}".format(end-start))

# Plot ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob_dt[:, 1])
# Label both lines: plt.legend() has nothing to show for unlabelled artists.
plt.plot(fpr, tpr, 'b-', label='Decision Tree (AUC = {:.3f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], 'r--', label='Chance')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.show()


In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve
import matplotlib.pyplot as plt

# Model training and evaluation: support vector machine
start = time.time()
# Features are every column except the last one; the target is 'status_1'
# (assumes 'status_1' is the last column of `data`).
X = data.iloc[:, :-1].values
y = data["status_1"].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=11111)

# probability=True fits an internal cross-validated calibration that shuffles
# the data; seed it so predict_proba (and therefore the AUC) is reproducible.
svm_model = SVC(probability=True, random_state=11111)
svm_model.fit(X_train, y_train)

y_pred_class_svm = svm_model.predict(X_test)
y_pred_prob_svm = svm_model.predict_proba(X_test)

accuracy = accuracy_score(y_test, y_pred_class_svm)
precision = precision_score(y_test, y_pred_class_svm)
recall = recall_score(y_test, y_pred_class_svm)
f1 = f1_score(y_test, y_pred_class_svm)
# Column 1 of predict_proba holds the probability of the positive class.
roc_auc = roc_auc_score(y_test, y_pred_prob_svm[:, 1])
end = time.time()
# Print performance metrics
print("Accuracy: {:.3f}".format(accuracy))
print("Precision: {:.3f}".format(precision))
print("Recall: {:.3f}".format(recall))
print("F1 Score: {:.3f}".format(f1))
print("AUC: {:.3f}".format(roc_auc))
print("Execution time: {:.3f}".format(end-start))

# Plot ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob_svm[:, 1])
# Label both lines: plt.legend() has nothing to show for unlabelled artists.
plt.plot(fpr, tpr, 'b-', label='SVM (AUC = {:.3f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], 'r--', label='Chance')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.show()


In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve
import matplotlib.pyplot as plt

# Model training and evaluation: linear classifier trained with SGD
start = time.time()
# Features are every column except the last one; the target is 'status_1'
# (assumes 'status_1' is the last column of `data`).
X = data.iloc[:, :-1].values
y = data["status_1"].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=11111)

# Seed the optimizer's data shuffling so the reported metrics are reproducible.
sgd_model = SGDClassifier(random_state=11111)
sgd_model.fit(X_train, y_train)

y_pred_class_sgd = sgd_model.predict(X_test)
# NOTE: despite the name, these are decision_function scores (signed distances
# to the separating hyperplane), not probabilities. They are still valid
# ranking scores for roc_auc_score / roc_curve.
y_pred_prob_sgd = sgd_model.decision_function(X_test)

accuracy = accuracy_score(y_test, y_pred_class_sgd)
precision = precision_score(y_test, y_pred_class_sgd)
recall = recall_score(y_test, y_pred_class_sgd)
f1 = f1_score(y_test, y_pred_class_sgd)
roc_auc = roc_auc_score(y_test, y_pred_prob_sgd)
end = time.time()
# Print performance metrics
print("Accuracy: {:.3f}".format(accuracy))
print("Precision: {:.3f}".format(precision))
print("Recall: {:.3f}".format(recall))
print("F1 Score: {:.3f}".format(f1))
print("AUC: {:.3f}".format(roc_auc))
print("Execution time: {:.3f}".format(end-start))

# Plot ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob_sgd)
# Label both lines: plt.legend() has nothing to show for unlabelled artists.
plt.plot(fpr, tpr, 'b-', label='SGD (AUC = {:.3f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], 'r--', label='Chance')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.show()


In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Gaussian Naive Bayes: train on the 70% split, score on the 30% hold-out,
# and time the whole step.
start = time.time()
X = data.iloc[:, :-1].to_numpy()
y = data["status_1"].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=11111
)

# fit() returns the estimator itself, so construction and training chain.
nb_model = GaussianNB().fit(X_train, y_train)

# Class predictions for the held-out rows
y_pred = nb_model.predict(X_test)

# Evaluate the four classification metrics in their original order.
accuracy, precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
end = time.time()

# Report each metric, one line per (template, value) pair.
for _template, _value in (
    ("Accuracy: {:.3f}", accuracy),
    ("Precision: {:.3f}", precision),
    ("Recall: {:.3f}", recall),
    ("F1 Score: {:.3f}", f1),
    ("Execution time: {:.3f}", end - start),
):
    print(_template.format(_value))

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, roc_curve, accuracy_score, precision_score, recall_score, f1_score
from tensorflow import keras
# Dense neural network: train, evaluate on the hold-out split, plot the ROC curve.
start = time.time()
# Splitting the data into training and testing sets
# Features are every column except the last one; the target is 'status_1'
# (assumes 'status_1' is the last column of `data`).
X = data.iloc[:, :-1].values
y = data["status_1"].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=11111)

# Creating a neural network model: two hidden ReLU layers and a sigmoid output
# that yields the probability of the positive class.
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])

# Compiling the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Training the model
model.fit(X_train, y_train, epochs=100, batch_size=32)

# Predicting probabilities for the positive class (class 1).
# predict() returns shape (n_samples, 1); flatten once so every metric below
# receives a 1-D score vector.
y_pred_prob = model.predict(X_test).ravel()
y_pred = np.round(y_pred_prob)  # Convert probabilities to binary labels

# Calculating the AUC
auc = roc_auc_score(y_test, y_pred_prob)

# Calculating performance metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
end = time.time()
# Printing the performance metrics
print("Accuracy: {:.3f}".format(accuracy))
print("Precision: {:.3f}".format(precision))
print("Recall: {:.3f}".format(recall))
print("F1 Score: {:.3f}".format(f1))
# The AUC was computed but never reported; print it like the other model cells do.
print("AUC: {:.3f}".format(auc))
print("Execution time: {:.3f}".format(end-start))
# Plotting the ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)
plt.plot(fpr, tpr, 'b-')
plt.plot([0, 1], [0, 1], 'r--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.show()


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, roc_curve
from tensorflow import keras
from tensorflow.keras import layers

# LSTM classifier: train, evaluate on the hold-out split, plot the ROC curve.
start = time.time()
# Splitting the data into training and testing sets
# Features are every column except the last one; the target is the last column.
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=11111)

# Reshape the input data for LSTM: each row becomes a sequence with one step
# per feature column and a single value per step.
input_shape = (X_train.shape[1], 1)
X_train = X_train.reshape(X_train.shape[0], *input_shape)
X_test = X_test.reshape(X_test.shape[0], *input_shape)

# Creating an LSTM model
model = keras.Sequential([
    layers.LSTM(64, input_shape=input_shape),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

# Compiling the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Training the model
model.fit(X_train, y_train, epochs=100, batch_size=32)

# Predicting probabilities for the positive class (class 1).
# predict() returns shape (n_samples, 1); flatten to a 1-D score vector.
y_pred_prob = model.predict(X_test).ravel()
# Derive the class labels from THIS model's output. Without this line the
# metrics below would silently reuse `y_pred` left over from an earlier cell.
y_pred = np.round(y_pred_prob)

# Calculating the AUC
auc = roc_auc_score(y_test, y_pred_prob)
print("AUC: {:.3f}".format(auc))

# Calculating performance metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
# time.time must be called; subtracting from the bare function raises TypeError.
end = time.time()
# Printing the performance metrics
print("Accuracy: {:.3f}".format(accuracy))
print("Precision: {:.3f}".format(precision))
print("Recall: {:.3f}".format(recall))
print("F1 Score: {:.3f}".format(f1))
print("Execution time: {:.3f}".format(end-start))
# Plotting the ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)
plt.plot(fpr, tpr, 'b-')
plt.plot([0, 1], [0, 1], 'r--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.show()
