In [57]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import AffinityPropagation
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from sklearn.metrics import silhouette_score, adjusted_rand_score, homogeneity_completeness_v_measure
from sklearn.metrics import accuracy_score

In [58]:
df=pd.read_csv("Crop_recommendation.csv")

In [59]:
df.head()

Unnamed: 0,Nitrogen,phosphorus,potassium,temperature,humidity,ph,rainfall,label,Unnamed: 8,Unnamed: 9
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice,,
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice,,
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice,,
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice,,
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice,,


In [60]:
df=df.dropna(axis=1)

In [61]:
df.head()

Unnamed: 0,Nitrogen,phosphorus,potassium,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [62]:
train, test = train_test_split(df, test_size=0.2, random_state=42)
X_train = train[['Nitrogen', 'phosphorus', 'potassium', 'temperature', 'humidity', 'ph', 'rainfall']].values
X_test = test[['Nitrogen', 'phosphorus', 'potassium', 'temperature', 'humidity', 'ph', 'rainfall']].values
y_train = train['label']
y_test = test['label']

**Affinity Propagation**

In [63]:
# AffinityPropagation
affinity_propagation = AffinityPropagation()
affinity_propagation.fit(X_train)

In [64]:
#AffinityPropagation

# Get the predicted cluster labels for the training data
train_cluster_labels = affinity_propagation.labels_

# Compute evaluation metrics for Affinity Propagation
silhouette_avg = silhouette_score(X_train, train_cluster_labels)
ari = adjusted_rand_score(y_train, train_cluster_labels)
homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(y_train, train_cluster_labels)

# Print evaluation metrics
print("Silhouette Score:", silhouette_avg)
print("Adjusted Rand Index:", ari)
print("Homogeneity:", homogeneity)
print("Completeness:", completeness)
print("V-measure:", v_measure)


Silhouette Score: 0.368500876146879
Adjusted Rand Index: 0.7451747561934313
Homogeneity: 0.9429264229511191
Completeness: 0.8386322623658429
V-measure: 0.8877266023749769


In [65]:
# Get the predicted cluster labels for the training and test data
train_cluster_labels = affinity_propagation.predict(X_train)
test_cluster_labels = affinity_propagation.predict(X_test)

**Affinity Propagation, Decision Tree Classifier, Voting Classifier**

In [66]:
# Train a Decision Tree classifier on the original features
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train, y_train)

# Create an ensemble model with Decision Tree classifier and cluster labels as features
ensemble_model = VotingClassifier(
    estimators=[('decision_tree', decision_tree)],
    voting='hard'
)

# Concatenate the cluster labels with the original features
X_train_with_clusters = np.column_stack((X_train, train_cluster_labels))
X_test_with_clusters = np.column_stack((X_test, test_cluster_labels))

# Train the ensemble model
ensemble_model.fit(X_train_with_clusters, y_train)

# Predict crop labels using the ensemble model
ensemble_predictions = ensemble_model.predict(X_test_with_clusters)


In [67]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score


accuracy = accuracy_score(y_test, ensemble_predictions)
print("Accuracy:", accuracy)

confusion_mat = confusion_matrix(y_test, ensemble_predictions)
print("Confusion Matrix:")
print(confusion_mat)

classification_rep = classification_report(y_test, ensemble_predictions)
print("Classification Report:")
print(classification_rep)

precision = precision_score(y_test, ensemble_predictions, average=None)
print("Precision for each class:")
print(precision)

recall = recall_score(y_test, ensemble_predictions, average=None)
print("Recall for each class:")
print(recall)

f1 = f1_score(y_test, ensemble_predictions, average='weighted')
print("F1-score:", f1)


Accuracy: 0.9863636363636363
Confusion Matrix:
[[23  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0 21  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0 26  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0 27  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0 17  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0 17  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0 14  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0 22  0  0  0  0  0  0  0  0  0  0  0  1  0]
 [ 0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0 11  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0 20  0  0  1  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0 19  0  0  0  0  0  0  0  0  0]
 [ 0  0  1  0  0  0  0  0  0  0  1  0  0 22  0  0  0  0 

**Affinity Propagation, Random Forest Classifier, Voting Classifier**

In [68]:
# Train a Random Forest classifier
random_forest = RandomForestClassifier(random_state=42)
random_forest.fit(X_train, y_train)

# Create an ensemble model with Random Forest and cluster labels as features
ensemble_model = VotingClassifier(
    estimators=[('random_forest', random_forest)],
    voting='hard'
)

# Concatenate the cluster labels with the original features
X_train_with_clusters = np.column_stack((X_train, train_cluster_labels))
X_test_with_clusters = np.column_stack((X_test, test_cluster_labels))

# Train the ensemble model
ensemble_model.fit(X_train_with_clusters, y_train)

# Predict crop labels using the ensemble model
ensemble_predictions = ensemble_model.predict(X_test_with_clusters)

# Calculate accuracy
accuracy = accuracy_score(y_test, ensemble_predictions)
print("Accuracy:", accuracy)

# Confusion matrix
confusion_mat = confusion_matrix(y_test, ensemble_predictions)
print("Confusion Matrix:")
print(confusion_mat)

# Classification report
classification_rep = classification_report(y_test, ensemble_predictions)
print("Classification Report:")
print(classification_rep)

# Precision for each class
precision = precision_score(y_test, ensemble_predictions, average=None)
print("Precision for each class:")
print(precision)

# Recall for each class
recall = recall_score(y_test, ensemble_predictions, average=None)
print("Recall for each class:")
print(recall)

# F1-score
f1 = f1_score(y_test, ensemble_predictions, average='weighted')
print("F1-score:", f1)



Accuracy: 0.9840909090909091
Confusion Matrix:
[[23  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0 21  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0 26  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0 27  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0 17  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0 17  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0 14  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  1  0  0 22  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0 11  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0 21  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0 19  0  0  0  0  0  0  0  0  0]
 [ 0  0  1  0  0  0  0  0  0  0  1  0  0 22  0  0  0  0 

**Affinity Propagation, Random Forest Classifier, Voting Classifier, Support Vector Machines (SVM)**

In [69]:
# Train an SVM classifier
svm = SVC(kernel='rbf', random_state=42)
svm.fit(X_train, y_train)

# Create an ensemble model with Random Forest, SVM, and cluster labels as features
ensemble_model = VotingClassifier(
    estimators=[('random_forest', random_forest), ('svm', svm)],
    voting='hard'
)

# Concatenate the cluster labels with the original features
X_train_with_clusters = np.column_stack((X_train, train_cluster_labels))
X_test_with_clusters = np.column_stack((X_test, test_cluster_labels))

# Train the ensemble model
ensemble_model.fit(X_train_with_clusters, y_train)

# Predict crop labels using the ensemble model
ensemble_predictions = ensemble_model.predict(X_test_with_clusters)

# Calculate accuracy
accuracy = accuracy_score(y_test, ensemble_predictions)
print("Accuracy:", accuracy)

# Confusion matrix
confusion_mat = confusion_matrix(y_test, ensemble_predictions)
print("Confusion Matrix:")
print(confusion_mat)

# Classification report
classification_rep = classification_report(y_test, ensemble_predictions)
print("Classification Report:")
print(classification_rep)

# Precision for each class
precision = precision_score(y_test, ensemble_predictions, average=None)
print("Precision for each class:")
print(precision)

# Recall for each class
recall = recall_score(y_test, ensemble_predictions, average=None)
print("Recall for each class:")
print(recall)

# F1-score
f1 = f1_score(y_test, ensemble_predictions, average='weighted')
print("F1-score:", f1)


Accuracy: 0.9590909090909091
Confusion Matrix:
[[23  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0 21  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0 26  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0 27  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0 17  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0 17  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0 14  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  1  0  0 22  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0 11  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  3  0  0  0  0 18  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0 19  0  0  0  0  0  0  0  0  0]
 [ 0  0  1  0  0  0  0  0  0  0  3  0  0 20  0  0  0  0 

**Gradient Boosting Classifier**

In [70]:
from sklearn.ensemble import GradientBoostingClassifier

# Define and train the Gradient Boosting model
gradient_boosting = GradientBoostingClassifier()
gradient_boosting.fit(X_train, y_train)

# Make predictions on the test set
y_pred = gradient_boosting.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)









Accuracy: 0.9818181818181818
Precision: 0.9842712842712842
Recall: 0.9818181818181818
F1-score: 0.9818514668069125


**Affinity Propagation, Random Forest Classifier, Voting Classifier, Support Vector Machines (SVM), Gradient Boosting Classifier**

In [71]:
from sklearn.model_selection import train_test_split
from sklearn.cluster import AffinityPropagation
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Affinity Propagation
affinity_propagation = AffinityPropagation()
affinity_propagation.fit(X_train)
train_cluster_labels = affinity_propagation.predict(X_train)
test_cluster_labels = affinity_propagation.predict(X_test)

# Gradient Boosting Classifier
gradient_boosting = GradientBoostingClassifier(random_state=42)
gradient_boosting.fit(X_train, y_train)

# Random Forest Classifier
random_forest = RandomForestClassifier(random_state=42)
random_forest.fit(X_train, y_train)

# Support Vector Machines (SVM)
svm = SVC(random_state=42)
svm.fit(X_train, y_train)

# Concatenate cluster labels with the original features
X_train_with_clusters = np.column_stack((X_train, train_cluster_labels))
X_test_with_clusters = np.column_stack((X_test, test_cluster_labels))

# Voting Classifier
voting_classifier = VotingClassifier(
    estimators=[
        ('gradient_boosting', gradient_boosting),
        ('random_forest', random_forest),
        ('svm', svm)
    ],
    voting='hard'
)
voting_classifier.fit(X_train_with_clusters, y_train)

# Make predictions on the test set
y_pred = voting_classifier.predict(X_test_with_clusters)

# Evaluate the ensemble model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Accuracy: 0.9795454545454545
Precision: 0.9814221563460694
Recall: 0.9795454545454545
F1-score: 0.9795649902233715


**Affinity Propagation, Decision Tree Classifier, Voting Classifier, Gradient Boosting Classifier**

In [72]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.cluster import AffinityPropagation
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# Affinity Propagation
affinity_propagation = AffinityPropagation()
affinity_propagation.fit(X_train)
train_cluster_labels = affinity_propagation.predict(X_train)
test_cluster_labels = affinity_propagation.predict(X_test)

# Decision Tree Classifier
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train, y_train)

# Voting Classifier
voting_classifier = VotingClassifier(
    estimators=[
        ('decision_tree', decision_tree),
        ('gradient_boosting', gradient_boosting)
    ],
    voting='hard'
)
voting_classifier.fit(X_train, y_train)

# Gradient Boosting Classifier
gradient_boosting = GradientBoostingClassifier(random_state=42)
gradient_boosting.fit(X_train, y_train)

# Concatenate cluster labels with the original features
X_train_with_clusters = np.column_stack((X_train, train_cluster_labels))
X_test_with_clusters = np.column_stack((X_test, test_cluster_labels))

# Train the Voting Classifier on the combined features
voting_classifier.fit(X_train_with_clusters, y_train)

# Make predictions on the test set
y_pred = voting_classifier.predict(X_test_with_clusters)

# Evaluate the ensemble model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Accuracy: 0.9840909090909091
Precision: 0.9856065020837748
Recall: 0.9840909090909091
F1-score: 0.9840457379648412


**Affinity Propagation and Gradient Boosting Classifier**

In [73]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.cluster import AffinityPropagation
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Affinity Propagation
affinity_propagation = AffinityPropagation()
affinity_propagation.fit(X_train)
train_cluster_labels = affinity_propagation.predict(X_train)
test_cluster_labels = affinity_propagation.predict(X_test)

# Concatenate cluster labels with the original features
X_train_with_clusters = np.column_stack((X_train, train_cluster_labels))
X_test_with_clusters = np.column_stack((X_test, test_cluster_labels))

# Gradient Boosting Classifier
gradient_boosting = GradientBoostingClassifier(random_state=42)
gradient_boosting.fit(X_train_with_clusters, y_train)

# Make predictions on the test set
y_pred = gradient_boosting.predict(X_test_with_clusters)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Accuracy: 0.9840909090909091
Precision: 0.9856065020837748
Recall: 0.9840909090909091
F1-score: 0.9840457379648412


**Affinity Propagation, Gradient Boosting Classifier, and Voting Classifier**

In [74]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.cluster import AffinityPropagation
from sklearn.ensemble import GradientBoostingClassifier, VotingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Affinity Propagation
affinity_propagation = AffinityPropagation()
affinity_propagation.fit(X_train)
train_cluster_labels = affinity_propagation.predict(X_train)
test_cluster_labels = affinity_propagation.predict(X_test)

# Concatenate cluster labels with the original features
X_train_with_clusters = np.column_stack((X_train, train_cluster_labels))
X_test_with_clusters = np.column_stack((X_test, test_cluster_labels))

# Gradient Boosting Classifier
gradient_boosting = GradientBoostingClassifier(random_state=42)
gradient_boosting.fit(X_train_with_clusters, y_train)

# Voting Classifier
voting_classifier = VotingClassifier(
    estimators=[
        ('gradient_boosting', gradient_boosting)
    ],
    voting='hard'
)
voting_classifier.fit(X_train_with_clusters, y_train)

# Make predictions on the test set
y_pred = voting_classifier.predict(X_test_with_clusters)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Accuracy: 0.9840909090909091
Precision: 0.9856065020837748
Recall: 0.9840909090909091
F1-score: 0.9840457379648412


**Affinity Propagation, Decision Tree Classifier, Voting Classifier, and K-Nearest Neighbors (KNN) Classifier**

In [75]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.cluster import AffinityPropagation
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score

# Affinity Propagation for clustering
affinity_propagation = AffinityPropagation()
affinity_propagation.fit(X_train)

# Get the cluster labels for the training data
train_cluster_labels = affinity_propagation.predict(X_train)
test_cluster_labels = affinity_propagation.predict(X_test)

# Decision Tree Classifier
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train, y_train)

# K-Nearest Neighbors (KNN) Classifier
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)

# Concatenate the cluster labels with the original features
X_train_with_clusters = np.column_stack((X_train, train_cluster_labels))
X_test_with_clusters = np.column_stack((X_test, test_cluster_labels))

# Voting Classifier
voting_classifier = VotingClassifier(
    estimators=[
        ('decision_tree', decision_tree),
        ('knn', knn)
    ],
    voting='hard'
)
voting_classifier.fit(X_train_with_clusters, y_train)

# Make predictions on the test set
y_pred = voting_classifier.predict(X_test_with_clusters)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Accuracy: 0.9613636363636363
Precision: 0.9662698010889048
Recall: 0.9613636363636363
F1-score: 0.9610867799689151


**Affinity Propagation, Decision Tree Classifier, Voting Classifier, and Naive Bayes Classifier**

In [76]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.cluster import AffinityPropagation
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score

# Affinity Propagation for clustering
affinity_propagation = AffinityPropagation()
affinity_propagation.fit(X_train)

# Get the cluster labels for the training data
train_cluster_labels = affinity_propagation.predict(X_train)
test_cluster_labels = affinity_propagation.predict(X_test)

# Decision Tree Classifier
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train, y_train)

# Naive Bayes Classifier
naive_bayes = GaussianNB()
naive_bayes.fit(X_train, y_train)

# Concatenate the cluster labels with the original features
X_train_with_clusters = np.column_stack((X_train, train_cluster_labels))
X_test_with_clusters = np.column_stack((X_test, test_cluster_labels))

# Voting Classifier
voting_classifier = VotingClassifier(
    estimators=[
        ('decision_tree', decision_tree),
        ('naive_bayes', naive_bayes)
    ],
    voting='hard'
)
voting_classifier.fit(X_train_with_clusters, y_train)

# Make predictions on the test set
y_pred = voting_classifier.predict(X_test_with_clusters)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Accuracy: 0.9863636363636363
Precision: 0.9874868187368187
Recall: 0.9863636363636363
F1-score: 0.9863322125173449


**Affinity Propagation, Decision Tree Classifier, Voting Classifier, and Logistic Regression**

In [77]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.cluster import AffinityPropagation
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from warnings import filterwarnings

filterwarnings("ignore")

# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Affinity Propagation for clustering
affinity_propagation = AffinityPropagation()
affinity_propagation.fit(X_train_scaled)

# Get the cluster labels for the training data
train_cluster_labels = affinity_propagation.predict(X_train_scaled)
test_cluster_labels = affinity_propagation.predict(X_test_scaled)

# Decision Tree Classifier
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train_scaled, y_train)

# Logistic Regression Classifier
logistic_regression = LogisticRegression(max_iter=1000, random_state=42)
logistic_regression.fit(X_train_scaled, y_train)

# Concatenate the cluster labels with the original features
X_train_with_clusters = np.column_stack((X_train_scaled, train_cluster_labels))
X_test_with_clusters = np.column_stack((X_test_scaled, test_cluster_labels))

# Voting Classifier
voting_classifier = VotingClassifier(
    estimators=[
        ('decision_tree', decision_tree),
        ('logistic_regression', logistic_regression)
    ],
    voting='hard'
)
voting_classifier.fit(X_train_with_clusters, y_train)

# Make predictions on the test set
y_pred = voting_classifier.predict(X_test_with_clusters)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Accuracy: 0.9590909090909091
Precision: 0.9637169833624873
Recall: 0.9590909090909091
F1-score: 0.9584668535939738


**Affinity Propagation, Support Vector Machines (SVM), and Voting Classifier**

In [78]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.cluster import AffinityPropagation
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from warnings import filterwarnings

filterwarnings("ignore")

# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Affinity Propagation for clustering
affinity_propagation = AffinityPropagation()
affinity_propagation.fit(X_train_scaled)

# Get the cluster labels for the training data
train_cluster_labels = affinity_propagation.predict(X_train_scaled)
test_cluster_labels = affinity_propagation.predict(X_test_scaled)

# Support Vector Machines (SVM) Classifier
svm_classifier = SVC(kernel='rbf', random_state=42)
svm_classifier.fit(X_train_scaled, y_train)

# Concatenate the cluster labels with the original features
X_train_with_clusters = np.column_stack((X_train_scaled, train_cluster_labels))
X_test_with_clusters = np.column_stack((X_test_scaled, test_cluster_labels))

# Voting Classifier
voting_classifier = VotingClassifier(
    estimators=[
        ('svm', svm_classifier)
    ],
    voting='hard'
)
voting_classifier.fit(X_train_with_clusters, y_train)

# Make predictions on the test set
y_pred = voting_classifier.predict(X_test_with_clusters)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Accuracy: 0.45227272727272727
Precision: 0.4043712784824215
Recall: 0.45227272727272727
F1-score: 0.35740599378471605


**Affinity Propagation, Decision Tree Classifier, Voting Classifier, and regularization methods like L1 or L2 regularization**

In [79]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.cluster import AffinityPropagation
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from warnings import filterwarnings

filterwarnings("ignore")

# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Affinity Propagation for clustering
affinity_propagation = AffinityPropagation()
affinity_propagation.fit(X_train_scaled)

# Get the cluster labels for the training data
train_cluster_labels = affinity_propagation.predict(X_train_scaled)
test_cluster_labels = affinity_propagation.predict(X_test_scaled)

# Decision Tree Classifier
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train_scaled, y_train)

# Concatenate the cluster labels with the original features
X_train_with_clusters = np.column_stack((X_train_scaled, train_cluster_labels))
X_test_with_clusters = np.column_stack((X_test_scaled, test_cluster_labels))

# Voting Classifier
voting_classifier = VotingClassifier(
    estimators=[
        ('decision_tree', decision_tree)
    ],
    voting='hard'
)
voting_classifier.fit(X_train_with_clusters, y_train)

# Regularization using Logistic Regression with L1 or L2 regularization
logistic_regression = LogisticRegression(penalty='l2', random_state=42)
logistic_regression.fit(X_train_scaled, y_train)

# Make predictions on the test set
y_pred_voting = voting_classifier.predict(X_test_with_clusters)
y_pred_logistic = logistic_regression.predict(X_test_scaled)

# Evaluate the Voting Classifier
accuracy_voting = accuracy_score(y_test, y_pred_voting)
precision_voting = precision_score(y_test, y_pred_voting, average='weighted')
recall_voting = recall_score(y_test, y_pred_voting, average='weighted')
f1_voting = f1_score(y_test, y_pred_voting, average='weighted')

print("Voting Classifier:")
print("Accuracy:", accuracy_voting)
print("Precision:", precision_voting)
print("Recall:", recall_voting)
print("F1-score:", f1_voting)

# Evaluate the Logistic Regression
accuracy_logistic = accuracy_score(y_test, y_pred_logistic)
precision_logistic = precision_score(y_test, y_pred_logistic, average='weighted')
recall_logistic = recall_score(y_test, y_pred_logistic, average='weighted')
f1_logistic = f1_score(y_test, y_pred_logistic, average='weighted')

print("Logistic Regression:")
print("Accuracy:", accuracy_logistic)
print("Precision:", precision_logistic)
print("Recall:", recall_logistic)
print("F1-score:", f1_logistic)

from sklearn.metrics import accuracy_score

# Make predictions on the test set
y_pred_voting = voting_classifier.predict(X_test_with_clusters)

# Calculate the overall accuracy
accuracy = accuracy_score(y_test, y_pred_voting)

print("Overall Accuracy:", accuracy)


Voting Classifier:
Accuracy: 0.9886363636363636
Precision: 0.9889971139971141
Recall: 0.9886363636363636
F1-score: 0.988595068964984
Logistic Regression:
Accuracy: 0.9636363636363636
Precision: 0.9644420567548909
Recall: 0.9636363636363636
F1-score: 0.9635115059268676
Overall Accuracy: 0.9886363636363636


**Affinity Propagation, Decision Tree Classifier, Voting Classifier, Random Forest**

In [80]:
from sklearn.cluster import AffinityPropagation
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Affinity Propagation
affinity_propagation = AffinityPropagation()
affinity_propagation.fit(X_train)
train_cluster_labels = affinity_propagation.labels_
test_cluster_labels = affinity_propagation.predict(X_test)

# Decision Tree Classifier
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train[:, :-1], y_train)  # Exclude the last column which represents the cluster labels

# Voting Classifier with Decision Tree and Random Forest
voting_classifier = VotingClassifier(
    estimators=[('decision_tree', decision_tree), ('random_forest', RandomForestClassifier(random_state=42))],
    voting='hard'
)
voting_classifier.fit(X_train, y_train)

# Concatenate cluster labels with original features
X_train_with_clusters = np.column_stack((X_train[:, :-1], train_cluster_labels))
X_test_with_clusters = np.column_stack((X_test[:, :-1], test_cluster_labels))

# Make predictions on the test set using the ensemble model
ensemble_predictions = voting_classifier.predict(X_test_with_clusters)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Accuracy: 0.45227272727272727
Precision: 0.4043712784824215
Recall: 0.45227272727272727
F1-score: 0.35740599378471605


**Affinity Propagation, Decision Tree Classifier, Voting Classifier, XGBoost**

In [81]:
import numpy as np
from sklearn.cluster import AffinityPropagation
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Affinity Propagation
affinity_propagation = AffinityPropagation()
affinity_propagation.fit(X_train)
train_cluster_labels = affinity_propagation.labels_
test_cluster_labels = affinity_propagation.predict(X_test)

# Decision Tree Classifier
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train, y_train)

# Voting Classifier with Decision Tree and XGBoost
voting_classifier = VotingClassifier(
    estimators=[('decision_tree', decision_tree), ('xgboost', XGBClassifier(random_state=42))],
    voting='hard'
)
voting_classifier.fit(X_train, y_train)

# Concatenate cluster labels with original features
X_train_with_clusters = np.column_stack((X_train, train_cluster_labels))
X_test_with_clusters = np.column_stack((X_test, test_cluster_labels))

# Select the columns for training and prediction
train_columns = list(range(X_train.shape[1]))  # Include all columns
test_columns = list(range(X_test.shape[1]))  # Include all columns

# Make predictions on the test set using the ensemble model
ensemble_predictions = voting_classifier.predict(X_test_with_clusters[:, test_columns])


# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Accuracy: 0.45227272727272727
Precision: 0.4043712784824215
Recall: 0.45227272727272727
F1-score: 0.35740599378471605


**Affinity Propagation, Decision Tree Classifier, Voting Classifier, LightGBM**

In [82]:
import numpy as np
from sklearn.cluster import AffinityPropagation
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
import lightgbm as lgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Affinity Propagation
affinity_propagation = AffinityPropagation()
affinity_propagation.fit(X_train)
train_cluster_labels = affinity_propagation.labels_
test_cluster_labels = affinity_propagation.predict(X_test)

# Decision Tree Classifier
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train, y_train)

# Voting Classifier with Decision Tree and LightGBM
voting_classifier = VotingClassifier(
    estimators=[('decision_tree', decision_tree), ('lightgbm', lgb.LGBMClassifier(random_state=42))],
    voting='hard'
)
voting_classifier.fit(X_train, y_train)

# Concatenate cluster labels with original features
X_train_with_clusters = np.column_stack((X_train, train_cluster_labels))
X_test_with_clusters = np.column_stack((X_test, test_cluster_labels))

# Select the columns for training and prediction
train_columns = list(range(X_train.shape[1]))  # Include all columns
test_columns = list(range(X_test.shape[1]))  # Include all columns

# Make predictions on the test set using the ensemble model
ensemble_predictions = voting_classifier.predict(X_test_with_clusters[:, test_columns])

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Accuracy: 0.45227272727272727
Precision: 0.4043712784824215
Recall: 0.45227272727272727
F1-score: 0.35740599378471605


**Affinity Propagation, Decision Tree Classifier, Voting Classifier,
 AdaBoost**

In [83]:
import numpy as np
from sklearn.cluster import AffinityPropagation
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Affinity Propagation
affinity_propagation = AffinityPropagation()
affinity_propagation.fit(X_train)
train_cluster_labels = affinity_propagation.labels_
test_cluster_labels = affinity_propagation.predict(X_test)

# Decision Tree Classifier
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train, y_train)

# Voting Classifier with Decision Tree and AdaBoost
voting_classifier = VotingClassifier(
    estimators=[('decision_tree', decision_tree), ('adaboost', AdaBoostClassifier(random_state=42))],
    voting='hard'
)
voting_classifier.fit(X_train, y_train)

# Concatenate cluster labels with original features
X_train_with_clusters = np.column_stack((X_train, train_cluster_labels))
X_test_with_clusters = np.column_stack((X_test, test_cluster_labels))

# Select the columns for training and prediction
train_columns = list(range(X_train.shape[1]))  # Include all columns
test_columns = list(range(X_test.shape[1]))  # Include all columns

# Make predictions on the test set using the ensemble model
ensemble_predictions = voting_classifier.predict(X_test_with_clusters[:, test_columns])

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Accuracy: 0.45227272727272727
Precision: 0.4043712784824215
Recall: 0.45227272727272727
F1-score: 0.35740599378471605


**Affinity Propagation, Decision Tree Classifier, Voting Classifier, regularization methods like L1 or L2 regularization, Naive Bayes Classifier**

In [84]:
import numpy as np
from sklearn.cluster import AffinityPropagation
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Affinity Propagation
affinity_propagation = AffinityPropagation()
affinity_propagation.fit(X_train)
train_cluster_labels = affinity_propagation.labels_
test_cluster_labels = affinity_propagation.predict(X_test)

# Decision Tree Classifier
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train, y_train)

# Voting Classifier with Decision Tree, Logistic Regression, and Naive Bayes
voting_classifier = VotingClassifier(
    estimators=[('decision_tree', decision_tree),
                ('logistic_regression', LogisticRegression(penalty='l2', random_state=42)),
                ('naive_bayes', MultinomialNB())],
    voting='hard'
)
voting_classifier.fit(X_train, y_train)

# Concatenate cluster labels with original features
X_train_with_clusters = np.column_stack((X_train, train_cluster_labels))
X_test_with_clusters = np.column_stack((X_test, test_cluster_labels))

# Select the columns for training and prediction
train_columns = list(range(X_train.shape[1]))  # Include all columns
test_columns = list(range(X_test.shape[1]))  # Include all columns

# Make predictions on the test set using the ensemble model
ensemble_predictions = voting_classifier.predict(X_test_with_clusters[:, test_columns])

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Accuracy: 0.45227272727272727
Precision: 0.4043712784824215
Recall: 0.45227272727272727
F1-score: 0.35740599378471605


**Affinity Propagation, Decision Tree Classifier, Voting Classifier, regularization methods like L1 or L2 regularization, Naive Bayes Classifier, k-fold cross-validation**

In [85]:
import pandas as pd
from sklearn.cluster import AffinityPropagation
from sklearn.ensemble import VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Step 1: Preprocess the data
train, test = train_test_split(df, test_size=0.2, random_state=42)
X_train = train[['Nitrogen', 'phosphorus', 'potassium', 'temperature', 'humidity', 'ph', 'rainfall']].values
X_test = test[['Nitrogen', 'phosphorus', 'potassium', 'temperature', 'humidity', 'ph', 'rainfall']].values
y_train = train['label']
y_test = test['label']

# Step 2: Perform clustering with Affinity Propagation
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

affinity_propagation = AffinityPropagation()
cluster_labels = affinity_propagation.fit_predict(X_train_scaled)

# Step 3: Train the ensemble model
# Prepare features and labels based on cluster labels
X_train_clustered = pd.DataFrame(X_train, columns=['Nitrogen', 'phosphorus', 'potassium', 'temperature', 'humidity', 'ph', 'rainfall'])
X_train_clustered['cluster'] = cluster_labels

# Define the ensemble models
decision_tree = DecisionTreeClassifier()
voting_classifier = VotingClassifier(estimators=[('lr', LogisticRegression()), ('nb', GaussianNB())])

# Fit the models to the training data
decision_tree.fit(X_train_clustered, y_train)
voting_classifier.fit(X_train_clustered, y_train)

# Step 4: Evaluate the ensemble model
kfold = 5
accuracy_decision_tree = cross_val_score(decision_tree, X_train_clustered, y_train, cv=kfold).mean()
accuracy_voting_classifier = cross_val_score(voting_classifier, X_train_clustered, y_train, cv=kfold).mean()

# Step 5: Make crop recommendations
crop_recommendations = {}

# Perform clustering on the test data
X_test_scaled = scaler.transform(X_test)
test_cluster_labels = affinity_propagation.predict(X_test_scaled)

# Prepare test data with cluster labels
X_test_clustered = pd.DataFrame(X_test, columns=['Nitrogen', 'phosphorus', 'potassium', 'temperature', 'humidity', 'ph', 'rainfall'])
X_test_clustered['cluster'] = test_cluster_labels

# Predict crop labels for test data using the ensemble models
decision_tree_predictions = decision_tree.predict(X_test_clustered)
voting_classifier_predictions = voting_classifier.predict(X_test_clustered)

# Calculate accuracy of the ensemble models on the test data
accuracy_decision_tree_test = accuracy_score(y_test, decision_tree_predictions)
accuracy_voting_classifier_test = accuracy_score(y_test, voting_classifier_predictions)

# Calculate the overall accuracy using majority voting
ensemble_predictions = pd.DataFrame({'Decision Tree': decision_tree_predictions, 'Voting Classifier': voting_classifier_predictions})
ensemble_predictions['Majority Vote'] = ensemble_predictions.mode(axis=1)[0]
accuracy_ensemble = accuracy_score(y_test, ensemble_predictions['Majority Vote'])

print("Accuracy (Decision Tree Classifier):", accuracy_decision_tree_test)
print("Accuracy (Voting Classifier):", accuracy_voting_classifier_test)
print("Overall Accuracy (Ensemble):", accuracy_ensemble)


Accuracy (Decision Tree Classifier): 0.9863636363636363
Accuracy (Voting Classifier): 0.9613636363636363
Overall Accuracy (Ensemble): 0.9636363636363636
