<a href="https://colab.research.google.com/github/MIRYALASAITEJA/2303A51930/blob/main/IEEE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Load the dataset
df = pd.read_csv('Crop_recommendation.csv')

# Prepare the data
# Separate features (X) and target (y)
X = df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
y = df['label']

# Encode the categorical target variable 'label' into numerical labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Initialize and train the models
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Support Vector Machine': SVC(),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42)
}

# Iterate through models, train, predict, and print classification report
for name, model in models.items():
    print(f"--- Training {name} ---")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"\nClassification Report for {name}:")
    print(classification_report(y_test, y_pred, target_names=le.classes_))
    print("--------------------------------------------------\n")

--- Training Logistic Regression ---


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Classification Report for Logistic Regression:
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       0.85      0.85      0.85        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       0.94      1.00      0.97        17
      cotton       0.80      0.94      0.86        17
      grapes       1.00      1.00      1.00        14
        jute       0.87      0.87      0.87        23
 kidneybeans       1.00      1.00      1.00        20
      lentil       0.77      0.91      0.83        11
       maize       0.94      0.81      0.87        21
       mango       1.00      1.00      1.00        19
   mothbeans       0.91      0.88      0.89        24
    mungbean       1.00      1.00      1.00        19
   muskmelon       1.00      1.00      1.00        17
      orange       1.00      1.00

In [7]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import silhouette_score

# Load the dataset
df = pd.read_csv('/content/Crop_recommendation.csv')

# Prepare the data for clustering
X = df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# --- K-Means Clustering ---
n_clusters = df['label'].nunique()  # 22 unique crops
kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
kmeans_labels = kmeans.fit_predict(X_scaled)
df['kmeans_cluster'] = kmeans_labels

# Silhouette Score for KMeans
silhouette_kmeans = silhouette_score(X_scaled, kmeans_labels)

print("\n================= K-Means Clustering Results =================")
print(f"Number of clusters (fixed): {n_clusters}")
print("Count of data points in each cluster:")
print(df['kmeans_cluster'].value_counts().sort_index())
print(f"\nSilhouette Score (KMeans): {silhouette_kmeans:.4f}")
print("\nSample Data with KMeans cluster labels:")
print(df.head(20))

# --- DBSCAN Clustering ---
dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan_labels = dbscan.fit_predict(X_scaled)
df['dbscan_cluster'] = dbscan_labels

# Count valid clusters for silhouette (ignoring noise -1)
unique_labels = set(dbscan_labels)
n_clusters_dbscan = len(unique_labels) - (1 if -1 in unique_labels else 0)

# Silhouette Score for DBSCAN (only if more than 1 cluster)
if n_clusters_dbscan > 1:
    silhouette_dbscan = silhouette_score(X_scaled, dbscan_labels)
else:
    silhouette_dbscan = None

print("\n================= DBSCAN Clustering Results =================")
print(f"Number of clusters found: {n_clusters_dbscan}")
print("Count of data points in each cluster (-1 = outliers):")
print(df['dbscan_cluster'].value_counts().sort_index())

if silhouette_dbscan is not None:
    print(f"\nSilhouette Score (DBSCAN): {silhouette_dbscan:.4f}")
else:
    print("\nSilhouette Score (DBSCAN): Not applicable (only 1 cluster found)")

print("\nSample Data with DBSCAN cluster labels:")
print(df.head(20))

# --- Save full dataset with clustering results ---
df.to_csv("Crop_clusters_results.csv", index=False)
print("\nFull results saved as 'Crop_clusters_results.csv'")


Number of clusters (fixed): 22
Count of data points in each cluster:
kmeans_cluster
0     143
1     103
2     130
3     157
4     200
5      48
6      99
7      53
8     157
9     119
10    100
11    102
12    110
13     42
14     52
15    103
16    151
17     33
18    100
19    102
20     59
21     37
Name: count, dtype: int64

Silhouette Score (KMeans): 0.3446

Sample Data with KMeans cluster labels:
     N   P   K  temperature   humidity        ph    rainfall label  \
0   90  42  43    20.879744  82.002744  6.502985  202.935536  rice   
1   85  58  41    21.770462  80.319644  7.038096  226.655537  rice   
2   60  55  44    23.004459  82.320763  7.840207  263.964248  rice   
3   74  35  40    26.491096  80.158363  6.980401  242.864034  rice   
4   78  42  42    20.130175  81.604873  7.628473  262.717340  rice   
5   69  37  42    23.058049  83.370118  7.073454  251.055000  rice   
6   69  55  38    22.708838  82.639414  5.700806  271.324860  rice   
7   94  53  40    20.277744  82.8

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report

# Load the dataset
df = pd.read_csv('Crop_recommendation.csv')

# Prepare the data
# Separate features (X) and target (y)
X = df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
y = df['label']

# Encode the categorical target variable 'label' into numerical labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# One-hot encode the numerical labels for deep learning
y_one_hot = to_categorical(y_encoded)

# Scale the features for better model performance
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_one_hot, test_size=0.2, random_state=42)

# Get the number of features and output classes
n_features = X_train.shape[1]
n_classes = y_one_hot.shape[1]

# Build the Sequential deep learning model
model = Sequential()
model.add(Dense(128, input_shape=(n_features,), activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(n_classes, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Print a summary of the model architecture
print(model.summary())

# Train the model
print("--- Training the Deep Learning Model ---")
model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1, validation_split=0.1)

# Evaluate the model on the test data
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"\nModel Accuracy on Test Data: {accuracy:.4f}")

# Generate predictions and classification report
y_pred_one_hot = model.predict(X_test)
y_pred = np.argmax(y_pred_one_hot, axis=1)
y_test_labels = np.argmax(y_test, axis=1)

print("\n--- Classification Report ---")
print(classification_report(y_test_labels, y_pred, target_names=le.classes_))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


None
--- Training the Deep Learning Model ---
Epoch 1/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.2728 - loss: 2.8228 - val_accuracy: 0.6307 - val_loss: 1.9453
Epoch 2/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7020 - loss: 1.6785 - val_accuracy: 0.8466 - val_loss: 0.9171
Epoch 3/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8822 - loss: 0.7684 - val_accuracy: 0.8864 - val_loss: 0.5204
Epoch 4/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9126 - loss: 0.4403 - val_accuracy: 0.8807 - val_loss: 0.3896
Epoch 5/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9347 - loss: 0.2998 - val_accuracy: 0.9318 - val_loss: 0.2882
Epoch 6/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9421 - loss: 0.2379 - val_accuracy: 0.9205 - val_loss: 0

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.ensemble import VotingClassifier, StackingClassifier
from sklearn.metrics import classification_report

# Load the dataset
df = pd.read_csv('Crop_recommendation.csv')

# Prepare the data
# Separate features (X) and target (y)
X = df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
y = df['label']

# Encode the categorical target variable 'label' into numerical labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Scale the features for better model performance
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

# Define the base estimators (individual models) for the ensembles
estimators = [
    ('log_reg', LogisticRegression(max_iter=1000, random_state=42)),
    ('rf_clf', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('gb_clf', GradientBoostingClassifier(n_estimators=100, random_state=42))
]

## Voting Classifier (Hard Voting)
# The VotingClassifier aggregates predictions from multiple models
# and uses the majority vote to determine the final prediction.
print("--- Training VotingClassifier ---")
eclf1 = VotingClassifier(estimators=estimators, voting='hard')
eclf1 = eclf1.fit(X_train, y_train)
y_pred_voting = eclf1.predict(X_test)

print("\nClassification Report for VotingClassifier:")
print(classification_report(y_test, y_pred_voting, target_names=le.classes_))
print("\n--------------------------------------------------\n")

## Stacking Classifier
# Stacking uses the predictions of the base models as features
# for a final, meta-model (here, a Logistic Regression model).
print("--- Training StackingClassifier ---")
stacking_clf = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(random_state=42)
)
stacking_clf.fit(X_train, y_train)
y_pred_stacking = stacking_clf.predict(X_test)

print("\nClassification Report for StackingClassifier:")
print(classification_report(y_test, y_pred_stacking, target_names=le.classes_))
print("\n--------------------------------------------------\n")

--- Training VotingClassifier ---

Classification Report for VotingClassifier:
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       0.95      1.00      0.98        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       1.00      1.00      1.00        17
      cotton       1.00      1.00      1.00        17
      grapes       1.00      1.00      1.00        14
        jute       0.88      1.00      0.94        23
 kidneybeans       1.00      1.00      1.00        20
      lentil       0.92      1.00      0.96        11
       maize       1.00      1.00      1.00        21
       mango       1.00      1.00      1.00        19
   mothbeans       1.00      0.96      0.98        24
    mungbean       1.00      1.00      1.00        19
   muskmelon       1.00      1.00      1.00        17
  

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report

# Load the dataset
df = pd.read_csv('Crop_recommendation.csv')

# Prepare the data
# Separate features (X) and target (y)
X = df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
y = df['label']

# Encode the categorical target variable 'label' into numerical labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Initialize and train the Gradient Boosting Classifier
# This algorithm builds a series of decision trees and combines their predictions.
gb_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

print("--- Training the Gradient Boosting Model ---")
gb_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred_gb = gb_model.predict(X_test)

# Print the classification report
print("\n--- Classification Report for Gradient Boosting Model ---")
print(classification_report(y_test, y_pred_gb, target_names=le.classes_))

--- Training the Gradient Boosting Model ---

--- Classification Report for Gradient Boosting Model ---
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       0.95      1.00      0.98        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      0.96      0.98        27
      coffee       1.00      1.00      1.00        17
      cotton       0.94      1.00      0.97        17
      grapes       1.00      1.00      1.00        14
        jute       0.82      1.00      0.90        23
 kidneybeans       1.00      1.00      1.00        20
      lentil       0.92      1.00      0.96        11
       maize       1.00      0.95      0.98        21
       mango       1.00      1.00      1.00        19
   mothbeans       1.00      0.96      0.98        24
    mungbean       1.00      1.00      1.00        19
   muskmelon       1.00      1.