In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV,RandomizedSearchCV #for hyper paremater tuning
from sklearn.preprocessing import StandardScaler,MinMaxScaler# Scales features by removing the mean and scaling to unit variance,Scales each feature by shrinking the range to a defined minimum and maximum
from sklearn.preprocessing import OrdinalEncoder,LabelEncoder,OneHotEncoder #to encode the datato other data types
#one hot encoding to represent categorical variables as numerical values,OrdinalEncoder to convert both samples and features,LabelEncoder for samples
from sklearn.svm import SVC

In [None]:
df = pd.read_csv('/content/drive/MyDrive/Crop_recommendation.csv')
df.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [None]:
class_labels = df['label'].unique().tolist()
class_labels

['rice',
 'maize',
 'chickpea',
 'kidneybeans',
 'pigeonpeas',
 'mothbeans',
 'mungbean',
 'blackgram',
 'lentil',
 'pomegranate',
 'banana',
 'mango',
 'grapes',
 'watermelon',
 'muskmelon',
 'apple',
 'orange',
 'papaya',
 'coconut',
 'cotton',
 'jute',
 'coffee']

In [None]:
le = LabelEncoder()
df['label'] = le.fit_transform(df['label'])

In [None]:
df.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,20
1,85,58,41,21.770462,80.319644,7.038096,226.655537,20
2,60,55,44,23.004459,82.320763,7.840207,263.964248,20
3,74,35,40,26.491096,80.158363,6.980401,242.864034,20
4,78,42,42,20.130175,81.604873,7.628473,262.71734,20


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer

def preprocess_data(data):
    # Separate features and target variable
    X = df.drop('label',axis=1)
    y = df['label']

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Handling missing values
    imputer = SimpleImputer(strategy='mean')
    X_train = imputer.fit_transform(X_train)
    X_test = imputer.transform(X_test)

    # Normalization/Scaling
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Encoding categorical variables (if any)
    # encoder = OneHotEncoder()
    # X_train = encoder.fit_transform(X_train)
    # X_test = encoder.transform(X_test)

    return X_train, X_test, y_train, y_test


In [None]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

def ant_colony_optimization(X_train, y_train, num_ants=10, num_iterations=50, evaporation_rate=0.1, alpha=1, beta=1,min_selected_features=4):
    num_samples, num_features = X_train.shape
    pheromone = np.ones(num_features)
    best_solution = None
    best_score = float('-inf')

    def construct_solution():
        solutions = []
        for ant in range(num_ants):
            solution = np.zeros(num_features, dtype=int)
            # Ensure at least min_selected_features are selected
            selected_indices = np.random.choice(num_features, min_selected_features, replace=False)
            solution[selected_indices] = 1
            solutions.append(solution)
        return solutions


    def evaluate_solution(solution):
        selected_features = X_train[:, solution == 1]
        clf = SVC(kernel='poly', degree=3)  # You can use any classifier of your choice
        scores = cross_val_score(clf, selected_features, y_train, cv=5)
        return np.mean(scores)

    def update_pheromone(solutions):
        nonlocal best_solution, best_score
        pheromone = np.ones(num_features)
        for solution in solutions:
            #print(solution)
            score = evaluate_solution(solution)
            if score > best_score:
                best_solution = solution
                best_score = score
            pheromone *= (1 - evaporation_rate)
            pheromone[solution == 1] += 1 / score

    for iteration in range(num_iterations):
        solutions = construct_solution()
        update_pheromone(solutions)
    return best_solution

In [None]:
def get_selected_features(X_train, best_solution):
    selected_feature_indices = np.where(best_solution == 1)[0]
    selected_features = X_train[:, selected_feature_indices]
    return selected_features

In [None]:
def get_selected_featurestext(X_test, best_solution):
    selected_feature_indices = np.where(best_solution == 1)[0]
    selected_features = X_test[:, selected_feature_indices]
    return selected_features

In [None]:
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.cluster import MiniBatchKMeans
from sklearn.metrics import accuracy_score

def train_classification_models(X_train, y_train, best_solution, X_test, y_test):
    models = {}

    selected_features_train = get_selected_features(X_train, best_solution)
    selected_features_test = get_selected_features(X_test, best_solution)

    # Fit k-means clustering
    kmeans = MiniBatchKMeans(n_clusters=100, random_state=42)
    kmeans.fit(selected_features_train)

    # Transform selected features using k-means
    X_train_reduced = kmeans.transform(selected_features_train)
    X_test_reduced = kmeans.transform(selected_features_test)

    # Train SVM model with reduced dataset
    svm_reduced = SVC(C=1,kernel='poly')
    svm_reduced.fit(X_train_reduced, y_train)
    svm_reduced_score = accuracy_score(y_test, svm_reduced.predict(X_test_reduced))
    print("SVM with Reduced Dataset (k-means clustering) Test Accuracy:", svm_reduced_score)
    print(X_test_reduced)

    predicted_class = svm_reduced.predict(X_test_reduced)

    # Get the recommended crop based on the predicted class index
    recommended_crop = class_labels[predicted_class[0]]

    return recommended_crop,svm_reduced



In [None]:
import time
start_time=time.time()
# Data preprocessing
X_train, X_test, y_train, y_test = preprocess_data(df)

# Feature selection (PCA example)
#X_train_pca, X_test_pca = apply_pca(X_train, X_test,n_components=None)

#print("Number of features after PCA:", X_train.shape[1])
# Ant Colony Optimization
best_solution = ant_colony_optimization(X_train, y_train,num_ants=10, num_iterations=50, evaporation_rate=0.1, alpha=1, beta=1,min_selected_features=4)

print("best_solution",best_solution)
# Train classification models

#recommended_crop = recommend_crop(best_model, best_solution,class_labels,y_test, X_test)

# Display recommendation
#print("best_model",best_model)
execution_time = time.time() - start_time

# Format the execution time
hours = int(execution_time // 3600)
minutes = int((execution_time % 3600) // 60)
seconds = int(execution_time % 60)

# Display the results
print(f"Start Time: {time.strftime('%H:%M:%S', time.localtime(start_time))}")
print(f"Execution Time: {hours:02d}:{minutes:02d}:{seconds:02d}")

best_solution [1 0 1 0 1 0 1]
Start Time: 18:13:45
Execution Time: 00:02:50


In [None]:
recommended_crop,model= train_classification_models(X_train, y_train, best_solution,X_test,y_test)
print("Recommended Crop:", recommended_crop)



SVM with Reduced Dataset (k-means clustering) Test Accuracy: 0.9659090909090909
[[1.73088855 3.37419455 3.89049606 ... 4.00870712 1.36740676 3.11159849]
 [1.21432546 3.01181245 3.65309305 ... 3.56451735 0.93109956 2.83401572]
 [1.23303462 2.70364417 3.0918987  ... 2.63269665 1.59653926 2.41217455]
 ...
 [1.16341127 3.25244256 4.48809906 ... 3.67986153 0.55978506 3.23643554]
 [1.16217928 3.04861985 4.40686523 ... 3.5376794  0.4673495  3.05411637]
 [3.4509071  1.67720143 4.54208188 ... 0.744305   3.54408321 2.16413232]]
Recommended Crop: apple


In [None]:
model

In [None]:
class_labels = df['label'].unique().tolist()
class_labels

[20, 11, 3, 9, 18, 13, 14, 2, 10, 19, 1, 12, 7, 21, 15, 0, 16, 17, 4, 6, 8, 5]

In [None]:
class_labels = le.classes_
class_labels

array(['apple', 'banana', 'blackgram', 'chickpea', 'coconut', 'coffee',
       'cotton', 'grapes', 'jute', 'kidneybeans', 'lentil', 'maize',
       'mango', 'mothbeans', 'mungbean', 'muskmelon', 'orange', 'papaya',
       'pigeonpeas', 'pomegranate', 'rice', 'watermelon'], dtype=object)

In [None]:
x = df.drop('label',axis=1)
y = df['label']

In [None]:
features_data = {'columns':list(x.columns)}
features_data

{'columns': ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']}

In [None]:
test_series = pd.Series(np.zeros(len(features_data['columns'])),index=features_data['columns'])
test_series

N              0.0
P              0.0
K              0.0
temperature    0.0
humidity       0.0
ph             0.0
rainfall       0.0
dtype: float64

In [None]:
test_series['N'] = 90
test_series['P'] = 42
test_series['K'] = 43
test_series['temperature'] = 25
test_series['humidity'] = 86
test_series['ph'] = 6.5
test_series['rainfall'] = 220

ValueError: Expected 2D array, got 1D array instead:
array=[ 90.   42.   43.   25.   86.    6.5 220. ].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [None]:
best_solution

array([1, 0, 1, 0, 1, 0, 1])

In [None]:
def get_selected_features_rec(X_train, best_solution):
    if isinstance(best_solution, tuple):
        selected_feature_indices = np.where(best_solution[0] == 1)[0]
    else:
        selected_feature_indices = np.where(best_solution == 1)[0]
    selected_features = X_train[selected_feature_indices]
    return selected_features

In [None]:
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.cluster import MiniBatchKMeans
from sklearn.metrics import accuracy_score

def train_classification_models(X_train, y_train, best_solution, X_test, y_test):
    models = {}

    selected_features_train = get_selected_features(X_train, best_solution)
    selected_features_train_rec = get_selected_features_rec(test_series, best_solution)
    # Fit k-means clustering
    kmeans = MiniBatchKMeans(n_clusters=100, random_state=42)
    kmeans.fit(selected_features_train)
    rec = kmeans.transform(selected_features_train_rec)


    return rec



In [None]:
def train_classification_models3(X_train, y_train, best_solution, X_test, y_test):
    models = {}

    selected_features_train = get_selected_features(X_train, best_solution)
    selected_features_test = get_selected_features(X_test, best_solution)

    # Use get_selected_features_rec to get selected features from test_series
    selected_features_train_rec = get_selected_features_rec(test_series, best_solution)

    # Reshape selected_features_train_rec into a 2-dimensional array
    selected_features_train_rec = selected_features_train_rec.reshape(-1, 1)

    # Fit k-means clustering
    kmeans = MiniBatchKMeans(n_clusters=100, random_state=42)
    kmeans.fit(selected_features_train)
    rec = kmeans.transform(selected_features_train_rec)

    return rec

In [None]:
recommended_crop,model= train_classification_models3(X_train, y_train, best_solution,X_test,y_test)
print("Recommended Crop:", recommended_crop,model)

AttributeError: 'Series' object has no attribute 'reshape'

In [None]:
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.cluster import MiniBatchKMeans
from sklearn.metrics import accuracy_score

def train_classification_models3(X_train, y_train, best_solution, X_test, y_test):
    models = {}

    selected_features_train = get_selected_features(X_train, best_solution)
    selected_features_test = get_selected_features(X_test, best_solution)
    print("selected_features_train",selected_features_train)
    # Fit k-means clustering
    kmeans = MiniBatchKMeans(n_clusters=100, random_state=42)
    kmeans.fit(selected_features_train)

    # Transform selected features using k-means
    X_train_reduced = kmeans.transform(selected_features_train)
    X_test_reduced = kmeans.transform(selected_features_test)
    print("X_train_reduced",X_train_reduced)
    # Train SVM model with reduced dataset
    svm_reduced = SVC(C=1,kernel='poly')
    svm_reduced.fit(X_train_reduced, y_train)
    svm_reduced_score = accuracy_score(y_test, svm_reduced.predict(X_test_reduced))
    print("SVM with Reduced Dataset (k-means clustering) Test Accuracy:", svm_reduced_score)
    print(X_test_reduced)

    predicted_class = svm_reduced.predict(X_test_reduced)

    # Get the recommended crop based on the predicted class index
    recommended_crop = class_labels[predicted_class[0]]

    return recommended_crop,svm_reduced



In [None]:
recommended_crop,model= train_classification_models3(X_train, y_train, best_solution,X_test,y_test)

selected_features_train [[-0.9034266  -0.6685066   0.93658618  0.0051497 ]
 [-0.36705134 -0.57058952 -0.10047048 -0.60529057]
 [-1.17161422 -0.45308903 -0.38277499 -1.04580687]
 ...
 [-1.06433917 -0.33558853 -0.89838138 -0.04373582]
 [-1.06433917  3.05234239  0.38634019 -0.56903684]
 [-0.50114515 -0.51183927 -0.41804549 -0.89653148]]
X_train_reduced [[2.38735154 2.25360001 3.65338926 ... 2.13960519 2.39973319 1.72117773]
 [2.01921202 1.27123356 3.75873458 ... 1.8756851  1.77082166 0.97714195]
 [2.92654499 0.97688256 3.86303265 ... 1.91734419 2.68537004 0.28524062]
 ...
 [2.73884249 0.69684475 3.79694475 ... 0.86340572 2.71624446 0.9098449 ]
 [3.91880065 3.94230773 0.91920739 ... 4.11190298 4.34742326 3.73929833]
 [2.33580259 0.97088023 3.85636025 ... 1.91892469 2.03515512 0.72679203]]




SVM with Reduced Dataset (k-means clustering) Test Accuracy: 0.9659090909090909
[[1.73088855 3.37419455 3.89049606 ... 4.00870712 1.36740676 3.11159849]
 [1.21432546 3.01181245 3.65309305 ... 3.56451735 0.93109956 2.83401572]
 [1.23303462 2.70364417 3.0918987  ... 2.63269665 1.59653926 2.41217455]
 ...
 [1.16341127 3.25244256 4.48809906 ... 3.67986153 0.55978506 3.23643554]
 [1.16217928 3.04861985 4.40686523 ... 3.5376794  0.4673495  3.05411637]
 [3.4509071  1.67720143 4.54208188 ... 0.744305   3.54408321 2.16413232]]
