In [35]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import StackingClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report,accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC

In [36]:
# Load the crop dataset from a CSV file resolved relative to the notebook's
# working directory. The following cells encode and split this `df` frame.
df = pd.read_csv("crop_data2.csv")

In [37]:
# Integer-encode every categorical column with its own LabelEncoder.
# Each encoder is kept under a dedicated name so its learned label <-> integer
# mapping stays available (e.g. for `inverse_transform`).
# NOTE: `df` is overwritten in place, so re-running this cell without first
# reloading the CSV refits the encoders on already-encoded integers.
crops_label_encoder = LabelEncoder()
soil_label_encoder = LabelEncoder()
type_of_crop_label_encoder = LabelEncoder()
season_label_encoder = LabelEncoder()
sown_label_encoder = LabelEncoder()
harvested_label_encoder = LabelEncoder()
watersource_label_encoder = LabelEncoder()

# Column name -> encoder responsible for it (processed in insertion order).
_encoder_by_column = {
    'CROPS': crops_label_encoder,
    'SOIL': soil_label_encoder,
    'TYPE_OF_CROP': type_of_crop_label_encoder,
    'SEASON': season_label_encoder,
    'SOWN': sown_label_encoder,
    'HARVESTED': harvested_label_encoder,
    'WATERSOURCE': watersource_label_encoder,
}
for _column, _encoder in _encoder_by_column.items():
    df[_column] = _encoder.fit_transform(df[_column])

In [38]:
# Separate the target from the predictors: CROPS is the label to predict and
# every remaining column is used as a feature.
# An earlier variant also excluded TYPE_OF_CROP, SEASON, SOWN, HARVESTED and
# SOIL from the feature set; the current version keeps them.
y = df['CROPS']
X = df.drop(columns=['CROPS'])

In [39]:
# Hold out 20% of the rows for evaluation. The fixed seed keeps the
# train/test partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [40]:
# Standardize the features. The scaler's statistics are learned from the
# training split only and then reused for the test split, so the held-out
# rows do not influence the preprocessing.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [41]:
# Linear-kernel SVM trained on the standardized features.
# probability=True makes SVC run an additional, internally cross-validated
# calibration so that predict_proba is available (the soft-voting ensemble
# further down relies on it). That calibration shuffles the data, so
# random_state is pinned to keep the probability estimates reproducible.
svm_model = SVC(kernel='linear', C=1.0, probability=True, random_state=42)
svm_model.fit(X_train_scaled, y_train)

# Evaluate on the held-out split.
svm_predictions = svm_model.predict(X_test_scaled)
svm_accuracy = accuracy_score(y_test, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy}")


SVM Accuracy: 1.0


In [42]:
# Gradient-boosted trees via XGBoost, configured with the multi-class softmax
# objective and trained on the unscaled features.
# The model is fitted exactly once: calling fit a second time with the same
# data only retrains from scratch and doubles the training cost.
gb_model = xgb.XGBClassifier(objective='multi:softmax')
gb_model.fit(X_train, y_train)

# Evaluate on the held-out split.
gb_predictions = gb_model.predict(X_test)
gb_accuracy = accuracy_score(y_test, gb_predictions)
print(f"Gradient Boosting Accuracy: {gb_accuracy}")

Gradient Boosting Accuracy: 0.998


In [43]:
# Soft-voting ensemble of the SVM and the XGBoost model: the predicted class
# is the one with the highest averaged predicted probability across members.
# VotingClassifier fits its own clones of the listed estimators, so the
# standalone svm_model / gb_model objects are not refitted by this cell.
# Both members are trained on the standardized features here.
ensemble_model = VotingClassifier(
    estimators=[
        ('svm', svm_model),
        ('gb', gb_model),
    ],
    voting='soft',
)
ensemble_model.fit(X_train_scaled, y_train)

# Evaluate on the held-out split.
ensemble_predictions = ensemble_model.predict(X_test_scaled)
ensemble_accuracy = accuracy_score(y_test, ensemble_predictions)
print(f"Ensemble Accuracy: {ensemble_accuracy}")


Ensemble Accuracy: 1.0


In [44]:
# Stacking ensemble: the SVM and the XGBoost model act as base learners and a
# logistic regression is the meta-model that combines their outputs.
# StackingClassifier fits its own clones of the base estimators, so the
# standalone svm_model / gb_model objects are not refitted by this cell.
estimators = [
    ('svm', svm_model),
    ('gb', gb_model),
]
stacking_model = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(),
)
stacking_model.fit(X_train_scaled, y_train)

# Evaluate on the held-out split.
stacking_predictions = stacking_model.predict(X_test_scaled)
stacking_accuracy = accuracy_score(y_test, stacking_predictions)
print(f"Stacking Accuracy: {stacking_accuracy}")


Stacking Accuracy: 1.0
