# In [20]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, StandardScaler, PowerTransformer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
import pickle

# --- Data loading and preprocessing -----------------------------------
# Reads the crop dataset, encodes the target labels as integers, then
# applies standard scaling followed by a Yeo-Johnson power transform to
# the features. Each fitted preprocessing object is pickled right after
# it has been fitted.
# NOTE(review): the scaler and power transformer are fitted on the full
# dataset here, before the train/test split further down - confirm this
# is intended, since held-out rows then influence the preprocessing.


def _pickle_to(path, obj):
    """Serialize *obj* to *path* with pickle (file is opened in 'wb' mode)."""
    with open(path, 'wb') as fh:
        pickle.dump(obj, fh)


# Load the raw table and print a random 10-row preview.
data = pd.read_csv('crop_data.csv')
print(data.sample(10))

# Encode the 'crop' column as integer class ids in a new column.
label_encoder = LabelEncoder()
data['crop_encoded'] = label_encoder.fit_transform(data['crop'])
_pickle_to('label_encoder.pkl', label_encoder)

# Target is the encoded column; features are every remaining column.
y = data['crop_encoded']
X = data.drop(columns=['crop', 'crop_encoded'])

# Standardize the features and persist the fitted scaler.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
_pickle_to('standard_scaler.pkl', scaler)

# Yeo-Johnson power transform on the scaled features; persist it as well.
pt = PowerTransformer(method='yeo-johnson')
X_transformed = pt.fit_transform(X_scaled)
_pickle_to('power_transformer.pkl', pt)

# --- Hold-out training and evaluation ---------------------------------
# 80/20 train/test split with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_transformed,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a Gaussian Naive Bayes classifier, then predict the held-out rows.
gnb = GaussianNB().fit(X_train, y_train)
y_pred = gnb.predict(X_test)

# Accuracy plus weighted-average F1 / precision / recall on the test set.
weighted = {'average': 'weighted'}
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, **weighted)
precision = precision_score(y_test, y_pred, **weighted)
recall = recall_score(y_test, y_pred, **weighted)

# One line per metric, each formatted to four decimal places.
report_lines = [
    f'Accuracy: {accuracy:.4f}',
    f'Precision: {precision:.4f}',
    f'Recall: {recall:.4f}',
    f'F1 Score: {f1:.4f}',
]
print('\n'.join(report_lines))

# Confusion matrix drawn as an annotated heatmap (integer cell counts).
cf_matrix = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
heatmap_ax = sns.heatmap(cf_matrix, annot=True, fmt="d", cmap='Blues')
heatmap_ax.set_xlabel('Predicted')
heatmap_ax.set_ylabel('Actual')
heatmap_ax.set_title("Confusion Matrix")
plt.show()

# --- Cross-validation and model persistence ---------------------------
# 10-fold cross-validation of the classifier on the transformed features.
# NOTE(review): X_transformed comes from transformers fitted on the whole
# dataset, so these scores may be slightly optimistic - confirm whether
# the preprocessing should be refitted inside each fold instead.
cv_scores = cross_val_score(gnb, X_transformed, y, cv=10)
print("Cross-Validation Scores:", cv_scores)
# `np` is not among the imports at the top of this script, so the previous
# np.mean(cv_scores) raised NameError. cross_val_score returns a NumPy
# array, whose own .mean() yields the same value without the import.
print("Mean CV Score:", cv_scores.mean())

# Persist the `gnb` estimator object to disk with pickle.
with open('crop_recommendation_model.pkl', 'wb') as f:
    pickle.dump(gnb, f)


# Captured notebook output (not printed by the code above):
# Top 5 Recommended Crops:
# cacao: 63.00%
# banana: 37.00%
# corn: 0.00%
# coffee: 0.00%
# rice: 0.00%


