In [None]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import joblib
import logging




In [None]:
# Configure the root logger for this notebook: INFO and above, each record
# rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Load and preprocess the data
def load_and_preprocess_data(file_path, feature_columns=None, target_column='Crop'):
    """Read a CSV dataset and separate it into a feature matrix and labels.

    Args:
        file_path: Location of the CSV file, passed straight to ``pd.read_csv``.
        feature_columns: Names of the columns used as model inputs, in the
            order they should appear in ``X``. When omitted, the seven columns
            pH, Temperature, Rainfall, Humidity, Nitrogen, Phosphorus and
            Potassium are used.
        target_column: Name of the label column. Defaults to ``'Crop'``.

    Returns:
        A tuple ``(X, y, df)``: the feature values as a NumPy array, the label
        values as a NumPy array, and the full DataFrame as read from disk.

    Raises:
        KeyError: If any requested feature or target column is not present in
            the file; the message lists every missing column.
    """
    logging.info("Loading and preprocessing data...")
    if feature_columns is None:
        feature_columns = ['pH', 'Temperature', 'Rainfall', 'Humidity',
                           'Nitrogen', 'Phosphorus', 'Potassium']
    else:
        # A tuple would be treated by pandas as a single (multi-level) key.
        feature_columns = list(feature_columns)

    df = pd.read_csv(file_path)

    # Check the schema up front so the error names every absent column
    # instead of surfacing as a terse indexing failure.
    missing = [name for name in [*feature_columns, target_column]
               if name not in df.columns]
    if missing:
        raise KeyError(
            f"Missing required column(s) in {file_path!r}: "
            f"{', '.join(map(str, missing))}"
        )

    X = df[feature_columns].values
    y = df[target_column].values
    return X, y, df

# Split the data into training and testing sets
def split_data(X, y, test_size=0.2, random_state=42, stratify=None):
    """Split features and labels into training and testing subsets.

    Args:
        X: Feature matrix.
        y: Labels aligned with the rows of ``X``.
        test_size: Portion of the data held out for testing, forwarded to
            ``train_test_split``. Defaults to 0.2.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            repeatable. Defaults to 42.
        stratify: Optional array forwarded to ``train_test_split``; pass ``y``
            to keep class proportions equal in both subsets. Defaults to
            ``None`` (no stratification).

    Returns:
        Whatever ``train_test_split`` returns for two input arrays, i.e.
        ``X_train, X_test, y_train, y_test``.
    """
    logging.info("Splitting data into training and testing sets...")
    return train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        stratify=stratify,
    )

# Scale the features
def scale_features(X_train, X_test):
    """Scale both splits with a ``StandardScaler`` fitted on the training split.

    The scaler is fitted on ``X_train`` only and then applied unchanged to
    ``X_test``.

    Returns:
        A tuple ``(X_train_scaled, X_test_scaled, scaler)`` where ``scaler``
        is the fitted ``StandardScaler`` instance.
    """
    logging.info("Scaling features...")
    standardizer = StandardScaler()
    train_scaled = standardizer.fit_transform(X_train)
    return train_scaled, standardizer.transform(X_test), standardizer

# Train the KNN model
def train_knn_model(X_train, y_train, n_neighbors=5):
    """Fit a distance-weighted k-nearest-neighbours classifier.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        n_neighbors: Number of neighbours passed to the classifier
            (defaults to 5).

    Returns:
        The fitted ``KNeighborsClassifier``.
    """
    logging.info(f"Training KNN model with {n_neighbors} neighbors...")
    classifier = KNeighborsClassifier(weights='distance', n_neighbors=n_neighbors)
    # fit() hands back the estimator itself, so it can be returned directly.
    return classifier.fit(X_train, y_train)

# Evaluate the model
def evaluate_model(model, X_test, y_test):
    """Score a fitted model on held-out data and log the results.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Feature matrix to predict on.
        y_test: True labels for ``X_test``.

    Returns:
        A tuple ``(accuracy, report)``: the value from ``accuracy_score`` and
        the text produced by ``classification_report``.
    """
    logging.info("Evaluating model...")
    predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    report = classification_report(y_test, predictions)

    # Logger-side formatting yields the same messages as building them eagerly.
    logging.info("Model accuracy: %.4f", accuracy)
    logging.info("Classification Report:\n%s", report)
    return accuracy, report

# Save the model and scaler
def save_model(model, scaler, model_path, scaler_path):
    """Persist the model and its scaler to disk with ``joblib.dump``.

    Args:
        model: Object written to ``model_path``.
        scaler: Object written to ``scaler_path``.
        model_path: Destination for the model.
        scaler_path: Destination for the scaler.
    """
    logging.info("Saving model and scaler...")
    # Model is written first, then the scaler.
    for artifact, destination in ((model, model_path), (scaler, scaler_path)):
        joblib.dump(artifact, destination)

# Load the model and scaler
def load_model(model_path, scaler_path):
    """Restore a model and scaler previously written with ``joblib.dump``.

    Note: joblib files are pickle-based, so only load files from a trusted
    source.

    Args:
        model_path: File to read the model from.
        scaler_path: File to read the scaler from.

    Returns:
        A tuple ``(model, scaler)`` with the two loaded objects.
    """
    logging.info("Loading model and scaler...")
    # Tuple elements are evaluated left to right: model first, scaler second.
    return joblib.load(model_path), joblib.load(scaler_path)

# Predict crops within 70% similarity
def predict_similar_crops(model, scaler, new_data, similarity_threshold=0.7):
    """Return the crops whose predicted probability exceeds a threshold.

    "Similarity" here is the per-class value returned by
    ``model.predict_proba`` for the sample, not a separate distance measure.

    Args:
        model: Fitted classifier exposing ``predict_proba`` and ``classes_``.
        scaler: Fitted transformer exposing ``transform``; applied to
            ``new_data`` before prediction.
        new_data: One sample, given either as a 1-D sequence of feature values
            or as a 2-D array-like with a single row. If several rows are
            supplied, only the first one is scored and a warning is logged.
        similarity_threshold: Exclusive lower bound on the probability for a
            crop to be reported. Defaults to 0.7.

    Returns:
        A list of ``(crop, probability)`` tuples, highest probability first.
        The list is empty when no class exceeds the threshold.
    """
    logging.info(f"Predicting crops with similarity > {similarity_threshold:.2f}")

    # Promote a bare feature vector to a one-row matrix; 2-D input is passed
    # through untouched.
    if np.ndim(new_data) == 1:
        new_data = np.asarray(new_data).reshape(1, -1)

    new_data_scaled = scaler.transform(new_data)
    if len(new_data_scaled) > 1:
        logging.warning(
            "Received %d samples; only the first one is scored.",
            len(new_data_scaled),
        )

    probabilities = model.predict_proba(new_data_scaled)[0]
    similar_crops = [
        (crop, probability)
        for crop, probability in zip(model.classes_, probabilities)
        if probability > similarity_threshold
    ]
    similar_crops.sort(key=lambda pair: pair[1], reverse=True)
    return similar_crops

# Interactive prediction function
def predict_interactive(model, scaler, similarity_threshold=0.7):
    """Repeatedly prompt for feature values and print the matching crops.

    For each round the user is asked for pH, Temperature, Rainfall, Humidity,
    Nitrogen, Phosphorus and Potassium in that order. Entering 'q' at any
    prompt ends the session. An entry that is not a finite number is rejected
    and the same feature is asked again instead of aborting the session.

    Args:
        model: Fitted classifier handed to ``predict_similar_crops``.
        scaler: Fitted scaler handed to ``predict_similar_crops``.
        similarity_threshold: Threshold handed to ``predict_similar_crops``
            and shown in the printed messages. Defaults to 0.7.

    Returns:
        None. Results are printed to standard output.
    """
    feature_names = ['pH', 'Temperature', 'Rainfall', 'Humidity',
                     'Nitrogen', 'Phosphorus', 'Potassium']
    # With the default of 0.7 this renders as "70%".
    threshold_label = f"{similarity_threshold:.0%}"

    while True:
        print("\nEnter new data for prediction (or 'q' to quit):")
        new_data = []
        for feature in feature_names:
            value = _prompt_feature_value(feature)
            if value is None:  # user asked to quit
                return
            new_data.append(value)

        new_data = np.array([new_data])
        similar_predictions = predict_similar_crops(
            model, scaler, new_data, similarity_threshold=similarity_threshold
        )

        print(f"\nPredicted crops with >{threshold_label} similarity:")
        if similar_predictions:
            for i, (crop, probability) in enumerate(similar_predictions, 1):
                print(f"{i}. {crop}: {probability:.4f}")
        else:
            print(f"No crops found with >{threshold_label} similarity.")


def _prompt_feature_value(feature):
    """Ask for one feature until a finite number or 'q' is entered.

    Returns the parsed float, or ``None`` when the user types 'q'.
    """
    while True:
        raw = input(f"{feature}: ").strip()
        if raw.lower() == 'q':
            return None
        try:
            number = float(raw)
        except ValueError:
            number = None
        # float() also accepts 'nan' and 'inf'; reject those as well.
        if number is None or not np.isfinite(number):
            print(f"'{raw}' is not a valid number for {feature}; please try again.")
            continue
        return number


            

In [None]:
# Notebook Sections

# Section 1: Load and Preprocess Data
# Read the CSV named below and print a short overview of what was loaded.
print("Section 1: Load and Preprocess Data")
data_file = 'datatired.csv'  # Replace with your actual data file path
X, y, df = load_and_preprocess_data(data_file)

crop_labels = df['Crop'].unique()
# NOTE(review): every column after the first is listed as a feature; this
# presumes the label sits in column 0 — confirm against the CSV layout.
listed_features = ', '.join(df.columns[1:])
print(
    f"Dataset shape: {df.shape}",
    f"Number of unique crops: {len(crop_labels)}",
    f"Feature names: {listed_features}",
    f"First few rows of the dataset:\n{df.head()}\n",
    sep="\n",
)



In [None]:
# Section 2: Split Data
# Hold out part of the data for testing and report the size of each subset.
print("Section 2: Split Data")
X_train, X_test, y_train, y_test = split_data(X, y)
print(
    f"Training set shape: {X_train.shape}",
    f"Testing set shape: {X_test.shape}\n",
    sep="\n",
)



In [None]:
# Section 3: Scale Features
# Scale both subsets and keep the fitted scaler for later use.
print("Section 3: Scale Features")
X_train_scaled, X_test_scaled, scaler = scale_features(X_train, X_test)
print(
    f"Scaled training set shape: {X_train_scaled.shape}",
    f"Scaled testing set shape: {X_test_scaled.shape}\n",
    sep="\n",
)



In [None]:
# Section 4: Train Model
# Fit the KNN classifier on the scaled training subset.
print("Section 4: Train Model")
knn_model = train_knn_model(X_train_scaled, y_train)
print(f"Model trained with {knn_model.n_neighbors} neighbors", end="\n\n")



In [None]:
# Section 5: Evaluate Model
# Score the trained model on the scaled test subset and show the results.
print("Section 5: Evaluate Model")
accuracy, report = evaluate_model(knn_model, X_test_scaled, y_test)
print(
    f"Model accuracy: {accuracy:.4f}",
    "Classification Report:",
    report,
    sep="\n",
)



In [None]:
# Section 6: Save Model
# Write the trained model and the fitted scaler to the files named below.
print("Section 6: Save Model")
model_file, scaler_file = 'knn_crop_model.joblib', 'knn_crop_scaler.joblib'
save_model(knn_model, scaler, model_file, scaler_file)
print(
    f"Model saved to {model_file}",
    f"Scaler saved to {scaler_file}\n",
    sep="\n",
)