# **Data Preprocessing**

In [None]:
# ================================
# STEP 1: IMPORT REQUIRED LIBRARIES
# ================================
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

# ================================
# STEP 2: LOAD THE CSV FILE
# ================================
# Location of the crop-recommendation dataset; change it when the file
# lives somewhere else.
file_path = '/content/Crop_recommendation.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# ================================
# STEP 3: CHECK FOR MISSING VALUES
# ================================
# Report the number of null cells in every column before anything is imputed.
print("🔍 Missing values before imputation:")
print(df.isna().sum())

# ================================
# STEP 4: HANDLE MISSING VALUES (Median Imputation)
# ================================
# Numeric feature columns to impute: soil nutrients (N, P, K), pH,
# temperature, humidity and rainfall.
cols_to_impute = ['N', 'P', 'K', 'ph', 'temperature', 'humidity', 'rainfall']

for col in cols_to_impute:
    median_val = df[col].median()
    # Assign the filled column back rather than calling
    # fillna(..., inplace=True) on df[col]: the in-place call on a column
    # selection is chained assignment, which raises a FutureWarning in
    # pandas 2.x and leaves df unchanged once Copy-on-Write is active
    # (the default from pandas 3.0).
    df[col] = df[col].fillna(median_val)
    print(f"✅ Filled missing values in '{col}' with median: {median_val}")

print("\n✅ All missing values filled.\n")

# ================================
# STEP 5: FEATURE SCALING (Min-Max Normalization)
# ================================
# Learn each feature column's min/max, then overwrite those columns with
# their rescaled values. The fitted scaler is kept in `scaler` for reuse.
scaler = MinMaxScaler().fit(df[cols_to_impute])
df[cols_to_impute] = scaler.transform(df[cols_to_impute])

# Preview the first rows of the rescaled feature columns.
print("✅ Features normalized using Min-Max Scaling:")
print(df[cols_to_impute].head(5))

# ================================
# STEP 6: ENCODE CATEGORICAL LABELS (Crop Types)
# ================================

# Method 1: Label Encoding (for classification models)
# Every distinct value in 'label' is replaced by an integer code, stored in
# a new 'label_encoded' column; the original 'label' column is kept.
le = LabelEncoder()
df['label_encoded'] = le.fit_transform(df['label'])

# Optional: keep a crop-name -> code lookup for reference.
# Position i of le.classes_ is the class encoded as i, so enumerating it
# yields the same codes as le.transform(le.classes_) without the extra call.
# .tolist() converts NumPy scalars to built-in Python values so the mapping
# prints cleanly and can be serialized (e.g. to JSON).
label_mapping = {crop: code for code, crop in enumerate(le.classes_.tolist())}
print("\n✅ Crop label encoded mapping:")
print(label_mapping)

# Method 2: One-Hot Encoding (for analysis/ML models that benefit from OHE)
# Expands 'label' into one indicator column per crop, each named 'crop_<name>';
# all other columns of df are carried over unchanged.
# NOTE(review): any 'label_encoded' column already present in df is carried
# over too -- drop it before using df_onehot as model input if that is not
# intended.
df_onehot = pd.get_dummies(data=df, prefix='crop', columns=['label'])

# Show the first rows of the final table.
print("\n📊 Final preprocessed dataset (head):")
print(df_onehot.head(5))

# ================================
# (Optional) Save Preprocessed Data
# ================================
# df_onehot.to_csv('/content/preprocessed_crop_data.csv', index=False)
