In [None]:
!pip install pandas scikit-learn tensorflow opencv-python matplotlib

import pandas as pd
import numpy as np
import zipfile
import os
import shutil
import cv2
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

In [None]:
# Location of the dataset CSV that the loading cell reads with pd.read_csv.
csv_path = "/content/npk dataset.csv"

In [None]:
# Load the dataset from the CSV configured in `csv_path`.
df = pd.read_csv(csv_path)

# Inspect the schema and the per-column missing-value counts.
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() only appended a stray "None" to the output.
df.info()
print("Missing Values:", df.isnull().sum())

# Fill missing numeric values with the column mean.
# numeric_only=True is needed because pandas >= 2.0 raises TypeError when mean()
# meets a non-numeric column. Plain assignment (instead of inplace=True) keeps
# the cell safe to re-run.
df = df.fillna(df.mean(numeric_only=True))

In [None]:
# Derive the class label from 'Output': values above 0.5 become "Fertile",
# everything else becomes "Not_Fertile".
is_fertile = df['Output'] > 0.5
df['Fertility Level'] = is_fertile.map({True: "Fertile", False: "Not_Fertile"})

In [None]:
# Standardize the numeric feature columns with StandardScaler.
# 'Output' is the target that the Fertility Level label is thresholded from
# (Output > 0.5), so it is excluded: standardizing it would move it off the scale
# that threshold assumes and change the labels whenever they are re-derived.
scaler = StandardScaler()
numerical_columns = [
    col
    for col in df.select_dtypes(include=['float64', 'int64']).columns
    if col != 'Output'
]
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])

In [None]:
# Apply PCA (keeping 95% of the variance) to the numeric feature columns only.
# 'Output' is the target the fertility label is derived from; leaving it in the
# PCA input would leak the label into the components.
pca = PCA(n_components=0.95)
pca_input_columns = [col for col in numerical_columns if col != 'Output']
pca_features = pca.fit_transform(df[pca_input_columns])

In [None]:
# Fertility Level (threshold: Output > 0.5 is Fertile, else Not_Fertile).
# The label is normally already present at this point, derived from the raw
# 'Output' values. 'Output' may have been standardized since then, in which case
# 0.5 would be compared against z-scores, so an existing label column is kept and
# the label is only derived here when it is missing.
if 'Fertility Level' not in df.columns:
    df['Fertility Level'] = df['Output'].apply(lambda x: "Fertile" if x > 0.5 else "Not_Fertile")

In [None]:
# Write the processed frame to disk (row index omitted), then report completion.
processed_csv = "processed_npk_dataset.csv"
df.to_csv(processed_csv, index=False)
print(" CSV Preprocessing Complete!")

In [None]:
# Preview the first rows of the processed frame.
df.head()

In [None]:
import zipfile

# Unpack the soiltypes archive into the local extraction directory.
archive_path = '/content/drive/MyDrive/soiltypes.zip.zip'
extraction_dir = '/content/extracteddata'
with zipfile.ZipFile(archive_path, 'r') as archive:
    archive.extractall(extraction_dir)

In [None]:
# Define Image Augmentation
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Augmentation settings: random rotation, shifts, shear, zoom and flips, plus a
# 1/255 rescale of the pixel values.
augmentation_options = dict(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)
image_datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# Count every file found anywhere under the extraction directory.
extract_path = '/content/extracteddata'  # root directory that is walked
num_images = 0
for _dirpath, _dirnames, filenames in os.walk(extract_path):
    num_images += len(filenames)
print(f"Total Images Found: {num_images}")

In [None]:
# Label Images Using CSV Data ###
# Create one output folder per class under ./labeled_images.
labeled_dir = "./labeled_images"
fertile_dir, not_fertile_dir = (
    os.path.join(labeled_dir, class_name) for class_name in ("Fertile", "Not_Fertile")
)
for class_dir in (fertile_dir, not_fertile_dir):
    os.makedirs(class_dir, exist_ok=True)

In [None]:
# Map images to their fertility level and move each one into its class folder.
# The lookup and the move have to run inside the loop, once per image. Previously
# they sat in a separate cell, so they ran a single time after the loop had
# finished, for whichever file name the loop ended on.

# Row label (as a string) -> fertility level, built once instead of rescanning the
# frame per image. setdefault keeps the first row when labels repeat.
fertility_by_key = {}
for row_key, level in zip(df.index.astype(str), df['Fertility Level']):
    fertility_by_key.setdefault(row_key, level)

for img_name in os.listdir(extract_path):
    img_path = os.path.join(extract_path, img_name)
    if not os.path.isfile(img_path):
        continue  # sub-directories are not images

    # Match on the full file name first, then on the name without its extension
    # (e.g. "12.jpg" -> row "12").
    # NOTE(review): assumes images are named after the CSV row index — confirm.
    stem = os.path.splitext(img_name)[0]
    fertility_level = fertility_by_key.get(img_name, fertility_by_key.get(stem))
    if fertility_level is None:
        continue  # no CSV row for this image; leave it where it is

    target_dir = fertile_dir if fertility_level == "Fertile" else not_fertile_dir
    shutil.move(img_path, os.path.join(target_dir, img_name))

In [None]:
# Report that the labeling step has finished.
print(" Images labeled and organized successfully!")

In [None]:
## Split Data for CNN Training ###
# Root folders for the two splits; both are created if they do not exist yet.
train_dir, test_dir = "./train", "./test"
for split_dir in (train_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

In [None]:
import os

# Sanity-check the labeling step by counting the entries in each class folder.
# The folders are addressed through fertile_dir / not_fertile_dir, the same
# locations the labeling step writes to. The hard-coded "/content/labeled_images/..."
# paths only agreed with them when the working directory happened to be /content.
fertile_images = os.listdir(fertile_dir)
not_fertile_images = os.listdir(not_fertile_dir)

print(f"✅ Found {len(fertile_images)} Fertile images")
print(f"✅ Found {len(not_fertile_images)} Not Fertile images")

if len(fertile_images) == 0 and len(not_fertile_images) == 0:
    print("⚠️ ERROR: No images found in labeled folders. Check the labeling step!")


In [None]:
# Split each class folder 80% / 20% into the train and test directories.
for category in ["Fertile", "Not_Fertile"]:
    source_dir = f"./labeled_images/{category}"
    # os.listdir returns entries in arbitrary order; sorting makes the
    # random_state-seeded split reproducible across runs and machines.
    files = sorted(os.listdir(source_dir))

    os.makedirs(f"{train_dir}/{category}", exist_ok=True)
    os.makedirs(f"{test_dir}/{category}", exist_ok=True)

    # train_test_split raises ValueError when either side of the split would be
    # empty, which happens with 0 or 1 files (for example on a re-run, after the
    # files have already been moved out of this folder).
    if len(files) < 2:
        print(f"Skipping {category}: need at least 2 images to split, found {len(files)}")
        continue

    train_files, test_files = train_test_split(files, test_size=0.2, random_state=42)

    for f in train_files:
        shutil.move(f"{source_dir}/{f}", f"{train_dir}/{category}/{f}")
    for f in test_files:
        shutil.move(f"{source_dir}/{f}", f"{test_dir}/{category}/{f}")

# The closing parenthesis was missing here, which made the whole cell a SyntaxError.
print(" Dataset successfully split into Train/Test sets!")

In [None]:
import os
import pandas as pd
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn
from PIL import Image

In [None]:
# ---- 2. Label Encoding ----
# Encode the class labels as integers. The keys must match the label strings
# created earlier ("Fertile" / "Not_Fertile", with an underscore); the previous
# 'Not Fertile' key matched nothing, so every non-fertile row was mapped to NaN.
# Note: Series.map turns unmatched values into NaN, so running this cell a second
# time on the already-encoded column would blank it out.
df['Fertility Level'] = df['Fertility Level'].map({'Fertile': 1, 'Not_Fertile': 0})

In [None]:
# ---- 3. Custom Dataset ----
class SoilDataset(Dataset):
    """Image/label dataset backed by a DataFrame and a directory of image files.

    Each row of ``dataframe`` names one image file (joined onto ``image_dir``) and
    carries its label. Items are returned as ``(image, label)`` pairs.

    Args:
        dataframe: Table with one row per sample.
        image_dir: Directory that the file names in ``image_col`` are joined onto.
        transform: Optional callable applied to the RGB image before it is returned.
        image_col: Name of the column holding the image file name.
            Defaults to ``'image'``.
        label_col: Name of the column holding the label.
            Defaults to ``'fertility'``.
    """

    def __init__(self, dataframe, image_dir, transform=None,
                 image_col='image', label_col='fertility'):
        self.data = dataframe
        self.image_dir = image_dir
        self.transform = transform
        # Column names are configurable so the dataset can be pointed at frames
        # that store these fields under other names; the defaults keep the
        # original hard-coded names.
        self.image_col = image_col
        self.label_col = label_col

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Open the image for the row at position ``idx`` and return ``(image, label)``.

        Raises:
            KeyError: If ``image_col`` or ``label_col`` is not a column of the frame.
        """
        # Column-first access reads only the two cells that are needed. Row-first
        # access (iloc[idx][col]) builds a whole-row Series for every item and can
        # upcast the label to the row's common dtype.
        img_name = self.data[self.image_col].iloc[idx]
        img_path = os.path.join(self.image_dir, img_name)
        image = Image.open(img_path).convert('RGB')
        label = self.data[self.label_col].iloc[idx]

        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [None]:
# ---- 4. Image Transforms ----
# Resize to 224x224, convert to a tensor, then normalize each channel with the
# ImageNet mean/std. The model used below is an ImageNet-pretrained AlexNet, and
# torchvision's pretrained weights expect inputs normalized this way.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
# ---- 5. Dataset and DataLoader ----
image_dir = "/content/drive/MyDrive/extracted path"  # <-- Change this

# SoilDataset reads its label from a 'fertility' column, while this notebook keeps
# the encoded label in 'Fertility Level'. Expose it under the expected name (on a
# renamed copy) unless the frame already has a 'fertility' column.
# NOTE(review): SoilDataset also reads an 'image' column of file names — confirm
# that the CSV provides one.
if 'fertility' in df.columns:
    dataset_df = df
else:
    dataset_df = df.rename(columns={'Fertility Level': 'fertility'})

dataset = SoilDataset(dataset_df, image_dir, transform)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

In [None]:
# ---- 5. AlexNet Transfer Learning ----
# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

model = models.alexnet(pretrained=True)
model.requires_grad_(False)  # freeze every pretrained parameter

# Replace the last classifier layer with a fresh 2-output linear layer; its
# parameters are newly created and therefore trainable.
head_in_features = model.classifier[6].in_features
model.classifier[6] = nn.Linear(head_in_features, 2)
model = model.to(device)