In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, TensorDataset

# 1️⃣ Load dataset
data = pd.read_csv("final_crop.csv")

# 2️⃣ Drop unwanted columns if any
if 'Unnamed: 0' in data.columns:
    data = data.drop(columns=['Unnamed: 0'])

# 3️⃣ Drop missing values BEFORE encoding. Once a categorical column has been
#    label-encoded a missing entry is no longer NaN, so a later dropna() cannot
#    remove it (and, depending on the scikit-learn version, LabelEncoder may
#    refuse NaN mixed with strings altogether).
data = data.dropna()

# 4️⃣ Encode categorical columns; the fitted encoders are kept so the same
#    mapping can be reused later.
categorical_columns = ["Dist Name", "Crop"]
label_encoders = {}
for col in categorical_columns:
    if col in data.columns:
        le = LabelEncoder()
        data[col] = le.fit_transform(data[col])
        label_encoders[col] = le

# 5️⃣ Split features and target (target as a column vector for the scaler)
features = data.drop(columns=['Yield(Kg per ha)'])
target = data['Yield(Kg per ha)'].values.reshape(-1, 1)

# 6️⃣ Train-test split on the UNSCALED values, so the scalers below never
#    see the held-out rows (fitting them on the full data leaks test-set
#    min/max into training).
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# 7️⃣ Fit the scalers on the training split only, then apply them to both splits
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

scaler_target = MinMaxScaler()
y_train = scaler_target.fit_transform(y_train)
y_test = scaler_target.transform(y_test)

# Full-dataset scaled arrays, kept under their original names for any later
# cell that still refers to them.
features_scaled = scaler.transform(features)
target_scaled = scaler_target.transform(target)

# 8️⃣ Convert to tensors and add channel dimension for CNN
X_train = torch.tensor(X_train, dtype=torch.float32).unsqueeze(1)  # [batch, 1, features]
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32).unsqueeze(1)
y_test = torch.tensor(y_test, dtype=torch.float32)

# 9️⃣ Create DataLoaders (only the training data is shuffled)
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=32, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=32, shuffle=False)

print("✅ Preprocessing done!")
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)


✅ Preprocessing done!
X_train shape: torch.Size([9900, 1, 7])
y_train shape: torch.Size([9900, 1])


In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, TensorDataset

# Load data
data = pd.read_csv("final_crop.csv")

# Drop unwanted columns
if 'Unnamed: 0' in data.columns:
    data = data.drop(columns=['Unnamed: 0'])

# Drop missing values first: after label encoding a missing category is no
# longer NaN, so dropna() would silently keep those rows (and some
# scikit-learn versions reject NaN mixed with strings in LabelEncoder).
data = data.dropna()

# Encode categorical columns, keeping each fitted encoder for later reuse
categorical_columns = ["Dist Name", "Crop"]
label_encoders = {}
for col in categorical_columns:
    if col in data.columns:
        le = LabelEncoder()
        data[col] = le.fit_transform(data[col])
        label_encoders[col] = le

# Split features & target (target reshaped to a column vector for the scaler)
features = data.drop(columns=['Yield(Kg per ha)'])
target = data['Yield(Kg per ha)'].values.reshape(-1, 1)

# Train-test split happens before scaling so that the held-out rows cannot
# influence the fitted min/max values (avoids train/test leakage).
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# Scale features & target: fit on the training split, transform both splits
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

scaler_target = MinMaxScaler()
y_train = scaler_target.fit_transform(y_train)
y_test = scaler_target.transform(y_test)

# Scaled copies of the whole dataset, kept under their original names in case
# a later cell still uses them.
features_scaled = scaler.transform(features)
target_scaled = scaler_target.transform(target)

# Convert to tensors; unsqueeze(1) adds the channel axis -> [batch, 1, features]
X_train = torch.tensor(X_train, dtype=torch.float32).unsqueeze(1)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32).unsqueeze(1)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Create data loaders (only the training data is shuffled)
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=32, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=32, shuffle=False)

print("✅ Preprocessing done!")


✅ Preprocessing done!


In [9]:
import torch.nn as nn

class CNNModel(nn.Module):
    """1-D convolutional regressor that emits one value per sample.

    The network is a single ``Conv1d`` (64 filters, kernel size 3, padding 1,
    so the length axis is preserved), a ReLU, and a linear head applied to
    the flattened activations.

    Args:
        input_dim: Number of input channels expected by the convolution.
        feature_length: Length of the last input axis; the linear head has
            ``64 * feature_length`` input features.
    """

    def __init__(self, input_dim, feature_length):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels=input_dim, out_channels=64, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(in_features=64 * feature_length, out_features=1)

    def forward(self, x):
        """Map ``x`` of shape (batch, input_dim, feature_length) to (batch, 1)."""
        activated = self.relu(self.conv1(x))
        # Collapse the channel and length axes into one feature axis per sample.
        flattened = activated.reshape(activated.shape[0], -1)
        return self.fc(flattened)

class HybridModel(nn.Module):
    """Regressor that concatenates an LSTM branch and a Conv1d branch.

    Both branches receive the same input tensor. The convolution is built
    with one input channel and the LSTM with ``batch_first=True``, so the
    input is read as (batch, 1, input_size) by the LSTM and as
    (batch, 1, feature_length) by the convolution.

    Args:
        input_size: Feature size consumed by the LSTM at each time step.
        hidden_size: Hidden state size of the LSTM.
        num_layers: Number of stacked LSTM layers.
        feature_length: Length of the last input axis; the convolution
            branch contributes ``64 * feature_length`` features to the head.
    """

    def __init__(self, input_size, hidden_size, num_layers, feature_length):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.cnn = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.fc_combined = nn.Linear(hidden_size + 64 * feature_length, 1)

    def forward(self, x):
        """Return a (batch, 1) prediction from the fused branch features."""
        # Recurrent branch: keep only the output of the final time step.
        sequence_out, _state = self.lstm(x)
        last_step = sequence_out[:, -1, :]

        # Convolutional branch: activate, then flatten channels x length.
        conv_features = self.relu(self.cnn(x))
        conv_flat = conv_features.reshape(conv_features.shape[0], -1)

        fused = torch.cat((last_step, conv_flat), dim=1)
        return self.fc_combined(fused)


In [11]:
import torch.optim as optim

def train_model(model, train_loader, num_epochs=50):
    """Train ``model`` in place with Adam (lr=0.001) on mean-squared error.

    Every tenth epoch (0, 10, 20, ...) the mean of the per-batch losses of
    that epoch is printed.

    Args:
        model: ``nn.Module`` whose output is compared against the targets.
        train_loader: Re-iterable yielding ``(X_batch, y_batch)`` pairs,
            e.g. a ``DataLoader``.
        num_epochs: Number of passes over ``train_loader``.

    Raises:
        ValueError: If ``train_loader`` yields no batches during an epoch.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    # Ensure training-mode behaviour (dropout, batch-norm, ...) even if the
    # caller previously switched the model to eval().
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0.0
        # Count batches ourselves so loaders without __len__ also work.
        num_batches = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1
        if num_batches == 0:
            # Fail with a clear message instead of a ZeroDivisionError below.
            raise ValueError("train_loader yielded no batches; nothing to train on")
        if epoch % 10 == 0:
            print(f"Epoch {epoch}, Loss: {total_loss/num_batches:.5f}")


In [21]:
import torch
import torch.nn as nn
import torch.optim as optim

def train_model(model, train_loader, num_epochs=50):
    """Optimise ``model`` in place using Adam (lr=0.001) and MSE loss.

    The mean per-batch loss of the epoch is printed on every tenth epoch
    (0, 10, 20, ...).

    Args:
        model: ``nn.Module`` to train; its predictions are compared with
            the batch targets.
        train_loader: Re-iterable of ``(X_batch, y_batch)`` pairs such as a
            ``DataLoader``.
        num_epochs: How many times to iterate over ``train_loader``.

    Raises:
        ValueError: If an epoch sees no batches from ``train_loader``.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Put the model in training mode; it may have been left in eval() mode.
    model.train()

    for epoch in range(num_epochs):
        total_loss = 0.0
        # Batches are counted explicitly so the loader does not need __len__.
        num_batches = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            # A clear error beats the ZeroDivisionError the average would raise.
            raise ValueError("train_loader yielded no batches; nothing to train on")

        if epoch % 10 == 0:
            print(f"Epoch {epoch}, Loss: {total_loss / num_batches:.5f}")


In [23]:
# Seed torch so weight initialisation and DataLoader shuffling are repeatable.
torch.manual_seed(42)

# Derive the model dimensions from the training tensor rather than relying on
# variables that are not defined anywhere in this notebook. X_train is
# [batch, channels, features] after the unsqueeze(1) in the preprocessing cell.
input_dim = X_train.shape[1]
feature_length = X_train.shape[2]

cnn_model = CNNModel(input_dim, feature_length)
hybrid_model = HybridModel(input_size=feature_length, hidden_size=50, num_layers=2, feature_length=feature_length)

print("Training CNN...")
train_model(cnn_model, train_loader)

print("Training Hybrid...")
train_model(hybrid_model, train_loader)


Training CNN...
Epoch 0, Loss: 0.00755
Epoch 10, Loss: 0.00276
Epoch 20, Loss: 0.00185
Epoch 30, Loss: 0.00153
Epoch 40, Loss: 0.00140
Training Hybrid...
Epoch 0, Loss: 0.00723
Epoch 10, Loss: 0.00251
Epoch 20, Loss: 0.00163
Epoch 30, Loss: 0.00132
Epoch 40, Loss: 0.00117


In [25]:
import pickle

# Save the complete PyTorch model objects. NOTE(review): saving a whole module
# pickles it by reference to its class, so CNNModel / HybridModel presumably
# need to be defined again wherever these files are loaded - confirm at load time.
for _model_obj, _model_path in ((cnn_model, "cnn_model.pth"), (hybrid_model, "hybrid_model.pth")):
    torch.save(_model_obj, _model_path)

# Save the fitted feature scaler, target scaler and label encoders with pickle
_pickled_artifacts = (
    ("scaler.pkl", scaler),
    ("scaler_target.pkl", scaler_target),
    ("label_encoders.pkl", label_encoders),
)
for _artifact_path, _artifact in _pickled_artifacts:
    with open(_artifact_path, "wb") as f:
        pickle.dump(_artifact, f)

print("✅ Models and scalers saved!")


✅ Models and scalers saved!


In [27]:
# Save only the learned parameters (state_dict) of each trained model
for _weights_path, _net in (
    ("cnn_model_weights.pth", cnn_model),
    ("hybrid_model_weights.pth", hybrid_model),
):
    torch.save(_net.state_dict(), _weights_path)
