In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Read the pre-processed recommendation table from disk, then print its
# column names, dtypes and non-null counts.
df = pd.read_csv(filepath_or_buffer="Data_Processed/Recommendation.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 97233 entries, 0 to 97232
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   State_Name          97233 non-null  object 
 1   Crop_Type           97233 non-null  object 
 2   Crop                97233 non-null  object 
 3   N                   97233 non-null  int64  
 4   P                   97233 non-null  int64  
 5   K                   97233 non-null  int64  
 6   pH                  97233 non-null  float64
 7   rainfall            97233 non-null  float64
 8   temperature         97233 non-null  float64
 9   Area_in_hectares    97233 non-null  float64
 10  Production_in_tons  97233 non-null  float64
dtypes: float64(5), int64(3), object(3)
memory usage: 8.2+ MB


In [3]:
# Remove the `Production_in_tons` column from the frame.
# `errors="ignore"` keeps this cell idempotent: because `df` is reassigned from
# itself, a second run would otherwise raise KeyError (the column is already gone).
df = df.drop(columns=["Production_in_tons"], errors="ignore")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 97233 entries, 0 to 97232
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   State_Name        97233 non-null  object 
 1   Crop_Type         97233 non-null  object 
 2   Crop              97233 non-null  object 
 3   N                 97233 non-null  int64  
 4   P                 97233 non-null  int64  
 5   K                 97233 non-null  int64  
 6   pH                97233 non-null  float64
 7   rainfall          97233 non-null  float64
 8   temperature       97233 non-null  float64
 9   Area_in_hectares  97233 non-null  float64
dtypes: float64(4), int64(3), object(3)
memory usage: 7.4+ MB


In [4]:
# NOTE(review): this binds a second name to the same DataFrame object; it is
# not a copy, so any column assignment made through `data` is also visible
# through `df`. Use `df.copy()` here if `df` must keep its loaded values;
# TODO confirm that no later cell relies on the two names sharing one object.
data = df

In [5]:
# Replace every categorical column with integer codes. The fitted encoders are
# kept in `label_encoders`, keyed by column name, so the codes can later be
# mapped back to the original labels with `inverse_transform`.
label_encoders = {}
for column in ['State_Name', 'Crop_Type', 'Crop']:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Normalize numerical features
numerical_features = ['N', 'P', 'K', 'pH', 'rainfall', 'temperature', 'Area_in_hectares']

# Learn the scaling statistics from the training rows only, so that held-out
# rows do not leak into the mean/std used for standardization.
# The row partition is reproduced with the same `test_size` / `random_state`
# as the later `train_test_split(X, y, ...)` call: without stratification the
# partition depends only on the number of rows and the seed, so splitting the
# row positions here yields exactly the rows that end up in X_train.
# Keep these two arguments in sync with that call.
train_rows = train_test_split(np.arange(len(data)), test_size=0.2, random_state=42)[0]

scaler = StandardScaler()
scaler.fit(data.iloc[train_rows][numerical_features])

# Apply the train-fitted transformation to every row (train and test alike).
data[numerical_features] = scaler.transform(data[numerical_features])

In [6]:
# Separate the predictors from the target column ('Crop').
X = data.drop(columns='Crop').to_numpy()
y = data['Crop'].to_numpy()

# Hold out 20% of the rows for testing; the fixed seed makes the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Wrap the NumPy splits as tensors: float32 for the inputs, int64 (long) for
# the class labels.
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test)
)

# Pair inputs with labels and serve them in mini-batches of 64.
# Only the training loader reshuffles; the test loader keeps row order.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=64)