In [None]:
import pandas as pd

# Columns carried over into the cleaned file; every other column is dropped.
# NOTE(review): "Longitude" is kept but there is no "Latitude" entry — confirm
# against the printed column list whether that is intentional.
cols_to_keep = ["Address", "Formatted address", "Longitude", "Location Type", "Crop"]

# Read the raw CSV from the Kaggle input directory and print its header so the
# names in cols_to_keep can be checked against the real column names.
df = pd.read_csv("/kaggle/input/forecasting/CropDataset-Enhanced.csv")
print("All columns in dataset:\n", df.columns)

# Project onto the selected columns (a KeyError is raised if one is absent)
# and write the result to the working directory without the index column.
df_clean = df[cols_to_keep]
df_clean.to_csv("geo_crop_cleaned.csv", index=False)
print("Cleaned dataset saved to geo_crop_cleaned.csv")


In [None]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# -----------------------------
# Load cleaned dataset
# -----------------------------
# Build the working frame in one chain that ends in an explicit copy.
# Slicing two columns and then mutating that slice (in-place dropna, column
# assignment) is the pattern pandas reports with SettingWithCopyWarning;
# owning a fresh copy makes the label assignment below unambiguous.
df = (
    pd.read_csv("geo_crop_cleaned.csv")[["Address", "Crop"]]  # keep only required columns
    .dropna()  # drop rows where the address or the crop is missing
    .copy()
)

# -----------------------------
# Encode Crop labels
# -----------------------------
# Replace each crop value with an integer class id. The fitted encoder is kept
# so predicted ids can be mapped back to crop values with inverse_transform.
label_encoder = LabelEncoder()
df["Crop"] = label_encoder.fit_transform(df["Crop"])

num_classes = len(label_encoder.classes_)
print("Number of crop classes:", num_classes)

# -----------------------------
# Train-test split
# -----------------------------
# 80/20 split of (address text, class id); random_state pins the shuffle so
# the same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    df["Address"],
    df["Crop"],
    test_size=0.2,
    random_state=42
)

# -----------------------------
# Text Vectorization
# -----------------------------
max_tokens = 10000      # upper bound on the vocabulary size
sequence_length = 50    # number of token ids emitted per address

vectorizer = tf.keras.layers.TextVectorization(
    output_mode="int",
    max_tokens=max_tokens,
    output_sequence_length=sequence_length,
)

# Build the vocabulary from the training addresses only.
vectorizer.adapt(X_train)


# -----------------------------
# TensorFlow Dataset
# -----------------------------
def _batched_dataset(features, labels, batch_size=32):
    """Pair features with labels, group them into batches and prefetch."""
    pairs = tf.data.Dataset.from_tensor_slices((features, labels))
    return pairs.batch(batch_size).prefetch(tf.data.AUTOTUNE)


train_ds = _batched_dataset(X_train, y_train)
test_ds = _batched_dataset(X_test, y_test)

# -----------------------------
# Model Definition
# -----------------------------
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created. The train/test split is pinned with random_state=42, but weight
# initialisation was left unseeded, so every run trained a different model.
# (Some GPU kernels may still introduce small run-to-run differences.)
tf.keras.utils.set_random_seed(42)

model = tf.keras.Sequential([
    vectorizer,                                   # raw address string -> integer token ids
    tf.keras.layers.Embedding(max_tokens, 64),    # one 64-dimensional vector per token id
    tf.keras.layers.GlobalAveragePooling1D(),     # average the token vectors of each address
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(num_classes, activation="softmax")  # one score per crop class
])

# -----------------------------
# Compile Model
# -----------------------------
# The sparse loss is used because the targets are integer class ids produced
# by the label encoder, not one-hot vectors.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

# -----------------------------
# Train Model
# -----------------------------
# The test split is passed as validation data, so the per-epoch val_* metrics
# are computed on it.
model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=10
)

# -----------------------------
# Save Model
# -----------------------------
# NOTE(review): label_encoder is not persisted in this cell, so a model
# reloaded in a fresh kernel cannot map class ids back to crop names without
# refitting the encoder — consider saving label_encoder.classes_ as well.
model.save("address_to_crop_model.keras")

print("Model saved successfully!")

# -----------------------------
# Prediction Function
# -----------------------------
def predict_crop(address):
    """Return the crop label the trained model scores highest for `address`.

    The address is sent through the module-level `model` as a batch holding a
    single string; the index of the largest class score is then translated
    back to the original crop label with the module-level `label_encoder`.
    """
    # The model expects a batch, so wrap the lone string in a 1-element tensor.
    single_batch = tf.constant([address])
    class_scores = model.predict(single_batch)

    # Row 0 is the only sample; take the column with the highest score.
    best_index = class_scores.argmax(axis=1)[0]
    decoded = label_encoder.inverse_transform([best_index])
    return decoded[0]


# -----------------------------
# Example Prediction
# -----------------------------
# Run the trained model once on a free-text address and show the result.
sample_address = "Village near Pune Maharashtra"
predicted_crop = predict_crop(sample_address)
print(f"Predicted Crop: {predicted_crop}")


In [None]:
import pandas as pd

# Load the cleaned dataset as a lookup table: keep only the address and crop
# columns and discard rows where either one is missing. The steps are chained
# so a new frame is produced, instead of calling dropna(inplace=True) on a
# column slice, which pandas flags with SettingWithCopyWarning.
df = (
    pd.read_csv("geo_crop_cleaned.csv")[["Address", "Crop"]]
    .dropna()
)

# Function to get crop(s) for an input address
def get_crop_by_address(address):
    """Look up the crops recorded for addresses containing `address`.

    The match is a case-insensitive, literal substring test against the
    "Address" column of the module-level `df`.

    Parameters
    ----------
    address : str
        Text to search for inside the stored addresses.

    Returns
    -------
    str
        "Address: <address>\\nCrop(s): <crop, crop, ...>" listing each distinct
        crop of the matching rows in order of first appearance, or
        "Address: <address>\\nCrop(s): Not found in dataset" when no row matches.
    """
    needle = address.lower()

    # regex=False: treat the query as plain text. Interpreted as a regular
    # expression, an address such as "Pune (East)" would silently fail to
    # match itself and a lone "(" would raise re.error.
    # na=False: a missing address counts as "no match" instead of producing a
    # NaN in the boolean mask.
    is_match = df["Address"].str.lower().str.contains(needle, regex=False, na=False)
    matched = df[is_match]

    if matched.empty:
        return f"Address: {address}\nCrop(s): Not found in dataset"

    # Convert each label with str() so non-string crop values (for example
    # integer-encoded labels) cannot make the join raise TypeError.
    crops_str = ", ".join(str(crop) for crop in matched["Crop"].unique())

    return f"Address: {address}\nCrop(s): {crops_str}"

# Demo lookup: report every crop recorded for addresses containing "Amravati".
sample_address = "Amravati"
result = get_crop_by_address(sample_address)
print(result)
