<a href="https://colab.research.google.com/github/Custom-made-atoms/Hackvel/blob/main/ML-VelTech.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, r2_score

# Load dataset
file_path = "/crop_yield.csv"  # Updated file path to training dataset
try:
    df = pd.read_csv(file_path)
except FileNotFoundError:
    print(f"Error: Dataset file not found at {file_path}")
    # Abort with a failure status. The bare exit() helper is meant for the
    # interactive shell only and would report success (status 0).
    raise SystemExit(1)

# Check column names
print("Columns in the dataset:", df.columns.tolist())

# Convert "Production" and "Area" to numeric values (if they exist)
if "Production" in df.columns and "Area" in df.columns:
    # errors="coerce" turns unparsable entries into NaN so they can be dropped below
    df["Production"] = pd.to_numeric(df["Production"], errors="coerce")
    df["Area"] = pd.to_numeric(df["Area"], errors="coerce")
else:
    raise KeyError("The dataset must contain 'Production' and 'Area' columns.")

# Handle missing values
df.dropna(subset=["Production", "Area"], inplace=True)

# Keep only rows with a positive area: dividing by zero would put inf/NaN into
# the target, which LinearRegression.fit() rejects. copy() detaches the
# filtered frame so the column assignments below do not hit a view.
df = df[df["Area"] > 0].copy()

# Create target variable (Yield = Production / Area).
# This overwrites any "Yield" column already present in the CSV.
df["Yield"] = df["Production"] / df["Area"]

# Feature Engineering: Convert categorical variables to numeric
# Remove extra whitespace before encoding
label_encoders = {}  # column name -> fitted LabelEncoder, reused at prediction time
categorical_cols = ["Crop", "Season", "State"]
for col in categorical_cols:
    if col in df.columns:
        # Strip leading/trailing whitespace from the column values
        df[col] = df[col].astype(str).str.strip()
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le
    else:
        print(f"Warning: Column '{col}' not found in the dataset.")

# Define features and target variable.
# "Production" and "Area" are dropped because the target is derived from them.
X = df.drop(columns=["Yield", "Production", "Area"])
y = df["Yield"]

# Split dataset (80% train / 20% test, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate model
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(((y_pred - y_test) ** 2).mean())

print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"R² Score: {r2:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")

# Prediction function that checks for valid categorical input values
def predict_crop_yield(crop, crop_year, season, state, annual_rainfall, fertilizer, pesticide):
    """Predict crop yield for a single record using the fitted linear model.

    Reads the module-level ``label_encoders``, ``X`` and ``model`` built by the
    training code, so that code must have run first.

    Args:
        crop: Crop name; must be one of the labels seen during training.
        crop_year: Value for the ``Crop_Year`` feature.
        season: Season name; must be one of the labels seen during training.
        state: State name; must be one of the labels seen during training.
        annual_rainfall: Value for the ``Annual_Rainfall`` feature.
        fertilizer: Value for the ``Fertilizer`` feature.
        pesticide: Value for the ``Pesticide`` feature.

    Returns:
        str: ``"Predicted Yield: ..."`` on success, or an ``"Error: ..."``
        message listing the valid options when a categorical value is unknown.

    Raises:
        KeyError: If no encoder was fitted for one of the categorical columns.
        ValueError: If the model was trained on a feature this function has
            no argument for.
    """
    # Normalise the same way as training (astype(str) + strip) so values match
    # the fitted labels regardless of stray whitespace.
    categorical_inputs = {
        "Crop": str(crop).strip(),
        "Season": str(season).strip(),
        "State": str(state).strip(),
    }

    # Validate and encode each categorical value with its fitted encoder
    encoded = {}
    for col, value in categorical_inputs.items():
        encoder = label_encoders[col]
        if value not in encoder.classes_:
            return f"Error: {col} '{value}' not found. Valid options: {list(encoder.classes_)}"
        encoded[col] = encoder.transform([value])[0]

    # Key every value by feature name so the row is assembled in whatever
    # column order the model was trained on, not an assumed fixed order.
    features = {
        "Crop": encoded["Crop"],
        "Crop_Year": crop_year,
        "Season": encoded["Season"],
        "State": encoded["State"],
        "Annual_Rainfall": annual_rainfall,
        "Fertilizer": fertilizer,
        "Pesticide": pesticide,
    }
    unsupported = [name for name in X.columns if name not in features]
    if unsupported:
        raise ValueError(f"Model was trained on features this function cannot supply: {unsupported}")

    input_data = pd.DataFrame([features], columns=X.columns)
    predicted_yield = model.predict(input_data)
    return f"Predicted Yield: {predicted_yield[0]:.2f} metric tons per hectare"

# Demo call: the categorical arguments are given exactly as they appear after
# whitespace stripping, so "Kharif" matches a fitted label.
print(
    predict_crop_yield(
        crop="Arecanut",
        crop_year=1997,
        season="Kharif",
        state="Andhra Pradesh",
        annual_rainfall=1200,
        fertilizer=50,
        pesticide=5,
    )
)


Columns in the dataset: ['Crop', 'Crop_Year', 'Season', 'State', 'Area', 'Production', 'Annual_Rainfall', 'Fertilizer', 'Pesticide', 'Yield']
Mean Absolute Error (MAE): 220.31
R² Score: 0.03
Root Mean Squared Error (RMSE): 932.86
Predicted Yield: 136.59 metric tons per hectare


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, r2_score

# Location of the training CSV
file_path = "/crop_yield.csv"
try:
    df = pd.read_csv(file_path)
except FileNotFoundError:
    print(f"Error: Dataset file not found at {file_path}")
    # exit() is an interactive-shell helper and exits with status 0;
    # raise SystemExit with a non-zero status so the failure is visible.
    raise SystemExit(1)

print("Columns in the dataset:", df.columns.tolist())

# Both columns are required because the target is derived from them
if "Production" in df.columns and "Area" in df.columns:
    # Unparsable entries become NaN and are removed by the dropna below
    df["Production"] = pd.to_numeric(df["Production"], errors="coerce")
    df["Area"] = pd.to_numeric(df["Area"], errors="coerce")
else:
    raise KeyError("The dataset must contain 'Production' and 'Area' columns.")

df.dropna(subset=["Production", "Area"], inplace=True)

# Rows with a non-positive area would produce inf/NaN yields (division by
# zero), which LinearRegression.fit() refuses; drop them up front.
df = df[df["Area"] > 0].copy()

# Target: yield = production per unit area (replaces any existing "Yield" column)
df["Yield"] = df["Production"] / df["Area"]

# Label-encode the categorical columns, keeping each fitted encoder so the
# prediction function can apply the same mapping later.
label_encoders = {}
categorical_cols = ["Crop", "Season", "State"]
for col in categorical_cols:
    if col in df.columns:
        # Trim whitespace first so " Kharif " and "Kharif" share one label
        df[col] = df[col].astype(str).str.strip()
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le
    else:
        print(f"Warning: Column '{col}' not found in the dataset.")

# Features are every remaining column except the target and its two inputs
X = df.drop(columns=["Yield", "Production", "Area"])
y = df["Yield"]

# 80/20 train/test split with a fixed seed for reproducible metrics
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Hold-out evaluation metrics
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(((y_pred - y_test) ** 2).mean())

print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"R² Score: {r2:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")

def predict_crop_yield(crop, crop_year, season, state, annual_rainfall, fertilizer, pesticide):
    """Return a yield prediction message for one crop record.

    Uses the module-level ``label_encoders``, ``X`` and ``model`` produced by
    the training code, which must have been executed beforehand.

    Args:
        crop: Crop name; must match a label seen during training.
        crop_year: Value for the ``Crop_Year`` feature.
        season: Season name; must match a label seen during training.
        state: State name; must match a label seen during training.
        annual_rainfall: Value for the ``Annual_Rainfall`` feature.
        fertilizer: Value for the ``Fertilizer`` feature.
        pesticide: Value for the ``Pesticide`` feature.

    Returns:
        str: ``"Predicted Yield: ..."`` on success, otherwise an
        ``"Error: ..."`` message naming the unknown value and the valid options.

    Raises:
        KeyError: If no encoder was fitted for one of the categorical columns.
        ValueError: If the model was trained on a feature this function has
            no argument for.
    """
    # Apply the training-time normalisation (stringify + strip) to the inputs
    categorical_inputs = {
        "Crop": str(crop).strip(),
        "Season": str(season).strip(),
        "State": str(state).strip(),
    }

    # Reject unknown labels early; otherwise encode them as the model expects
    encoded = {}
    for col, value in categorical_inputs.items():
        encoder = label_encoders[col]
        if value not in encoder.classes_:
            return f"Error: {col} '{value}' not found. Valid options: {list(encoder.classes_)}"
        encoded[col] = encoder.transform([value])[0]

    # Map values by feature name so the row follows the training column order
    # instead of relying on the CSV having one particular layout.
    features = {
        "Crop": encoded["Crop"],
        "Crop_Year": crop_year,
        "Season": encoded["Season"],
        "State": encoded["State"],
        "Annual_Rainfall": annual_rainfall,
        "Fertilizer": fertilizer,
        "Pesticide": pesticide,
    }
    unsupported = [name for name in X.columns if name not in features]
    if unsupported:
        raise ValueError(f"Model was trained on features this function cannot supply: {unsupported}")

    input_data = pd.DataFrame([features], columns=X.columns)
    predicted_yield = model.predict(input_data)
    return f"Predicted Yield: {predicted_yield[0]:.2f} metric tons per hectare"

# Demo call with one known crop/season/state combination
print(
    predict_crop_yield(
        crop="Arecanut",
        crop_year=1997,
        season="Kharif",
        state="Andhra Pradesh",
        annual_rainfall=1200,
        fertilizer=50,
        pesticide=5,
    )
)


Columns in the dataset: ['Crop', 'Crop_Year', 'Season', 'State', 'Area', 'Production', 'Annual_Rainfall', 'Fertilizer', 'Pesticide', 'Yield']
Mean Absolute Error (MAE): 220.31
R² Score: 0.03
Root Mean Squared Error (RMSE): 932.86
Predicted Yield: 136.59 metric tons per hectare


# New Section