In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Read the crop-yield dataset from a CSV file in the working directory.
crops = pd.read_csv("crop_yield.csv")

# Preview the first five rows as a quick sanity check on the loaded columns.
crops.head(n=5)

Unnamed: 0,Region,Soil_Type,Crop,Rainfall_mm,Temperature_Celsius,Fertilizer_Used,Irrigation_Used,Weather_Condition,Days_to_Harvest,Yield_tons_per_hectare
0,West,Sandy,Cotton,897.077239,27.676966,False,True,Cloudy,122,6.555816
1,South,Clay,Rice,992.673282,18.026142,True,True,Rainy,140,8.527341
2,North,Loam,Barley,147.998025,29.794042,False,False,Sunny,106,1.127443
3,North,Sandy,Soybean,986.866331,16.64419,False,True,Rainy,146,6.517573
4,South,Silt,Wheat,730.379174,31.620687,True,True,Cloudy,110,7.248251


In [3]:
# Name of the column the model is trained to predict.
TARGET_COL = "Yield_tons_per_hectare"

# Feature set: every column except the target (returns a new frame; `crops` is untouched).
X = crops.drop(columns=[TARGET_COL])

# Target variable: the yield column on its own.
Y = crops[TARGET_COL]

In [4]:
# String-valued columns; these are one-hot encoded before modelling.
categorical_cols = ["Region", "Soil_Type", "Crop", "Weather_Condition"]

# Columns handed to the regressor unchanged. The two *_Used flags show up as
# True/False in the data preview and are treated as numeric here.
numeric_cols = [
    "Rainfall_mm",
    "Temperature_Celsius",
    "Fertilizer_Used",
    "Irrigation_Used",
    "Days_to_Harvest",
]

# Encoder for the categorical columns; categories not seen during fitting are
# ignored at transform time instead of raising.
categorical_encoder = OneHotEncoder(handle_unknown="ignore")

# Column-wise preprocessing: encode the categoricals, pass the numerics through.
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", categorical_encoder, categorical_cols),
        ("num", "passthrough", numeric_cols),
    ]
)

In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Preprocessing and linear regression chained into a single estimator, so the
# same transformations are applied at fit time and at predict time.
model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("regressor", LinearRegression()),
    ]
)

# Fit on the training portion only.
model.fit(X_train, Y_train)

# Predictions for the held-out rows.
Y_pred = model.predict(X_test)

# Score the held-out predictions against the true yields.
mse = mean_squared_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)
print("Mean squared error: %.2f" % mse)
print("R2 score: %.2f" % r2)


Mean squared error: 0.25
R2 score: 0.91


In [6]:
# One hypothetical farm, described with the same feature columns that are
# listed in categorical_cols and numeric_cols.
my_farm = {
    "Region": "North",
    "Soil_Type": "Loam",
    "Crop": "Wheat",
    "Rainfall_mm": 300,
    "Temperature_Celsius": 22,
    "Fertilizer_Used": 1,
    "Irrigation_Used": 1,
    "Weather_Condition": "Sunny",
    "Days_to_Harvest": 120
}

# Wrap the single record in a list so pandas builds a one-row DataFrame.
my_farm_df = pd.DataFrame([my_farm])

# predict() returns one value per input row; there is exactly one row here.
predicted_yield = model.predict(my_farm_df)
farm_yield = predicted_yield[0]

print("Predicted Yield (tons/hectare):", farm_yield)

Predicted Yield (tons/hectare): 4.6381324990137225
