# **TASK 2: Crop Price Prediction (Regression Task – Agriculture)**

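# 1. Import Libraries

> pandas and NumPy handle the data; scikit-learn provides the train/test split, the linear regression model, and the error metrics.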



In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

# 2. Load and Prepare the Dataset
We will select numerical features and handle any missing data.

In [None]:
# Load the dataset.
# A missing file must stop the cell here. Printing a message and carrying on
# would leave `df` unassigned (NameError on the next statement in a fresh
# kernel) or, worse, silently reuse a stale `df` left over from an earlier run.
try:
    df = pd.read_csv('crop_price.csv')
except FileNotFoundError as err:
    raise FileNotFoundError(
        "Error: 'crop_price.csv' not found. Ensure the file is in the notebook folder."
    ) from err

# Data Cleaning: Keep only numerical columns for this basic regression task
# Features: Cost of Cultivation, Production, Yield, Temperature, Rainfall
# Target: Price
cols_to_keep = ['CostCultivation', 'Production', 'Yield', 'Temperature', 'RainFall Annual', 'Price']

# Select the modelling columns, then drop every row that has a missing value
# in any of them. `df` itself is left untouched so the raw data stays available.
df_numeric = df[cols_to_keep].dropna()

# Display info: (rows, columns) after cleaning and the first five rows.
print(f"Dataset Shape: {df_numeric.shape}")
print(df_numeric.head())

Dataset Shape: (49, 6)
   CostCultivation  Production  Yield  Temperature  RainFall Annual     Price
0          9794.05     1941.55   9.83        28.96           3373.2  19589.10
1         10593.15     2172.46   7.47        29.22           3520.7  21187.30
2         13468.82     1898.30   9.59        28.47           2957.4  26938.64
3         17051.66     3670.54   6.42        28.49           3079.6  34104.32
4         17130.55     2775.80   8.72        28.30           2566.7  34262.10


# 3. Feature and Target Definition



In [None]:
# Predictor matrix: the five numeric columns used as model inputs.
# NOTE(review): in the sample rows printed after loading, Price is exactly
# 2 * CostCultivation + 1. If that holds for the whole file, keeping
# CostCultivation as a feature makes the target trivially recoverable --
# confirm whether this is intended before trusting the evaluation metrics.
X = df_numeric[
    [
        'CostCultivation',
        'Production',
        'Yield',
        'Temperature',
        'RainFall Annual',
    ]
]

# Response vector: the value the model is asked to predict.
y = df_numeric['Price']

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 4. Train the Model (Linear Regression)

In [None]:
# Build the linear regression estimator and fit it on the training split
# in one step (`fit` returns the fitted estimator itself).
lr_model = LinearRegression().fit(X_train, y_train)

# Predicted prices for the held-out test rows.
y_pred = lr_model.predict(X_test)

# 5. Evaluate the Results
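We use Mean Absolute Error (MAE) to see how many "currency units" our predictions are off on average, and Root Mean Squared Error (RMSE), which penalises large errors more heavily. Note that an error of exactly 0.00 is a warning sign rather than a success: in all five sample rows printed in Section 2, `Price` equals 2 × `CostCultivation` + 1, so the model may simply be recovering that formula from the `CostCultivation` feature instead of learning a meaningful price relationship.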

In [None]:
# Error metrics on the held-out test set. RMSE is derived from MSE so both
# reported figures are in the same units as the target.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Report both metrics, one per line, rounded to two decimals.
print(
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
    sep="\n",
)

# Side-by-side view of the first five test rows: true price vs. model output.
# The target is converted to a plain array so both columns share a fresh 0..n index.
comparison = pd.DataFrame(
    {
        'Actual': y_test.to_numpy(),
        'Predicted': y_pred,
    }
).iloc[:5]
print("\nSample Predictions:")
print(comparison)

Mean Absolute Error (MAE): 0.00
Root Mean Squared Error (RMSE): 0.00

Sample Predictions:
      Actual  Predicted
0   25972.90   25972.90
1   24929.80   24929.80
2   37959.76   37959.76
3  132671.12  132671.12
4   45016.72   45016.72
