In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Toy example: fit a linear regression that predicts the total production cost
# of a crop from its acreage and per-acre input costs, report hold-out metrics,
# and predict the cost of one new crop.
#
# NOTE: the dataset has only 5 rows, so the metrics below are illustrative and
# not a reliable estimate of model quality.

# Example dataset with features and total cost (target variable)
data = {
    'crop_type': ['Wheat', 'Rice', 'Maize', 'Soybean', 'Barley'],
    'acreage': [10, 8, 12, 15, 7],  # Acres of land used for each crop
    'seed_cost_per_acre': [100, 120, 90, 110, 95],  # Seed cost per acre
    'fertilizer_cost_per_acre': [50, 60, 45, 55, 48],  # Fertilizer cost per acre
    'labor_cost_per_acre': [80, 85, 75, 90, 82],  # Labor cost per acre
    'total_cost': [2350, 1980, 2820, 3200, 1880]  # Total production cost for each crop
}

# Create DataFrame
df = pd.DataFrame(data)

# Separate features (X) and target variable (y).
# The column list is kept in one place so the prediction input further down is
# built with exactly the same names and order the model was trained on.
feature_columns = ['acreage', 'seed_cost_per_acre', 'fertilizer_cost_per_acre', 'labor_cost_per_acre']
X = df[feature_columns]
y = df['total_cost']

# Split the data into training and testing sets.
# With 5 rows and test_size=0.2 the test set contains a single row.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the linear regression model
model = LinearRegression()

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# R-squared is undefined for fewer than two samples: scikit-learn returns nan
# and emits an UndefinedMetricWarning. Only compute it when it is meaningful,
# and say explicitly why it is missing otherwise. `r2` stays a float either way
# so later cells that read it keep working.
if len(y_test) >= 2:
    r2 = r2_score(y_test, y_pred)
    print("R-squared:", r2)
else:
    r2 = float('nan')
    print(
        "R-squared: undefined (test set has", len(y_test),
        "sample(s); at least 2 are required)"
    )

# Predict total cost for a new crop
new_crop_features = [[13, 105, 55, 83]]  # Example features for a new crop
# The model was fitted on a DataFrame with named columns; wrapping the new
# sample in a DataFrame with the same columns avoids scikit-learn's
# "X does not have valid feature names" warning and guards against the values
# being silently matched to the wrong features.
new_crop_df = pd.DataFrame(new_crop_features, columns=feature_columns)
predicted_cost = model.predict(new_crop_df)
print("Predicted Total Cost for New Crop:", predicted_cost[0])


Mean Squared Error: 58175.554120154666
R-squared: nan
Predicted Total Cost for New Crop: 2821.383803909401


