In [1]:
from google.colab import files
import io
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import (
    accuracy_score, confusion_matrix, classification_report,
    mean_squared_error, r2_score
)

# End-to-end tabular baseline for one uploaded CSV:
# upload -> one-hot encode features -> (optionally) label-encode target ->
# train/test split -> standardise -> fit a random forest -> print metrics.
# Whether a classifier or a regressor is fitted is decided from the target column.

# Step 1: Upload & load dataset
# files.upload() returns a dict mapping each uploaded filename to its raw bytes.
uploaded = files.upload()
if not uploaded:
    raise ValueError("No file was uploaded; upload a CSV file to continue.")
filename = next(iter(uploaded))  # first uploaded file (dicts keep insertion order)
if len(uploaded) > 1:
    # Only one dataset is modelled per run; make the choice visible instead of silent.
    print(f"Note: {len(uploaded)} files uploaded; using the first one: {filename}")
df = pd.read_csv(io.BytesIO(uploaded[filename]))

print("✅ Dataset loaded. Shape:", df.shape)
print("\nColumns:", df.columns.tolist())
print("\nSample rows:\n", df.head())

# Step 2: Define target column (👉 change this)
target_column = "Market_Price_per_ton"   # example
if target_column not in df.columns:
    # Fail early with the available choices rather than a bare KeyError from drop().
    raise KeyError(
        f"Target column {target_column!r} not found in {filename}. "
        f"Available columns: {df.columns.tolist()}"
    )
X = df.drop(columns=[target_column])
y = df[target_column]

# Step 3: Encode categorical features in X
# drop_first=True drops one dummy level per categorical column.
X = pd.get_dummies(X, drop_first=True)

# Step 4: Encode target if categorical
# Any non-numeric target (object, string, category) and any boolean target is a
# set of class labels, so it is label-encoded and later always treated as a
# classification problem, regardless of how many distinct classes it has.
target_is_categorical = (
    not pd.api.types.is_numeric_dtype(y) or pd.api.types.is_bool_dtype(y)
)
if target_is_categorical:
    le = LabelEncoder()
    y = le.fit_transform(y)  # y becomes a NumPy array of integer class codes

# Step 5: Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print("\nTrain size:", X_train.shape, " Test size:", X_test.shape)

# Step 6: Feature scaling
# The scaler is fitted on the training split only and then applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 7: Choose model
# Integer-valued targets with only a few distinct values are treated as class
# labels; every other numeric target is treated as a regression target.
MAX_CLASSES_FOR_INT_TARGET = 20
is_classification = target_is_categorical or (
    pd.api.types.is_integer_dtype(y)
    and len(np.unique(y)) < MAX_CLASSES_FOR_INT_TARGET
)
if is_classification:
    print("\n🔹 Detected task: Classification")
    model = RandomForestClassifier(random_state=42)
else:
    print("\n🔹 Detected task: Regression")
    model = RandomForestRegressor(random_state=42)

# Step 8: Train model
model.fit(X_train_scaled, y_train)

# Step 9: Predictions
y_pred = model.predict(X_test_scaled)

# Step 10: Evaluation
if is_classification:
    print("\n✅ Accuracy:", accuracy_score(y_test, y_pred))
    print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("\nClassification Report:\n", classification_report(y_test, y_pred))
else:
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    print("\n✅ MSE:", mse)
    print("RMSE:", rmse)
    print("R² Score:", r2)


Saving market_researcher_dataset.csv to market_researcher_dataset.csv
Saving farmer_advisor_dataset.csv to farmer_advisor_dataset.csv
✅ Dataset loaded. Shape: (10000, 10)

Columns: ['Market_ID', 'Product', 'Market_Price_per_ton', 'Demand_Index', 'Supply_Index', 'Competitor_Price_per_ton', 'Economic_Indicator', 'Weather_Impact_Score', 'Seasonal_Factor', 'Consumer_Trend_Index']

Sample rows:
    Market_ID  Product  Market_Price_per_ton  Demand_Index  Supply_Index  \
0          1     Rice            180.251212    196.085900    199.509124   
1          2     Rice            420.527970    188.452400    150.789483   
2          3    Wheat            457.260398    171.179384     78.989326   
3          4  Soybean            237.179113    196.970677     50.464363   
4          5    Wheat            324.032925    113.165416    145.878647   

   Competitor_Price_per_ton  Economic_Indicator  Weather_Impact_Score  \
0                300.549219            1.093636             28.474810   
1      