In [None]:

# 📦 Import required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import joblib


In [None]:

# 📂 Upload your cleaned Excel file here (same as you downloaded before)
from google.colab import files  # Colab-only helper; reused further down for downloads
uploaded = files.upload()

# `uploaded` is iterated below to obtain the uploaded file's name. If nothing was
# uploaded (e.g. the dialog was dismissed) `next(iter(uploaded))` would raise a
# bare StopIteration, so fail early with a message that says what to do instead.
if not uploaded:
    raise ValueError("No file was uploaded - re-run this cell and choose the cleaned Excel file.")

# Load the first uploaded file into a DataFrame and preview it
df = pd.read_excel(next(iter(uploaded)))
df.head()


In [None]:

# 🔁 Clean missing values, then encode categorical variables
# Rows missing any modelling column are dropped BEFORE encoding, using the raw
# 'Season' / 'District' columns. Dropping on the encoded columns instead has no
# effect: LabelEncoder output is an integer array that never contains NaN, so
# rows without a season or district would silently stay in the training data.
required_cols = ['Sown(hect)', 'Gross Harvested(hect)', 'Nett harvetsted(hect)', 'Yield(kg per hect)']
# .copy() gives an independent frame, so adding columns below neither touches
# `df` nor triggers SettingWithCopyWarning; re-running this cell is idempotent.
df_clean = df.dropna(subset=['Year', 'Season', 'District'] + required_cols).copy()

# Fit one encoder per categorical column on the cleaned rows only, so the codes
# cover exactly the categories that remain available for training.
season_encoder = LabelEncoder()
district_encoder = LabelEncoder()
df_clean['Season_encoded'] = season_encoder.fit_transform(df_clean['Season'])
df_clean['District_encoded'] = district_encoder.fit_transform(df_clean['District'])

# Save mappings for reference (original label -> integer code)
season_mapping = dict(zip(season_encoder.classes_, season_encoder.transform(season_encoder.classes_)))
district_mapping = dict(zip(district_encoder.classes_, district_encoder.transform(district_encoder.classes_)))


In [None]:

# 🧠 Input feature columns; train_model() selects these from df_clean for every target
features = [
    'Year',
    'Season_encoded',
    'District_encoded',
    'Sown(hect)',
]

def train_model(target_column, test_size=0.2, random_state=42, n_estimators=100):
    """Fit a RandomForestRegressor for one target column and report hold-out metrics.

    Reads the module-level ``df_clean`` frame and ``features`` list, splits the
    rows into train/test partitions, fits the forest on the training partition
    and prints MAE and R² measured on the held-out partition.

    Args:
        target_column: Name of the ``df_clean`` column to predict.
        test_size: Fraction of rows held out for evaluation (default 0.2).
        random_state: Seed used for both the split and the forest (default 42).
        n_estimators: Number of trees in the forest (default 100).

    Returns:
        The fitted ``RandomForestRegressor`` (trained on the training partition only).
    """
    X = df_clean[features]
    y = df_clean[target_column]
    # Keep the held-out partition instead of discarding it, so the model that
    # gets saved has at least one recorded quality measurement.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    model = RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    print(
        f"{target_column}: "
        f"MAE = {mean_absolute_error(y_test, y_pred):.3f}, "
        f"R² = {r2_score(y_test, y_pred):.3f}"
    )
    return model

# Fit one regressor per target column, in order: gross harvested, nett harvested, yield
model_gross, model_nett, model_yield = [
    train_model(target)
    for target in ('Gross Harvested(hect)', 'Nett harvetsted(hect)', 'Yield(kg per hect)')
]


In [None]:

# 💾 Persist the three fitted models with joblib
for _model, _path in (
    (model_gross, "model_gross.pkl"),
    (model_nett, "model_nett.pkl"),
    (model_yield, "model_yield.pkl"),
):
    joblib.dump(_model, _path)

# Write each label -> code mapping to CSV (labels form the index, codes the single column)
for _mapping, _column, _path in (
    (district_mapping, 'District_encoded', "district_encoding.csv"),
    (season_mapping, 'Season_encoded', "season_encoding.csv"),
):
    pd.DataFrame.from_dict(_mapping, orient='index', columns=[_column]).to_csv(_path)

# 🔁 Download from Colab, in the same order the files were written
for _path in (
    "model_gross.pkl",
    "model_nett.pkl",
    "model_yield.pkl",
    "district_encoding.csv",
    "season_encoding.csv",
):
    files.download(_path)
