In [1]:
import calendar
import datetime
import html
import json
import os
import warnings

import joblib
import numpy as np
import pandas as pd
import requests
from geopy.geocoders import Nominatim
from meteostat import Point, Monthly
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
warnings.filterwarnings('ignore')

# Jupyter widgets for interactive interface
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML
import matplotlib.pyplot as plt
import seaborn as sns

# Reached only if every import above succeeded; a missing package raises before this line.
print("✅ All libraries imported successfully!")

✅ All libraries imported successfully!


In [2]:
# API Configuration
# The Tomorrow.io key is taken from the TOMORROW_IO_API_KEY environment variable when
# it is set (and non-empty), so the credential can be supplied without editing the notebook.
# FIXME(security): the literal fallback below is a credential stored in the notebook
# itself. Rotate it and remove the fallback once the environment variable is in use.
API_KEY = os.environ.get("TOMORROW_IO_API_KEY") or "mTgxFT8btimLVUoCY3ZkKQiwPxP6k0d6"  # Tomorrow.io API key
# CSV read by train_crop_model() when no explicit path is given.
DEFAULT_CSV_PATH = "new_APY.csv"

# Global variables to store model and results
crop_model = None          # fitted pipeline, set by train_crop_model() / load_crop_model()
model_trained = False      # True once crop_model holds a usable model
prediction_results = {}    # last result dict written by run_prediction_widget()

print("⚙️ Configuration completed!")

⚙️ Configuration completed!


In [3]:
# Cell 3: Utility Functions
# =========================

def get_lat_lon(state, district, country="India"):
    """Geocode "district, state, country" with Nominatim.

    Returns a (latitude, longitude) tuple. When the lookup finds nothing, or
    any exception is raised while geocoding, a message is printed and the
    fixed Delhi coordinates (28.7041, 77.1025) are returned instead.
    """
    delhi = (28.7041, 77.1025)
    try:
        geocoder = Nominatim(user_agent="prediction_app")
        query = f"{district}, {state}, {country}"
        match = geocoder.geocode(query)
        if not match:
            print(f"⚠️ Could not find {query}, defaulting to Delhi.")
            return delhi
        return match.latitude, match.longitude
    except Exception as err:
        print(f"❌ Error fetching location: {err}")
        print("Using default: Delhi")
        return delhi

def format_month_name(month_num):
    """Return the English month name for 1..12, or "Invalid" for any other number."""
    if not 1 <= month_num <= 12:
        return "Invalid"
    names = (
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December',
    )
    return names[month_num - 1]

# Cell status line: get_lat_lon and format_month_name are now defined in the kernel.
print("🔧 Utility functions defined!")

🔧 Utility functions defined!


In [4]:
# Cell 4: Weather Prediction Functions
# ====================================

def get_forecast_tomorrowio(lat, lon, start_date, end_date, timeout=15):
    """Fetch a daily-timestep forecast from the Tomorrow.io timelines endpoint.

    Args:
        lat, lon: Coordinates, sent as the "lat,lon" location string.
        start_date, end_date: Objects exposing ``isoformat()`` (datetimes in
            this notebook) that bound the requested window.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block the caller
            (the button callback) indefinitely.

    Returns:
        The decoded JSON payload on HTTP 200, otherwise None. Failures are
        reported with a printed message rather than raised.
    """
    url = "https://api.tomorrow.io/v4/timelines"
    headers = {"apikey": API_KEY}
    params = {
        "location": f"{lat},{lon}",
        "fields": ["temperature", "precipitationIntensity", "windSpeed", "humidity"],
        "startTime": start_date.isoformat(),
        "endTime": end_date.isoformat(),
        "timesteps": "1d"
    }

    try:
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        if response.status_code == 200:
            return response.json()
        else:
            print(f"⚠️ API Error: {response.status_code}")
            return None
    except Exception as e:
        # Covers connection errors, timeouts and undecodable JSON bodies alike;
        # the caller treats None as "fall back to historical data".
        print(f"❌ API Request failed: {e}")
        return None

def predict_weather_historical(lat, lon, month, year):
    """Extrapolate temperature and precipitation for one calendar month.

    Meteostat monthly records for the ten years before ``year`` are fetched,
    restricted to the requested calendar ``month``, and a separate linear
    trend over the year number is fitted for ``tavg`` and ``prcp``. Both
    trends are then evaluated at ``year``.

    Returns:
        {"predicted_temperature": float, "predicted_precipitation_mm": float}
        rounded to two decimals, or None when fewer than three complete
        records are available or any step raises.
    """
    try:
        location = Point(lat, lon)

        # Fetch historical monthly data (last 10 years)
        start = datetime.datetime(year - 10, month, 1)
        end = datetime.datetime(year - 1, month, 28)

        data = Monthly(location, start, end)
        df = data.fetch()

        if df.empty:
            return None

        # The fetched window contains every month between start and end. Keep
        # only the requested calendar month; otherwise the fit below would mix
        # all seasons and describe an annual average instead of this month.
        df = df[df.index.month == month]

        # Drop rows with missing values
        df = df[['tavg', 'prcp']].dropna()

        if df.empty or len(df) < 3:
            print("⚠️ Not enough historical data available.")
            return None

        # Train regression models
        X = np.array(df.index.year).reshape(-1, 1)
        y_temp = df['tavg'].values
        y_precip = df['prcp'].values

        model_temp = LinearRegression().fit(X, y_temp)
        model_precip = LinearRegression().fit(X, y_precip)

        # Predict for given year
        X_pred = np.array([[year]])
        predicted_temp = float(model_temp.predict(X_pred)[0])
        # A falling linear trend can extrapolate below zero, which is not a
        # meaningful precipitation amount, so clamp at 0.
        predicted_precip = max(0.0, float(model_precip.predict(X_pred)[0]))

        return {
            "predicted_temperature": round(predicted_temp, 2),
            "predicted_precipitation_mm": round(predicted_precip, 2)
        }

    except Exception as e:
        print(f"❌ Error in historical prediction: {e}")
        return None

# Cell status line: get_forecast_tomorrowio and predict_weather_historical are now defined.
print("🌤️ Weather prediction functions ready!")

🌤️ Weather prediction functions ready!


In [5]:
# Cell 5: Crop Yield Prediction Functions
# =======================================

def train_crop_model(csv_file_path=DEFAULT_CSV_PATH):
    """Train the crop yield model from a CSV and store it in ``crop_model``.

    The CSV must contain the columns Month, State, District, Crop and Yield.
    All four features are one-hot encoded and fed to a random forest. The
    fitted pipeline is written to "crop_yield_model.pkl" and the module-level
    ``crop_model`` / ``model_trained`` globals are updated.

    Month values are normalised to string labels ("1".."12" for numeric
    months) before fitting, because predict_crop_yield() supplies the month as
    ``str(month)``. Without this, numeric months read from the CSV never match
    the string given at prediction time and the encoder silently ignores the
    Month feature. A model pickled before this normalisation has to be
    retrained to benefit from it.

    Args:
        csv_file_path: Path of the training CSV.

    Returns:
        True when training and saving succeeded, False otherwise (a message
        describing the failure is printed).
    """
    global crop_model, model_trained

    def month_label(value):
        # 9, 9.0 and "09" all become "9"; non-numeric values (e.g. month
        # names, NaN) are kept as their stripped string form.
        try:
            return str(int(float(value)))
        except (TypeError, ValueError, OverflowError):
            return str(value).strip()

    try:
        # Load dataset
        if not os.path.exists(csv_file_path):
            print(f"❌ File {csv_file_path} not found!")
            return False

        df = pd.read_csv(csv_file_path)
        print(f"📊 Dataset loaded: {len(df)} records")

        # Feature selection
        features = ["Month", "State", "District", "Crop"]
        target = "Yield"

        # Check if required columns exist
        missing_cols = [col for col in features + [target] if col not in df.columns]
        if missing_cols:
            print(f"❌ Missing columns: {missing_cols}")
            return False

        # Rows without a target cannot be used for fitting (the regressor
        # rejects NaN targets), so drop them instead of failing the whole run.
        usable = df.dropna(subset=[target])
        if usable.empty:
            print("❌ No rows with a Yield value to train on!")
            return False

        X = usable[features].copy()
        X["Month"] = X["Month"].map(month_label)
        y = usable[target]

        # Train-test split
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Preprocessing
        preprocessor = ColumnTransformer(
            transformers=[
                ("cat", OneHotEncoder(handle_unknown="ignore"), features)
            ]
        )

        # Model pipeline
        crop_model = Pipeline(steps=[
            ("preprocessor", preprocessor),
            ("regressor", RandomForestRegressor(n_estimators=200, random_state=42))
        ])

        # Train
        print("🤖 Training model...")
        crop_model.fit(X_train, y_train)

        # Evaluate
        y_pred = crop_model.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        r2 = r2_score(y_test, y_pred)

        print(f"✅ Model Training Complete!")
        print(f"📈 RMSE: {rmse:.4f}")
        print(f"📈 R²: {r2:.4f}")

        # Save model
        joblib.dump(crop_model, "crop_yield_model.pkl")
        model_trained = True

        return True

    except Exception as e:
        print(f"❌ Error training crop model: {e}")
        return False

def load_crop_model(model_path="crop_yield_model.pkl"):
    """Load a previously saved model into the ``crop_model`` global.

    Returns True and sets ``model_trained`` when ``model_path`` exists and
    loads without error; otherwise prints the reason and returns False.
    """
    global crop_model, model_trained

    try:
        if not os.path.exists(model_path):
            print(f"⚠️ Model file {model_path} not found.")
            return False

        # NOTE: joblib.load unpickles the file, which can execute code stored
        # in it; only point this at model files you created yourself.
        crop_model = joblib.load(model_path)
        model_trained = True
        print("✅ Pre-trained model loaded successfully!")
        return True
    except Exception as err:
        print(f"❌ Error loading model: {err}")
        return False

def predict_crop_yield(state, district, crop, month):
    """Return the model's yield prediction for one input row, rounded to 2 dp.

    The row is passed to ``crop_model.predict`` as a one-line DataFrame with
    columns State, District, Crop and Month (the month converted with
    ``str``). Returns None when no model is available or prediction fails.
    """
    global crop_model, model_trained

    model_ready = model_trained and crop_model is not None
    if not model_ready:
        return None

    try:
        row = {
            "State": state,
            "District": district,
            "Crop": crop,
            "Month": str(month)
        }
        user_input = pd.DataFrame([row])
        predicted = crop_model.predict(user_input)[0]
        return round(predicted, 2)
    except Exception as err:
        print(f"❌ Error predicting crop yield: {err}")
        return None

# Cell status line: train_crop_model, load_crop_model and predict_crop_yield are now defined.
print("🌾 Crop prediction functions ready!")

🌾 Crop prediction functions ready!


In [6]:
# Cell 6: Model Training and Loading
# ==================================

print("🚀 Initializing crop yield prediction model...")

# Prefer a saved model; train from the default CSV only when none can be loaded.
if not load_crop_model():
    print("\n📚 No pre-trained model found. Training new model...")
    if not os.path.exists(DEFAULT_CSV_PATH):
        # Nothing to train on: explain what is missing and how to train later.
        print("\n".join([
            f"⚠️ Dataset file '{DEFAULT_CSV_PATH}' not found.",
            "📝 Please ensure you have the crop data CSV file in the working directory.",
            "🔄 You can train the model later using the train_crop_model() function.",
        ]))
    else:
        train_crop_model(DEFAULT_CSV_PATH)

🚀 Initializing crop yield prediction model...
✅ Pre-trained model loaded successfully!


In [None]:
# Cell 7: Interactive Widget Interface
# ====================================

# Create widgets
def _form_field(widget_cls, label, **options):
    """Build one input widget with the form's shared label style and 300px width."""
    return widget_cls(
        description=label,
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='300px'),
        **options
    )

# Free-text inputs, pre-filled with an example location and crop.
state_widget = _form_field(
    widgets.Text, 'State:', value='Maharashtra', placeholder='Enter state name'
)
district_widget = _form_field(
    widgets.Text, 'District:', value='Pune', placeholder='Enter district name'
)
crop_widget = _form_field(
    widgets.Text, 'Crop:', value='Sugarcane', placeholder='Enter crop type'
)

# Dropdown labels read "9 - September"; the selected value is the month number.
month_widget = _form_field(
    widgets.Dropdown,
    'Month:',
    options=[(f'{i} - {format_month_name(i)}', i) for i in range(1, 13)],
    value=9
)

year_widget = _form_field(widgets.IntText, 'Year:', value=2025)

# The click handler is attached in a later cell.
predict_button = widgets.Button(
    description='Run Prediction',
    button_style='success',
    tooltip='Click to run prediction',
    icon='play',
    layout=widgets.Layout(width='200px', height='40px')
)

In [8]:
# Output area: run_prediction_widget() renders its messages and results here.
output_area = widgets.Output()

# Layout: title, the five inputs, the button, then the output area, top to bottom.
input_form = widgets.VBox(children=[
    widgets.HTML("<h2>🌾 Weather & Crop Yield Prediction System</h2>"),
    widgets.HTML("<hr>"),
    state_widget, district_widget, crop_widget, month_widget, year_widget,
    widgets.HTML("<br>"),
    predict_button,
    widgets.HTML("<hr>"),
    output_area,
])

# Display the interface
display(input_form)

print("🎛️ Interactive interface created!")

VBox(children=(HTML(value='<h2>🌾 Weather & Crop Yield Prediction System</h2>'), HTML(value='<hr>'), Text(value…

🎛️ Interactive interface created!


In [9]:
# Cell 8: Prediction Logic and Display Functions
# ==============================================

def _mean_or_none(values):
    """Average the non-missing numbers in ``values`` (rounded to 2 dp), or None if there are none."""
    present = [value for value in values if value is not None]
    return round(float(np.mean(present)), 2) if present else None


def _summarize_tomorrowio_forecast(forecast):
    """Reduce a Tomorrow.io timelines payload to per-field averages, or None if it holds no intervals."""
    if not isinstance(forecast, dict):
        return None
    data = forecast.get("data")
    timelines = data.get("timelines") if isinstance(data, dict) else None
    if not timelines:
        return None
    intervals = timelines[0].get("intervals") or []
    readings = [interval.get("values") or {} for interval in intervals]
    if not readings:
        return None
    return {
        "provider": "tomorrow.io",
        "t_avg_c": _mean_or_none(r.get("temperature") for r in readings),
        # NOTE(review): this is the mean of the API's precipitationIntensity
        # field, stored unchanged under a per-day key. Tomorrow.io documents
        # that field as a rate; confirm the unit before relying on "mm/day".
        "precip_mm_day": _mean_or_none(r.get("precipitationIntensity") for r in readings),
        "humidity_avg": _mean_or_none(r.get("humidity") for r in readings),
        "wind_m_s": _mean_or_none(r.get("windSpeed") for r in readings),
    }


def run_prediction_widget():
    """Run one prediction from the current widget values and show the result.

    Reads state, district, crop, month and year from the form widgets,
    geocodes the location, obtains weather figures (Tomorrow.io forecast when
    it returns usable intervals, otherwise the historical trend), asks the
    crop model for a yield, stores everything in the ``prediction_results``
    global and calls display_prediction_results(). All output goes to
    ``output_area``.
    """
    global prediction_results

    with output_area:
        clear_output(wait=True)

        # Get values from widgets
        state = state_widget.value.strip()
        district = district_widget.value.strip()
        crop = crop_widget.value.strip()
        month = month_widget.value
        year = year_widget.value

        if not all([state, district, crop]):
            print("❌ Please fill in all fields!")
            return

        # The year box accepts any integer; reject values datetime cannot
        # represent before doing any network work.
        try:
            start_date = datetime.datetime(year, month, 1)
            if month == 12:
                end_date = datetime.datetime(year + 1, 1, 1)
            else:
                end_date = datetime.datetime(year, month + 1, 1)
        except (TypeError, ValueError, OverflowError):
            print(f"❌ Invalid month/year: {month}/{year}")
            return

        print("🔄 Processing prediction...")
        print(f"📍 Location: {state}, {district}")
        print(f"🌾 Crop: {crop}")
        print(f"📅 Time: {format_month_name(month)} {year}")
        print("-" * 50)

        # Get coordinates
        lat, lon = get_lat_lon(state, district)
        print(f"🗺️ Coordinates: {lat:.4f}, {lon:.4f}")

        weather_result = {
            "provider": "none",
            "t_avg_c": None,
            "precip_mm_day": None,
            "humidity_avg": None,
            "wind_m_s": None
        }

        # Try Tomorrow.io API first
        forecast = get_forecast_tomorrowio(lat, lon, start_date, end_date)
        forecast_summary = _summarize_tomorrowio_forecast(forecast)

        if forecast_summary is not None:
            print("🌐 Using Tomorrow.io API data")
            weather_result.update(forecast_summary)
        else:
            print("📊 Using historical data prediction")
            historical_weather = predict_weather_historical(lat, lon, month, year)
            if historical_weather:
                # The historical figure is a total for the whole month
                # (Meteostat's monthly prcp), so divide by the month length
                # to get the per-day value this key promises.
                days_in_month = calendar.monthrange(year, month)[1]
                monthly_total_mm = historical_weather["predicted_precipitation_mm"]
                weather_result.update({
                    "provider": "historical",
                    "t_avg_c": historical_weather["predicted_temperature"],
                    "precip_mm_day": round(monthly_total_mm / days_in_month, 2),
                    "humidity_avg": None,
                    "wind_m_s": None
                })

        # Crop yield prediction
        crop_yield = predict_crop_yield(state, district, crop, month) if model_trained else None

        # Store results
        prediction_results = {
            "inputs": {
                "State": state,
                "District": district,
                "Crop": crop,
                "Month": month,
                "Year": year
            },
            "location": {"lat": lat, "lon": lon},
            "weather": weather_result,
            "yield_prediction": crop_yield,
            "model_trained": model_trained
        }

        # Display results
        display_prediction_results()

def display_prediction_results():
    """Render the last prediction as an HTML card followed by its raw JSON.

    Reads the ``prediction_results`` global written by run_prediction_widget().
    Prints a message and returns when it is empty.

    Values are HTML-escaped before being placed in the markup because state,
    district and crop are free text typed by the user. Optional rows and the
    yield line are selected with ``is not None`` so that a legitimate value of
    0 is still shown; missing temperature/precipitation render as "N/A".
    """
    if not prediction_results:
        print("❌ No prediction results available!")
        return

    inputs = prediction_results["inputs"]
    location = prediction_results["location"]
    weather = prediction_results["weather"]
    yield_pred = prediction_results["yield_prediction"]

    def esc(value):
        return html.escape(str(value))

    def with_unit(value, unit):
        return f"{esc(value)}{unit}" if value is not None else "N/A"

    humidity_row = (
        f"<p><strong>Humidity:</strong> {esc(weather['humidity_avg'])}%</p>"
        if weather['humidity_avg'] is not None else ""
    )
    wind_row = (
        f"<p><strong>Wind Speed:</strong> {esc(weather['wind_m_s'])} m/s</p>"
        if weather['wind_m_s'] is not None else ""
    )
    if yield_pred is not None:
        yield_row = (
            '<p style="color: #28a745; font-size: 18px;"><strong>Predicted Yield: '
            + esc(yield_pred) + ' tons/hectare</strong></p>'
        )
    else:
        yield_row = '<p style="color: #dc3545;">Model not trained or prediction unavailable</p>'

    # Create styled output
    html_output = f"""
    <div style="background-color: #f8f9fa; padding: 20px; border-radius: 10px; border: 1px solid #dee2e6;">
        <h3 style="color: #28a745; margin-bottom: 20px;">📊 Prediction Results</h3>
        
        <div style="background-color: white; padding: 15px; border-radius: 8px; margin-bottom: 15px;">
            <h4 style="color: #495057; margin-bottom: 10px;">📍 Input Details</h4>
            <p><strong>Location:</strong> {esc(inputs['State'])}, {esc(inputs['District'])}</p>
            <p><strong>Crop:</strong> {esc(inputs['Crop'])}</p>
            <p><strong>Time:</strong> {esc(format_month_name(inputs['Month']))} {esc(inputs['Year'])}</p>
            <p><strong>Coordinates:</strong> {location['lat']:.4f}, {location['lon']:.4f}</p>
        </div>
        
        <div style="background-color: white; padding: 15px; border-radius: 8px; margin-bottom: 15px;">
            <h4 style="color: #495057; margin-bottom: 10px;">🌤️ Weather Prediction</h4>
            <p><strong>Data Source:</strong> {esc(weather['provider'].title())}</p>
            <p><strong>Average Temperature:</strong> {with_unit(weather['t_avg_c'], '°C')}</p>
            <p><strong>Precipitation:</strong> {with_unit(weather['precip_mm_day'], ' mm/day')}</p>
            {humidity_row}
            {wind_row}
        </div>
        
        <div style="background-color: white; padding: 15px; border-radius: 8px;">
            <h4 style="color: #495057; margin-bottom: 10px;">🌾 Crop Yield Prediction</h4>
            {yield_row}
        </div>
    </div>
    """

    display(HTML(html_output))

    def json_fallback(obj):
        # Values json cannot encode natively (e.g. NumPy scalars) are
        # converted via .item() when available, otherwise shown as text.
        return obj.item() if hasattr(obj, "item") else str(obj)

    # Also print JSON output
    print("\n" + "="*50)
    print("Raw JSON Output:")
    print("="*50)
    print(json.dumps(prediction_results, indent=2, default=json_fallback))

# Connect button to function
# Re-running this cell must not stack handlers: every extra registration would
# make one click run the prediction one more time. A handler left by an earlier
# run of this cell is therefore removed before the new one is registered.
_previous_click_handler = globals().get("_on_predict_clicked")
if _previous_click_handler is not None:
    predict_button.on_click(_previous_click_handler, remove=True)


def _on_predict_clicked(_button):
    """Button callback: the clicked button is passed in but not needed."""
    run_prediction_widget()


predict_button.on_click(_on_predict_clicked)

print("✅ All functions connected! Ready to use!")

✅ All functions connected! Ready to use!


In [10]:
# Cell 9: Additional Utility Functions
# ====================================

def retrain_model(csv_path=None):
    """Train the crop model again and report the outcome.

    ``csv_path`` falls back to DEFAULT_CSV_PATH when None. Returns whatever
    train_crop_model() returned for that path.
    """
    csv_path = DEFAULT_CSV_PATH if csv_path is None else csv_path

    print(f"🔄 Retraining model with data from: {csv_path}")
    success = train_crop_model(csv_path)
    print("✅ Model retrained successfully!" if success else "❌ Model retraining failed!")

    return success

def get_model_info():
    """Print a short description of the currently loaded crop model.

    When ``model_trained`` is set and ``crop_model`` is not None, prints a
    fixed summary and, if the pipeline's 'regressor' step exposes
    ``feature_importances_``, one extra line saying so. Otherwise prints that
    no model is available.
    """
    if model_trained and crop_model is not None:
        print("📊 Model Information:")
        print(f"Status: ✅ Trained and Ready")
        print(f"Type: Random Forest Regressor")
        print(f"Features: Month, State, District, Crop")
        print(f"Target: Crop Yield (tons/hectare)")

        # Try to get feature importance if available
        try:
            if hasattr(crop_model.named_steps['regressor'], 'feature_importances_'):
                print("Feature importances available in model")
        except Exception:
            # Best effort: a loaded object without the expected pipeline layout
            # just skips the extra line. Only Exception is caught so that
            # KeyboardInterrupt / SystemExit still propagate.
            pass
    else:
        print("❌ No model currently loaded or trained")

def show_sample_data():
    """Show a three-row example of the column layout train_crop_model() reads."""
    columns = ['Month', 'State', 'District', 'Crop', 'Yield']
    rows = [
        ('1', 'Maharashtra', 'Pune', 'Sugarcane', 45.2),
        ('2', 'Punjab', 'Ludhiana', 'Wheat', 38.7),
        ('3', 'Karnataka', 'Bangalore', 'Rice', 52.1),
    ]
    # Assemble column-wise so each column keeps the type of its own values.
    df_sample = pd.DataFrame(
        {name: [row[position] for row in rows] for position, name in enumerate(columns)}
    )

    print("📋 Expected Data Format:")
    print("="*30)
    display(df_sample)

# Display utility functions info: a heading followed by one bullet per helper
# defined in this cell.
print("\n".join([
    "🛠️ Additional utility functions available:",
    "• retrain_model(csv_path) - Retrain the model with new data",
    "• get_model_info() - Get current model information",
    "• show_sample_data() - Show expected data format",
]))

🛠️ Additional utility functions available:
• retrain_model(csv_path) - Retrain the model with new data
• get_model_info() - Get current model information
• show_sample_data() - Show expected data format
