<a href="https://colab.research.google.com/github/ManviNarang01/AgroInsight/blob/main/AgroInsightModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline

In [None]:
# Column dtypes handed to pd.read_csv for the weather and crop CSV files.
# Weather files: only the 'time' column is pinned, and it is read as text.
dtype_weather = dict(time=str)

# Crop file: 'N', 'P', 'K' are read as integers, the four measurement
# columns as floats, and the target 'label' as text.
dtype_crop = {
    **dict.fromkeys(('N', 'P', 'K'), int),
    **dict.fromkeys(('temperature', 'humidity', 'ph', 'rainfall'), float),
    'label': str,
}

In [None]:
# Read the raw weather CSV; dtype_weather forces the 'time' column to be read as text.
weather_data = pd.read_csv(
    'weather.csv',
    dtype=dtype_weather,
)

In [None]:
# Keep only rows whose 'time' text contains a 'T', then parse that column to datetimes.
# (Presumably valid rows are ISO-8601-style timestamps and anything else is header/metadata
# noise - confirm against weather.csv.)
# astype(str) mirrors the previous str(x) conversion; na=False treats missing values as invalid.
has_timestamp = weather_data['time'].astype(str).str.contains('T', regex=False, na=False)

# .copy() makes the filtered frame independent of the original, so the column assignment
# below does not write into a slice (avoids pandas' SettingWithCopyWarning).
weather_data = weather_data.loc[has_timestamp].copy()
weather_data['time'] = pd.to_datetime(weather_data['time'])

In [None]:
# Swap the full timestamp for its calendar date: add a 'date' column, then discard 'time'.
weather_data = (
    weather_data
    .assign(date=weather_data['time'].dt.date)
    .drop(columns=['time'])
)

# Collapse to one row per calendar date by averaging every numeric column;
# reset_index() turns 'date' back into an ordinary column.
daily_weather_data = (
    weather_data
    .groupby('date')
    .mean(numeric_only=True)
    .reset_index()
)

In [None]:
# Rainfall model inputs: the daily mean of 'rain (mm)' is the target,
# and every remaining column of the daily frame is a candidate feature.
rain_target_column = 'rain (mm)'
y_rain = daily_weather_data[rain_target_column].fillna(0)  # missing rainfall is treated as 0
X_rain = daily_weather_data.drop(columns=[rain_target_column])

In [None]:
# Hold out 20% of the daily rows for evaluation; the fixed random_state keeps the split repeatable.
rain_split = train_test_split(X_rain, y_rain, test_size=0.2, random_state=42)
X_rain_train, X_rain_test, y_rain_train, y_rain_test = rain_split

In [None]:
# Numeric preprocessing for the rainfall features.
# Only float64 columns are selected, which leaves out the object-typed 'date' column.
numeric_features_rain = X_rain.select_dtypes(include='float64').columns

# Two steps: fill gaps with the column mean, then standardize each column.
mean_imputer = SimpleImputer(strategy='mean')
numeric_transformer_rain = make_pipeline(mean_imputer, StandardScaler())

In [None]:
# Wrap the numeric pipeline in a ColumnTransformer so it is applied to the
# selected numeric columns only.
rain_transformers = [('num', numeric_transformer_rain, numeric_features_rain)]
preprocessor_rain = ColumnTransformer(transformers=rain_transformers)

In [None]:
# End-to-end rainfall model: preprocessing followed by an XGBoost regressor.
# Keeping both in one pipeline means fit/predict apply the preprocessing automatically.
rain_regressor = XGBRegressor(random_state=42)
model_xgb_rain = make_pipeline(preprocessor_rain, rain_regressor)

# Fit on the training portion of the daily weather data.
model_xgb_rain.fit(X_rain_train, y_rain_train)

In [None]:
# Score the rainfall model on the held-out rows.
y_pred_rain = model_xgb_rain.predict(X_rain_test)

# Mean squared error between the observed and predicted daily rainfall.
mse_rain = mean_squared_error(y_true=y_rain_test, y_pred=y_pred_rain)
print(f'Mean Squared Error (Rainfall Prediction): {mse_rain}')

Mean Squared Error (Rainfall Prediction): 0.11721523113843363


In [None]:
# Read the crop recommendation CSV, enforcing the column dtypes declared in dtype_crop.
crop_data = pd.read_csv(
    'Crop_recommendation.csv',
    dtype=dtype_crop,
)

In [None]:
# Clamp pH into the realistic range [0, 14].
# Series.clip is vectorised and leaves missing values as NaN; the previous
# min(14, max(0, x)) lambda silently turned NaN into 0 (max(0, nan) returns 0),
# i.e. a missing reading became an extreme pH of 0.
crop_data['ph'] = crop_data['ph'].clip(lower=0, upper=14)

In [None]:
# Split the crop frame into the target ('label') and the feature columns (everything else).
y_crop = crop_data['label']
X_crop = crop_data.drop(columns='label')

In [None]:
# Encode the crop names as integer class ids for the classifier.
# The encoder is fitted on crop_data['label'] rather than on y_crop so this cell is
# idempotent: re-running it no longer refits the encoder on already-encoded integers,
# which would make le_crop.inverse_transform return numbers instead of crop names.
le_crop = LabelEncoder()
y_crop = le_crop.fit_transform(crop_data['label'])

In [None]:
# Hold out 20% of the crop rows for validation; the fixed random_state keeps the split repeatable.
crop_split = train_test_split(X_crop, y_crop, test_size=0.2, random_state=42)
X_crop_train, X_crop_test, y_crop_train, y_crop_test = crop_split

In [None]:
# Crop recommender: a single-step pipeline around a 100-tree random forest (seeded).
crop_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
model_crop = make_pipeline(crop_classifier)

# Fit on the training rows so the forest learns the feature -> crop label mapping.
model_crop.fit(X_crop_train, y_crop_train)

# Predict encoded crop labels for the held-out rows (evaluated in the next cell).
y_pred_crop = model_crop.predict(X_crop_test)

In [None]:
# Evaluate the crop model on the held-out rows: overall accuracy plus a per-class report.
# Classes appear in the report as the integer ids produced by the label encoder.
accuracy_crop = accuracy_score(y_true=y_crop_test, y_pred=y_pred_crop)
crop_report = classification_report(y_crop_test, y_pred_crop)

print(f'Accuracy (Crop Recommendation): {accuracy_crop}')
print('Classification Report (Crop Recommendation):')
print(crop_report)

Accuracy (Crop Recommendation): 0.9931818181818182
Classification Report (Crop Recommendation):
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        21
           2       1.00      1.00      1.00        20
           3       1.00      1.00      1.00        26
           4       1.00      1.00      1.00        27
           5       1.00      1.00      1.00        17
           6       1.00      1.00      1.00        17
           7       1.00      1.00      1.00        14
           8       0.92      1.00      0.96        23
           9       1.00      1.00      1.00        20
          10       0.92      1.00      0.96        11
          11       1.00      1.00      1.00        21
          12       1.00      1.00      1.00        19
          13       1.00      0.96      0.98        24
          14       1.00      1.00      1.00        19
          15       1.00      1.00      

In [None]:
# Load the future-weather CSV; dtype_weather forces 'time' to be read as text.
prediction_data = pd.read_csv('predictions.csv', dtype=dtype_weather)

# Same cleaning as the historical weather data: keep rows whose 'time' text contains
# a 'T', parse the column to datetimes, and derive the calendar date.
# astype(str) mirrors the previous str(x) conversion; na=False treats missing values as invalid.
has_timestamp = prediction_data['time'].astype(str).str.contains('T', regex=False, na=False)

# .copy() makes the filtered frame independent, so the column assignments below
# do not write into a slice (avoids pandas' SettingWithCopyWarning).
prediction_data = prediction_data.loc[has_timestamp].copy()
prediction_data['time'] = pd.to_datetime(prediction_data['time'])
prediction_data['date'] = prediction_data['time'].dt.date

# One row per calendar date, averaging every numeric column.
daily_prediction_data = prediction_data.groupby('date').mean(numeric_only=True).reset_index()

In [None]:
# Average temperature and humidity over the future (forecast) days.
average_temperature = daily_prediction_data['temperature_2m (°C)'].mean()
average_humidity = daily_prediction_data['relative_humidity_2m (%)'].mean()

# Predict rainfall for the *future* days with the trained rainfall model.
# Previously this used y_pred_rain, i.e. predictions for the historical test split,
# so the forecast data never influenced the rainfall estimate.
# Assumes predictions.csv carries the same feature columns the model was trained on
# (extra columns are ignored by the ColumnTransformer) - TODO confirm.
future_rain_pred = model_xgb_rain.predict(daily_prediction_data)

# NOTE(review): the x100 factor is carried over unchanged from the original notebook;
# its justification (unit conversion to the crop dataset's 'rainfall' scale?) is not
# documented anywhere visible - confirm before relying on the recommendation.
RAINFALL_SCALE = 100
average_rainfall = future_rain_pred.mean() * RAINFALL_SCALE

In [None]:
# Single-row input for the crop model, built from the future-weather averages.
# N, P and K are set to 0 and pH to 7 (neutral) as fixed placeholder values.
crop_model_input = {
    'N': [0],
    'P': [0],
    'K': [0],
    'temperature': [average_temperature],
    'humidity': [average_humidity],
    'ph': [7],
    'rainfall': [average_rainfall],
}
new_weather_data = pd.DataFrame(crop_model_input)

In [None]:
# Run the crop model on the prepared row, then map the encoded class id back to its crop name.
predicted_label = model_crop.predict(new_weather_data)
predicted_crop = le_crop.inverse_transform(predicted_label)
print(f'Recommended Crop for the Next Quarter: {predicted_crop[0]}')

Recommended Crop for the Next Quarter: mothbeans
