In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset from and
# write the trained model to paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Install the extra packages imported by later cells (firebase_admin, joblib, dotenv).
!pip install firebase-admin joblib python-dotenv


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import os
from dotenv import load_dotenv

# Load environment variables from .env file
# If using Google Colab, you might need to upload your .env file
from google.colab import files
try:
    uploaded = files.upload() # Upload .env file
    if uploaded:
        # files.upload() returns a {filename: bytes} mapping; the first uploaded
        # file is written out as .env regardless of its original name.
        env_bytes = next(iter(uploaded.values()))
        with open('.env', 'w') as f:
            f.write(env_bytes.decode('utf8'))
        print(".env file uploaded successfully")
    else:
        # Upload dialog was dismissed without choosing a file.
        print("No .env file uploaded or already exists")
except Exception as exc:
    # Keep the notebook running (a .env may already be on disk), but show the
    # reason instead of hiding it. KeyboardInterrupt is no longer swallowed.
    print("No .env file uploaded or already exists")
    print(f"  (upload step failed with: {exc!r})")

# Load environment variables
load_dotenv()

# Rest of your imports
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Kaggle-template leftover: lists the files under /kaggle/input. os.walk yields
# nothing when that directory does not exist, so this prints nothing elsewhere.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Use environment variable for dataset path (falls back to the default Drive location)
data_path = os.getenv('DATASET_PATH', "/content/drive/MyDrive/datasets - datasets.csv")
df = pd.read_csv(data_path)

# A bare expression is only rendered when it is the last statement of a cell;
# these summaries sit in the middle of one, so each is printed explicitly.
print(df.head())

print("\nCropType counts:")
print(df['CropType'].value_counts())
print("\nShape:", df.shape)

print("\nMissing values per column:")
print(df.isnull().sum())

print("\nDuplicated rows:", df.duplicated().sum())

print("\nSummary statistics:")
print(df.describe())

from sklearn.preprocessing import LabelEncoder

# Columns whose string categories are replaced in place by integer codes.
categorical_columns = ['CropType']
label_encoders = {}
for column in categorical_columns:
    encoder = LabelEncoder()
    label_encoders[column] = encoder  # keep the fitted encoder, keyed by column name
    df[column] = encoder.fit_transform(df[column])

    # Report which integer each category was assigned
    # (the position of a class in classes_ is its encoded value).
    print(f"Mapping for column {column}:")
    for value, class_ in enumerate(encoder.classes_):
        print(f"  {class_} -> {value}")
print("\nData after encoding:")
print(df.head())


import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Heatmap 1: correlations between the sensor readings and CropDays.
correlation_matrix = df[['SoilMoisture', 'temperature', 'Humidity','CropDays']].corr()
plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
plt.title("Heatmap of Correlations")
plt.show()

# Heatmap 2: correlations across every column, with CropType expanded into
# indicator columns. It gets its own, larger figure and a title so the annotated
# cells stay readable and the plot is identifiable on its own.
df_encoded = pd.get_dummies(df, columns=['CropType'], drop_first=True)
correlation_matrix = df_encoded.corr()
plt.figure(figsize=(12, 10))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
plt.title("Heatmap of Correlations (all columns, CropType as indicators)")
plt.show()

# CropDays is not used as a model feature from here on.
df=df.drop(columns=['CropDays'])

# A bare `df` mid-cell renders nothing, so show a preview explicitly.
print(df.head())

# Splitting data: every remaining column except the target is a feature.
X = df.drop(columns=['Irrigation'])
y = df['Irrigation']

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Split data into training and testing sets (only do this once)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create and train the optimized Decision Tree model.
# random_state is fixed (same seed as the other estimators in this notebook):
# scikit-learn permutes features at each split, so an unseeded tree can differ
# between runs when several splits are equally good.
model = DecisionTreeClassifier(criterion='entropy', class_weight='balanced', max_depth=5,
                              min_samples_split=10, min_samples_leaf=5, random_state=42)
model.fit(X_train, y_train)

# Make predictions and evaluate on the held-out 30%
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)

# Display results
print(f"Decision Tree Accuracy: {test_accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Cross-validation: 5-fold on the full dataset; cross_val_score fits clones,
# so `model` itself keeps the fit from the training split above.
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(model, X, y, cv=5)
print("Cross-validation scores:", cv_scores)
print(f"Average cross-validation score: {cv_scores.mean():.4f}")

# Visualize decision tree
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

plt.figure(figsize=(20, 10))
plot_tree(
    model,
    # Pass a plain list: some scikit-learn releases validate this argument
    # strictly and reject a pandas Index.
    feature_names=list(X.columns),
    # Labels are applied in ascending class order (assumes 0 = no irrigation,
    # 1 = irrigation, matching the confusion-matrix labels in this notebook).
    class_names=['No Irrigation Needed', 'Irrigation Needed'],
    filled=True,
    rounded=True
)
plt.title("Decision Tree Visualization")
plt.show()

# Confusion matrix for the decision tree predictions on the test split
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predicted classes.
conf_matrix = confusion_matrix(y_test, y_pred)

# Same tick labels on both axes
irrigation_labels = ['No Irrigation Needed', 'Irrigation Needed']

# Draw the matrix as an annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=irrigation_labels,
    yticklabels=irrigation_labels,
    ax=ax,
)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('True Labels')
ax.set_title('Confusion Matrix')
plt.show()

# Splitting Data for Naive Bayes (keep this separate)
X_bayes = df.drop(columns=['Irrigation'])
y_bayes = df['Irrigation']

from sklearn.naive_bayes import GaussianNB

# Split data into training and testing sets.
# Uses the same test_size and random_state as the other classifiers in this
# notebook, so the Naive Bayes accuracy shown in the model comparison is
# measured on the same held-out rows as the models it is ranked against.
X_bayes_train, X_bayes_test, y_bayes_train, y_bayes_test = train_test_split(X_bayes, y_bayes, test_size=0.3, random_state=42)

# Create and train Naive Bayes model
gnb = GaussianNB()
gnb.fit(X_bayes_train, y_bayes_train)

# Make predictions and evaluate
y_bayes_pred = gnb.predict(X_bayes_test)
bayes_accuracy = accuracy_score(y_bayes_test, y_bayes_pred)
print(f"\nNaive Bayes Accuracy: {bayes_accuracy:.4f}")
print(classification_report(y_bayes_test, y_bayes_pred))

# Cross Validation on the full dataset (fits clones; `gnb` keeps the fit above)
from sklearn.model_selection import cross_val_score
scores = cross_val_score(gnb, X_bayes, y_bayes, cv=5)  # cv=5 means using 5 folds
print("Cross-validation scores:", scores)
print("Mean accuracy:", scores.mean())

import pandas as pd

# Side-by-side table of true and predicted labels for the Naive Bayes test split
predictions = pd.DataFrame({
    'Actual': y_bayes_test,
    'Predicted': y_bayes_pred
})

# Count the rows where the prediction differs from the true label
mismatches = (predictions['Actual'] != predictions['Predicted']).sum()

# Display prediction table
print(predictions.head(10))  # Display the first 10 rows
# Only test-split rows are in `predictions`, so the message says "test set"
# rather than "entire dataset".
print(f"\nNumber of mismatched predictions in the test set: {mismatches} of {len(predictions)}")

# Plotting and metric helpers for the Naive Bayes confusion matrix
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predicted classes.
conf_matrix = confusion_matrix(y_bayes_test, y_bayes_pred)

# Same tick labels on both axes
irrigation_labels = ['No Irrigation Needed', 'Irrigation Needed']

# Draw the matrix as an annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=irrigation_labels,
    yticklabels=irrigation_labels,
    ax=ax,
)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('True Labels')
ax.set_title('Confusion Matrix')
plt.show()

# Section banner for the additional classifiers
section_rule = "=" * 50
print("\n" + section_rule)
print("ADDITIONAL CLASSIFICATION MODELS")
print(section_rule)

# Features/target for the comparison models; split parameters (30% test,
# random_state=42) match the decision tree split above.
X_comp = df.drop(columns=['Irrigation'])
y_comp = df['Irrigation']
X_train_comp, X_test_comp, y_train_comp, y_test_comp = train_test_split(
    X_comp,
    y_comp,
    test_size=0.3,
    random_state=42,
)

# Standardize features: statistics are learned from the training rows only and
# then applied to both partitions.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train_comp)
X_train_scaled = scaler.transform(X_train_comp)
X_test_scaled = scaler.transform(X_test_comp)

# 1. Support Vector Machine (SVM) - trained on the standardized features
from sklearn.svm import SVC
print("\n--- Support Vector Machine (SVM) ---")
svm_model = SVC(
    kernel='rbf',
    C=1.0,
    gamma='scale',
    class_weight='balanced',
    random_state=42,
)
svm_model.fit(X_train_scaled, y_train_comp)
svm_pred = svm_model.predict(X_test_scaled)
svm_accuracy = accuracy_score(y_test_comp, svm_pred)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print(classification_report(y_test_comp, svm_pred))

# 2. Random Forest - trained on the unscaled features
from sklearn.ensemble import RandomForestClassifier
print("\n--- Random Forest ---")
rf_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=8,
    min_samples_split=5,
    class_weight='balanced',
    random_state=42,
)
rf_model.fit(X_train_comp, y_train_comp)
rf_pred = rf_model.predict(X_test_comp)
rf_accuracy = accuracy_score(y_test_comp, rf_pred)
print(f"Random Forest Accuracy: {rf_accuracy:.4f}")
print(classification_report(y_test_comp, rf_pred))

# Rank the features by the forest's importance scores, highest first
feature_importance = pd.DataFrame(
    {'Feature': X_comp.columns, 'Importance': rf_model.feature_importances_}
)
feature_importance = feature_importance.sort_values(by='Importance', ascending=False)
print("\nFeature Importance:")
print(feature_importance)

# 3. Gradient Boosting (Improved)
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV

print("\n--- Gradient Boosting (Improved) ---")

# Full candidate grid. It is defined here but not passed to the search below,
# which uses the reduced grid instead.
gb_param_grid = dict(
    n_estimators=[100, 200],
    learning_rate=[0.05, 0.1, 0.2],
    max_depth=[3, 5, 7],
    min_samples_split=[2, 5],
    subsample=[0.8, 1.0],
)

# Reduced grid actually searched (4 combinations x 5 folds)
gb_param_grid_small = dict(
    n_estimators=[200],
    learning_rate=[0.05, 0.1],
    max_depth=[5, 7],
    subsample=[0.8],
)

gb_grid = GridSearchCV(
    estimator=GradientBoostingClassifier(random_state=42),
    param_grid=gb_param_grid_small,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
)

# Fitted on the standardized training features (the same arrays the SVM uses)
gb_grid.fit(X_train_scaled, y_train_comp)
print(f"Best Gradient Boosting parameters: {gb_grid.best_params_}")

# Evaluate the refit best estimator on the held-out test split
gb_best = gb_grid.best_estimator_
gb_pred_best = gb_best.predict(X_test_scaled)
gb_accuracy_best = accuracy_score(y_test_comp, gb_pred_best)
print(f"Improved Gradient Boosting Accuracy: {gb_accuracy_best:.4f}")
print(classification_report(y_test_comp, gb_pred_best))

# Name under which the model comparison reads this accuracy
gb_accuracy = gb_accuracy_best

# 4. K-Nearest Neighbors (Improved)
# Tuned in two stages below: a scan over K, then a grid search around the best K.
from sklearn.neighbors import KNeighborsClassifier
print("\n--- K-Nearest Neighbors (Improved) ---")

# Function to find optimal K value
def find_optimal_k(X_train, y_train, X_test, y_test, max_k=30):
    """Scan K = 1..max_k for a distance-weighted KNN and report the best K.

    For every K a fresh KNeighborsClassifier(weights='distance') is fitted on
    (X_train, y_train) and scored with accuracy_score on (X_test, y_test).

    Returns:
        (best_k, best_accuracy, k_values, accuracies), where k_values is
        [1, ..., max_k], accuracies[i] is the accuracy for k_values[i], and
        best_k is the smallest K that reaches the highest accuracy.
    """
    k_values = list(range(1, max_k + 1))

    def _score(k):
        # One fit/evaluate round for a single neighbour count.
        knn = KNeighborsClassifier(n_neighbors=k, weights='distance')
        knn.fit(X_train, y_train)
        return accuracy_score(y_test, knn.predict(X_test))

    accuracies = [_score(k) for k in k_values]

    # list.index returns the first occurrence, so ties go to the smallest K.
    top_accuracy = max(accuracies)
    best_k = k_values[accuracies.index(top_accuracy)]
    return best_k, top_accuracy, k_values, accuracies

# Find optimal K value.
# K is chosen on a validation split carved out of the training rows. The test
# split is kept out of tuning, because the final KNN accuracy is reported on it
# and tuning on it would make that figure optimistic.
X_knn_fit, X_knn_val, y_knn_fit, y_knn_val = train_test_split(
    X_train_scaled, y_train_comp, test_size=0.2, random_state=42
)
best_k, best_accuracy, k_values, accuracies = find_optimal_k(X_knn_fit, y_knn_fit, X_knn_val, y_knn_val)
print(f"Optimal K value: {best_k}")

# Visualize K value vs. validation accuracy
plt.figure(figsize=(10, 6))
plt.plot(k_values, accuracies, 'o-')
plt.xlabel('K Value (Number of Neighbors)')
plt.ylabel('Validation Accuracy')
plt.title('K Value vs. Accuracy for KNN')
plt.grid(True)
plt.show()

# More comprehensive KNN parameter tuning around the K found above
neighbor_candidates = (
    [best_k-2, best_k-1, best_k, best_k+1, best_k+2] if best_k > 2 else [1, 2, 3, 4, 5]
)
knn_param_grid = {
    'n_neighbors': neighbor_candidates,
    'weights': ['uniform', 'distance'],
    # Only the two distinct distances are searched. 'minkowski' with p=1 / p=2
    # is the same as manhattan / euclidean, and p is only used by the minkowski
    # metric, so crossing metric with p fitted every distance three times.
    'metric': ['euclidean', 'manhattan'],
}

knn_grid = GridSearchCV(
    KNeighborsClassifier(),
    knn_param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1
)

# 5-fold cross-validated search on the standardized training features
knn_grid.fit(X_train_scaled, y_train_comp)
print(f"Best KNN parameters: {knn_grid.best_params_}")

# Evaluate the refit best estimator on the held-out test split
knn_best = knn_grid.best_estimator_
knn_pred_best = knn_best.predict(X_test_scaled)
knn_accuracy_best = accuracy_score(y_test_comp, knn_pred_best)
print(f"Improved KNN Accuracy: {knn_accuracy_best:.4f}")
print(classification_report(y_test_comp, knn_pred_best))

# Save the improved model accuracy for comparison
knn_accuracy = knn_accuracy_best

def _report_model_comparison(scores, heading, plot_title):
    """Print and plot model accuracies, best first.

    Parameters:
    - scores: dict mapping model name -> accuracy
    - heading: banner text printed above the ranked list
    - plot_title: title of the bar chart

    Returns:
    - dict with the same items as `scores`, ordered by descending accuracy
    """
    ranked = dict(sorted(scores.items(), key=lambda item: item[1], reverse=True))

    print("\n" + "="*50)
    print(heading)
    print("="*50)
    for model_name, accuracy in ranked.items():
        print(f"{model_name}: {accuracy:.4f}")

    plt.figure(figsize=(12, 6))
    plt.bar(list(ranked.keys()), list(ranked.values()), color='skyblue')
    plt.xlabel('Model')
    plt.ylabel('Accuracy')
    plt.title(plot_title)
    plt.ylim(0.6, 1.0)  # Setting y-axis limits for better visualization
    plt.xticks(rotation=45, ha='right')
    # Value label just above each bar. The loop variables are local to this
    # function, so the fitted estimator bound to the global `model` is no
    # longer overwritten by a model-name string.
    for i, acc in enumerate(ranked.values()):
        plt.text(i, acc + 0.01, f'{acc:.4f}', ha='center')
    plt.tight_layout()
    plt.show()
    return ranked


# Compare all models
models = {
    'Decision Tree': test_accuracy,
    'Naive Bayes': bayes_accuracy,
    'SVM': svm_accuracy,
    'Random Forest': rf_accuracy,
    'Gradient Boosting': gb_accuracy,
    'KNN': knn_accuracy
}
sorted_models = _report_model_comparison(models, "MODEL COMPARISON SUMMARY", 'Model Comparison')

# Update models dictionary with improved values.
# NOTE(review): gb_accuracy and knn_accuracy already hold the tuned values, so
# this second report repeats the first; kept to preserve the notebook's output.
models = {
    'Decision Tree': test_accuracy,
    'Naive Bayes': bayes_accuracy,
    'SVM': svm_accuracy,
    'Random Forest': rf_accuracy,
    'Gradient Boosting': gb_accuracy,
    'KNN': knn_accuracy
}
sorted_models = _report_model_comparison(models, "UPDATED MODEL COMPARISON SUMMARY", 'Updated Model Comparison')

# Save the best KNN model for future use
import joblib
import pickle
import os

# Create the model directory if needed. exist_ok makes this a single call with
# no gap between checking for the directory and creating it.
model_dir = r"/content/drive/MyDrive/KNNModel"
os.makedirs(model_dir, exist_ok=True)

# Save the KNN model using joblib
joblib_path = os.path.join(model_dir, "knn_irrigation_model.joblib")
joblib.dump(knn_best, joblib_path)
print(f"\nModel saved to: {joblib_path}")

# Also save the scaler: new inputs must be standardized the same way before prediction
scaler_path = os.path.join(model_dir, "knn_scaler.joblib")
joblib.dump(scaler, scaler_path)
print(f"Scaler saved to: {scaler_path}")

# Save feature names (in training column order) for reference
feature_names = {
    'features': list(X_comp.columns)
}
with open(os.path.join(model_dir, "feature_names.pkl"), 'wb') as f:
    pickle.dump(feature_names, f)

# Example of how to load and use the model.
# The snippet below is only printed as text for copy-paste; it is not executed here.
print("\nExample code to load and use the model:")
print("""
# Load KNN model
import joblib
model = joblib.load('knn_irrigation_model.joblib')
scaler = joblib.load('knn_scaler.joblib')

# Prepare input data (must have same features as training data)
# Example input: [CropType, SoilMoisture, temperature, Humidity]
input_data = [[3, 500, 28, 65]]  # Example values

# Scale the input data
scaled_input = scaler.transform(input_data)

# Make prediction
prediction = model.predict(scaled_input)
print(f"Irrigation needed: {'Yes' if prediction[0] == 1 else 'No'}")
""")

# Code for Google Colab and GCP storage.
# Everything inside the triple-quoted string is printed as reference text only;
# no upload or download is performed by this cell.
print("\n" + "="*50)
print("GOOGLE COLAB AND GCP INTEGRATION")
print("="*50)
print("# Code to use in Google Colab to save model to GCP:")
print("""
# 1. Install required packages in Colab
!pip install google-cloud-storage

# 2. Upload your GCP service account key to Colab
from google.colab import files
uploaded = files.upload()  # Upload your GCP service account JSON key file

# 3. Set GCP authentication
import os
key_path = next(iter(uploaded.keys()))  # Get the filename of the uploaded key
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = key_path

# 4. Save your model in Colab
import joblib
# First train your model or load the existing one
# knn_model = ...

# Save locally in Colab
joblib.dump(knn_model, 'knn_irrigation_model.joblib')
joblib.dump(scaler, 'knn_scaler.joblib')

# 5. Upload to GCP bucket
from google.cloud import storage

def upload_to_gcp_bucket(bucket_name, source_file_name, destination_blob_name):
    '''Uploads a file to the GCP bucket'''
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    blob.upload_from_filename(source_file_name)
    print(f"File {source_file_name} uploaded to {destination_blob_name} in bucket {bucket_name}")

# Upload model files to GCP bucket
bucket_name = 'your-bucket-name'  # Replace with your actual bucket name
upload_to_gcp_bucket(bucket_name, 'knn_irrigation_model.joblib', 'models/knn_irrigation_model.joblib')
upload_to_gcp_bucket(bucket_name, 'knn_scaler.joblib', 'models/knn_scaler.joblib')

# 6. To later load the model from GCP in another Colab notebook or application:
def download_from_gcp_bucket(bucket_name, source_blob_name, destination_file_name):
    '''Downloads a file from the GCP bucket'''
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(source_blob_name)
    blob.download_to_filename(destination_file_name)
    print(f"File {source_blob_name} downloaded to {destination_file_name}")

# Download model files from GCP bucket
download_from_gcp_bucket(bucket_name, 'models/knn_irrigation_model.joblib', 'knn_irrigation_model.joblib')
download_from_gcp_bucket(bucket_name, 'models/knn_scaler.joblib', 'knn_scaler.joblib')

# Load the downloaded model
model = joblib.load('knn_irrigation_model.joblib')
scaler = joblib.load('knn_scaler.joblib')
""")

# Print a sample prediction helper as reference text.
# The function inside the string is not defined in this notebook's namespace.
print("\n# Simple prediction function example:")
print("""
def predict_irrigation_needed(model, scaler, crop_type, soil_moisture, temperature, humidity):
    '''
    Makes a prediction using the trained KNN model

    Parameters:
    - model: Trained KNN model
    - scaler: Fitted StandardScaler
    - crop_type: Encoded crop type (0-8)
    - soil_moisture: Soil moisture value
    - temperature: Temperature value
    - humidity: Humidity value

    Returns:
    - Boolean indicating if irrigation is needed
    '''
    # Prepare input data
    input_data = [[crop_type, soil_moisture, temperature, humidity]]

    # Scale the input data
    scaled_input = scaler.transform(input_data)

    # Make prediction
    prediction = model.predict(scaled_input)[0]

    return bool(prediction)

# Example usage
needs_irrigation = predict_irrigation_needed(model, scaler,
                                           crop_type=3,      # Maize
                                           soil_moisture=300,
                                           temperature=30,
                                           humidity=40)
print(f"Irrigation needed: {'Yes' if needs_irrigation else 'No'}")
""")




In [None]:
#Local Loading
print("\nExample code to load and use the model:")
import joblib
import pandas as pd

# Model and scaler as saved to Drive by the training cell
model = joblib.load('/content/drive/MyDrive/KNNModel/knn_irrigation_model.joblib')
scaler = joblib.load('/content/drive/MyDrive/KNNModel/knn_scaler.joblib')

# Prepare input data (must have same features as training data)
# Example reading, keyed by the training column names
sample = {"CropType": 3, "SoilMoisture": 500, "temperature": 28, "Humidity": 65}

# The scaler was fitted on a DataFrame, so it records the training column names
# and order in feature_names_in_. Selecting by those names avoids sklearn's
# "X does not have valid feature names" warning, puts the values in the order
# the scaler expects, and raises a KeyError if a required feature is missing.
input_data = pd.DataFrame([sample])[list(scaler.feature_names_in_)]

# Scale the input data
scaled_input = scaler.transform(input_data)

# Make prediction
prediction = model.predict(scaled_input)
print(f"Irrigation needed: {'Yes' if prediction[0] == 1 else 'No'}")


In [None]:
#Firebase manual data entry
import firebase_admin
from firebase_admin import credentials, db
from datetime import datetime
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Initialize Firebase if not already initialized
if not firebase_admin._apps:
    cred_path = os.getenv('FIREBASE_CREDENTIALS_PATH')
    database_url = os.getenv('FIREBASE_DATABASE_URL')
    # Fail with an actionable message instead of passing None to the Firebase SDK.
    if not cred_path or not database_url:
        raise RuntimeError(
            "FIREBASE_CREDENTIALS_PATH and FIREBASE_DATABASE_URL must be set "
            "(for example in the .env file) before running this cell."
        )
    cred = credentials.Certificate(cred_path)
    firebase_admin.initialize_app(cred, {
        'databaseURL': database_url
    })

# Crop type mapping dictionary (name to encoded value)
crop_mapping = {
    "Coffee": 0,
    "Garden Flowers": 1,
    "Groundnuts": 2,
    "Maize": 3,
    "Paddy": 4,
    "Potato": 5,
    "Pulse": 6,
    "Sugarcane": 7,
    "Wheat": 8
}

# Function to convert crop name to number
def get_crop_code(crop_name):
    """Translate a crop name or code into its encoded integer.

    Accepted forms:
    - an int between 0 and 8 (returned unchanged)
    - an exact key of crop_mapping
    - a string that matches a key after trimming and ignoring case ("  wheat ")
    - a string holding one of the codes ("7")

    Anything else prints a warning and falls back to Maize (3).
    """
    if isinstance(crop_name, int) and 0 <= crop_name <= 8:
        return crop_name  # Already a valid code
    if crop_name in crop_mapping:
        return crop_mapping[crop_name]

    if isinstance(crop_name, str):
        cleaned = crop_name.strip()

        # Values typed at an input() prompt are strings, so accept "7" as code 7.
        try:
            numeric = int(cleaned)
        except ValueError:
            numeric = None
        if numeric is not None and numeric in crop_mapping.values():
            return numeric

        # Case-insensitive name match, so "maize" is not silently defaulted.
        folded = cleaned.casefold()
        for name, code in crop_mapping.items():
            if name.casefold() == folded:
                return code

    print(f"Warning: '{crop_name}' not found in crop mapping. Using default (Maize:3)")
    return 3  # Default to Maize if not found

# Get crop input from user
print("Available crops:", ", ".join(crop_mapping.keys()))
crop_input = input("Enter crop name (or press Enter for 'Groundnuts'): ").strip()
if not crop_input:
    crop_input = "Groundnuts"  # Default crop


def _read_float(prompt, default):
    """Prompt for one number; return `default` on empty or invalid input."""
    raw = input(prompt).strip()
    if not raw:
        return default
    try:
        return float(raw)
    except ValueError:
        print(f"Invalid input '{raw}'. Using default value {default}.")
        return default


# Get other sensor values. Each reading falls back to its own default, so one
# typo no longer discards the other values that were entered correctly.
temp = _read_float("Enter temperature (°C) [28.5]: ", 28.5)
humidity = _read_float("Enter humidity (%) [65.3]: ", 65.3)
soil_moisture = _read_float("Enter soil moisture (%) [35.2]: ", 35.2)

# Convert crop name to number
crop_code = get_crop_code(crop_input)

# Create test data (keys match the feature/column names used for training)
test_data = {
    "CropType": crop_code,
    "temperature": temp,
    "Humidity": humidity,
    "SoilMoisture": soil_moisture
}

# Print what we're sending to Firebase
print("\nSending to Firebase:")
print(f"  Crop: {crop_input} (code: {crop_code})")
print(f"  Temperature: {temp}°C")
print(f"  Humidity: {humidity}%")
print(f"  Soil Moisture: {soil_moisture}%")

# Send data to Firebase: overwrite the live "sensor" node
sensor_ref = db.reference("sensor")
sensor_ref.set(test_data)

# Store sensor data permanently in Realtime Database (instead of Firestore).
# The clock is read once, so the record key and the timestamp stored inside the
# record always describe the same instant.
now = datetime.now()
timestamp = now.strftime("%Y-%m-%d_%H-%M-%S")
record = {
    "timestamp": now.strftime("%Y-%m-%d %H:%M:%S"),
    "sensor_data": test_data,
    "crop_name": crop_input,
    "crop_code": crop_code
}

# Add to the "history/sensors" node in Realtime Database.
# NOTE(review): keys have one-second resolution; two writes in the same second
# would share a key and the later one would replace the earlier.
history_ref = db.reference("history/sensors")
history_ref.child(timestamp).set(record)

print("\nTest data added to Firebase successfully!")
print(f"Data permanently stored in database with ID: {timestamp}")
print("Run 1stcode.py to get the irrigation prediction.")

In [None]:
#Firebase prediction
import firebase_admin
from firebase_admin import credentials, db
import joblib, pickle
import numpy as np
from datetime import datetime
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')


def _require_env(name):
    """Return environment variable `name`, raising a clear error when it is unset or empty."""
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set; add it to the .env file before running this cell."
        )
    return value


# Only initialize if not already initialized
if not firebase_admin._apps:
    cred = credentials.Certificate(_require_env('FIREBASE_CREDENTIALS_PATH'))
    firebase_admin.initialize_app(cred, {
        'databaseURL': _require_env('FIREBASE_DATABASE_URL')
    })

# Load model, scaler, feature names.
# NOTE: joblib and pickle files can execute arbitrary code when loaded; only
# point these paths at artefacts produced by this notebook.
model = joblib.load(_require_env('MODEL_PATH'))
scaler = joblib.load(_require_env('SCALER_PATH'))
with open(_require_env('FEATURE_NAMES_PATH'), "rb") as f:
    feature_names_dict = pickle.load(f)

# Extract the list of feature names (training column order)
feature_list = feature_names_dict['features']
print(f"Required features: {feature_list}")

# Get sensor data from Firebase
sensor_data = db.reference("sensor").get()
if not sensor_data:
    print("No sensor data found.")
else:
    print("Raw sensor data from Firebase:", sensor_data)

    # Every feature the model was trained on must be present. Substituting 0
    # for a missing reading would still produce a prediction that drives the
    # irrigation hardware, so incomplete data is rejected instead.
    missing_features = [feature for feature in feature_list if feature not in sensor_data]
    if missing_features:
        print(f"Error: sensor data is missing required features {missing_features}; no prediction made.")
    else:
        # Order the readings exactly as the training columns were ordered
        input_data = [sensor_data[feature] for feature in feature_list]
        print(f"Processed input data: {input_data}")

        # Create a DataFrame with feature names to avoid the sklearn feature-name warning
        import pandas as pd
        input_df = pd.DataFrame([input_data], columns=feature_list)

        # Predict
        scaled = scaler.transform(input_df)
        prediction = int(model.predict(scaled)[0])
        prediction_text = 'Irrigation needed' if prediction == 1 else 'No irrigation needed'

        # Push prediction back to Firebase
        db.reference("prediction").set(prediction)
        print(f"Prediction sent to Firebase: {prediction} ({prediction_text})")

        # Store data permanently in Realtime Database instead of Firestore.
        # The clock is read once so the key and the stored timestamp agree.
        now = datetime.now()
        timestamp = now.strftime("%Y-%m-%d_%H-%M-%S")

        # Create data record with all relevant information
        record = {
            "timestamp": now.strftime("%Y-%m-%d %H:%M:%S"),
            "sensor_data": sensor_data,
            "prediction": prediction,
            "prediction_text": prediction_text
        }

        # Add to the "history/predictions" node in Realtime Database
        history_ref = db.reference("history/predictions")
        history_ref.child(timestamp).set(record)
        print(f"Data permanently stored in database with ID: {timestamp}")


In [None]:
#Servo motor ON/OFF
import firebase_admin
from firebase_admin import credentials, db
import time
from datetime import datetime
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Initialize Firebase (if not already initialized)
if not firebase_admin._apps:
    cred_path = os.getenv('FIREBASE_CREDENTIALS_PATH')
    database_url = os.getenv('FIREBASE_DATABASE_URL')
    # Fail with an actionable message instead of passing None to the Firebase SDK.
    if not cred_path or not database_url:
        raise RuntimeError(
            "FIREBASE_CREDENTIALS_PATH and FIREBASE_DATABASE_URL must be set "
            "(for example in the .env file) before running this cell."
        )
    cred = credentials.Certificate(cred_path)
    firebase_admin.initialize_app(cred, {
        'databaseURL': database_url
    })

def simulate_hardware():
    """Poll the Firebase "prediction" node and mirror it as a simulated servo state.

    Every 5 seconds the current prediction is read and logged. A value equal
    to 1 switches the simulated servo on when it is off; a value equal to 0
    switches it off when it is on; anything else leaves the state unchanged.
    The loop runs until interrupted: KeyboardInterrupt and any other exception
    raised inside the loop are reported, and "Simulation ended" is always printed.
    """
    print("=== Smart Irrigation Hardware Simulator ===")
    print("Starting simulation...")

    # Node written by the prediction step
    prediction_ref = db.reference("prediction")

    # The simulated servo starts switched off
    servo_on = False

    try:
        while True:
            # Read the latest value and log it with a wall-clock stamp
            prediction = prediction_ref.get()
            current_time = datetime.now().strftime("%H:%M:%S")

            print(f"\n[{current_time}] Checking prediction value...")
            print(f"Current prediction from Firebase: {prediction}")

            # State transition: only an opposite request flips the servo
            changed = False
            if servo_on:
                if prediction == 0:
                    servo_on = False
                    changed = True
                    print("🔄 ACTION: Servo motor deactivated - IRRIGATION STOPPED")
            elif prediction == 1:
                # In real hardware, this would trigger GPIO pins to control the servo
                servo_on = True
                changed = True
                print("🔄 ACTION: Servo motor activated - IRRIGATION STARTED")

            if not changed:
                print(f"🔄 Servo status: {'ON' if servo_on else 'OFF'} (No change needed)")

            # Hardware simulator status
            print(f"💧 Irrigation system status: {'ACTIVE' if servo_on else 'INACTIVE'}")

            # Pause between polls
            print("Waiting 5 seconds before next check...")
            time.sleep(5)

    except KeyboardInterrupt:
        print("\nSimulation stopped by user")
    except Exception as e:
        print(f"Error in simulation: {e}")
    finally:
        print("Simulation ended")

# Start the polling loop when this code is executed as the main module.
if __name__ == "__main__":
    simulate_hardware()

In [None]:
import pickle
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Fail with an actionable message instead of open(None) raising a TypeError.
feature_names_path = os.getenv('FEATURE_NAMES_PATH')
if not feature_names_path:
    raise RuntimeError(
        "FEATURE_NAMES_PATH is not set; add it to the .env file before running this cell."
    )

# NOTE: pickle can execute arbitrary code on load; only open files produced by
# this notebook.
with open(feature_names_path, "rb") as f:
    feature_names = pickle.load(f)
print(feature_names)
