In [1]:
%pip install pandas
%pip install numpy
%pip install scikit-learn

Collecting pandas
  Using cached pandas-2.2.3-cp310-cp310-win_amd64.whl (11.6 MB)
Collecting numpy>=1.22.4
  Downloading numpy-2.2.2-cp310-cp310-win_amd64.whl (12.9 MB)
     ---------------------------------------- 12.9/12.9 MB 9.3 MB/s eta 0:00:00
Collecting pytz>=2020.1
  Using cached pytz-2024.2-py2.py3-none-any.whl (508 kB)
Collecting tzdata>=2022.7
  Downloading tzdata-2025.1-py2.py3-none-any.whl (346 kB)
     ------------------------------------- 346.8/346.8 kB 10.9 MB/s eta 0:00:00
Installing collected packages: pytz, tzdata, numpy, pandas
Successfully installed numpy-2.2.2 pandas-2.2.3 pytz-2024.2 tzdata-2025.1
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.2.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.2.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting scikit-learnNote: you may need to restart the kernel to use updated packages.

  Downloading scikit_learn-1.6.1-cp310-cp310-win_amd64.whl (11.1 MB)
     --------------------------------------- 11.1/11.1 MB 11.3 MB/s eta 0:00:00
Collecting joblib>=1.2.0
  Using cached joblib-1.4.2-py3-none-any.whl (301 kB)
Collecting scipy>=1.6.0
  Downloading scipy-1.15.1-cp310-cp310-win_amd64.whl (43.9 MB)
     ---------------------------------------- 43.9/43.9 MB 7.0 MB/s eta 0:00:00
Collecting threadpoolctl>=3.1.0
  Using cached threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, scipy, joblib, scikit-learn
Successfully installed joblib-1.4.2 scikit-learn-1.6.1 scipy-1.15.1 threadpoolctl-3.5.0



[notice] A new release of pip available: 22.2.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error
import joblib  

# **Plant Growth Data**

In [25]:
# Load the plant-growth dataset from disk and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="Datasets/plant_growth_data.csv")
df.head(n=5)

Unnamed: 0,Soil_Type,Sunlight_Hours,Water_Frequency,Fertilizer_Type,Temperature,Humidity,Growth_Milestone
0,loam,5.192294,bi-weekly,chemical,31.719602,61.591861,0
1,sandy,4.033133,weekly,organic,28.919484,52.422276,1
2,loam,8.892769,bi-weekly,none,23.179059,44.660539,0
3,loam,8.241144,bi-weekly,none,18.465886,46.433227,0
4,sandy,8.374043,bi-weekly,organic,18.128741,63.625923,0


In [26]:
# Encode the categorical features as integer codes, keeping one fitted
# encoder per column so the same mapping can be reused at inference time.
# NOTE: df is overwritten in place, so re-running this cell without first
# reloading df refits the encoders on the integer codes instead of the labels.
label_encoders = {
    name: LabelEncoder()
    for name in ("Soil_Type", "Water_Frequency", "Fertilizer_Type")
}
for name, encoder in label_encoders.items():
    df[name] = encoder.fit_transform(df[name])

label_encoders

{'Soil_Type': LabelEncoder(),
 'Water_Frequency': LabelEncoder(),
 'Fertilizer_Type': LabelEncoder()}

In [27]:
# Separate the target column from the feature matrix.
y = df["Growth_Milestone"]
X = df.drop(columns="Growth_Milestone")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

In [28]:
# Fit a random-forest regressor (100 trees, fixed seed) on the training split.
model = RandomForestRegressor(random_state=42, n_estimators=100)
model.fit(X=X_train, y=y_train)

In [29]:
# Evaluate on the held-out split using root-mean-squared error.
# NOTE(review): the head() preview shows Growth_Milestone as 0/1 — if the
# target is binary, confirm that a regressor scored by RMSE is intended.
y_pred = model.predict(X=X_test)
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
f"RMSE: {rmse:.2f}"

'RMSE: 0.55'

In [30]:
# Persist the fitted estimator to disk.
# NOTE(review): only the estimator is written here; the fitted label_encoders
# are not saved in any visible cell, so inference from a fresh kernel cannot
# reproduce the categorical encoding — confirm whether they should be stored too.
joblib.dump(value=model, filename="Model/random_forest_plant_growth.pkl")

['Model/random_forest_plant_growth.pkl']

In [35]:
# NOTE: joblib.load unpickles the file; only load artifacts produced by this
# notebook, never files from an untrusted source.
loaded_model = joblib.load("Model/random_forest_plant_growth.pkl")


# Custom prediction function
def predict(input_data):
    """Predict the growth-milestone score for a single plant observation.

    Parameters
    ----------
    input_data : dict
        Maps every feature name the model was fitted on to its value.
        Categorical features are passed as raw labels and encoded with the
        fitted ``label_encoders``. Key order does not matter; keys that are
        not model features are ignored.

    Returns
    -------
    str
        The model output multiplied by 100 and formatted with two decimals.

    Raises
    ------
    ValueError
        If a feature the model was fitted on is missing from ``input_data``.
        ``LabelEncoder.transform`` also raises ``ValueError`` for a label it
        did not see when it was fitted.
    """
    expected_features = list(loaded_model.feature_names_in_)
    missing = [name for name in expected_features if name not in input_data]
    if missing:
        raise ValueError(f"Missing input features: {missing}")

    # Build a one-row frame in the column order used at fit time, so the
    # result does not depend on the key order of the incoming dict.
    input_df = pd.DataFrame([input_data], columns=expected_features)

    # Encode categorical features with the encoders fitted on the training data.
    for column, le in label_encoders.items():
        if column in input_df:
            input_df[column] = le.transform(input_df[column])

    # Predict
    prediction = loaded_model.predict(input_df)[0]
    return f"Probabilitas bertahan hidup: {prediction * 100:.2f}%"

In [36]:
# Example inference for one observation; categorical values are raw labels.
predict(dict(
    Soil_Type="sandy",
    Sunlight_Hours=5.0,
    Water_Frequency="weekly",
    Fertilizer_Type="organic",
    Temperature=38.0,
    Humidity=50.0,
))

'Probabilitas bertahan hidup: 68.00%'

# **Plant Health Data**

In [37]:
# Load the plant-health dataset from disk and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="Datasets/plant_health_data.csv")
df.head(n=5)

Unnamed: 0,Timestamp,Plant_ID,Soil_Moisture,Ambient_Temperature,Soil_Temperature,Humidity,Light_Intensity,Soil_pH,Nitrogen_Level,Phosphorus_Level,Potassium_Level,Chlorophyll_Content,Electrochemical_Signal,Plant_Health_Status
0,2024-10-03 10:54:53.407995,1,27.521109,22.240245,21.900435,55.291904,556.172805,5.581955,10.00365,45.806852,39.076199,35.703006,0.941402,High Stress
1,2024-10-03 16:54:53.407995,1,14.835566,21.706763,18.680892,63.949181,596.136721,7.135705,30.712562,25.394393,17.944826,27.993296,0.164899,High Stress
2,2024-10-03 22:54:53.407995,1,17.086362,21.180946,15.392939,67.837956,591.124627,5.656852,29.337002,27.573892,35.70653,43.646308,1.081728,High Stress
3,2024-10-04 04:54:53.407995,1,15.336156,22.593302,22.778394,58.190811,241.412476,5.584523,16.966621,26.180705,26.257746,37.838095,1.186088,High Stress
4,2024-10-04 10:54:53.407995,1,39.822216,28.929001,18.100937,63.772036,444.49383,5.919707,10.944961,37.898907,37.654483,48.265812,1.609805,High Stress


In [38]:
# Keep only the three sensor features used for modelling plus the target label.
df = df.loc[
    :,
    ["Soil_Moisture", "Humidity", "Light_Intensity", "Plant_Health_Status"],
]
df.head(n=5)

Unnamed: 0,Soil_Moisture,Humidity,Light_Intensity,Plant_Health_Status
0,27.521109,55.291904,556.172805,High Stress
1,14.835566,63.949181,596.136721,High Stress
2,17.086362,67.837956,591.124627,High Stress
3,15.336156,58.190811,241.412476,High Stress
4,39.822216,63.772036,444.49383,High Stress


In [39]:
def change_plant_health_status(x: str) -> float:
    """Convert a plant-health label into a numeric score.

    ``"Healthy"`` maps to 1.0, ``"Moderate Stress"`` to 0.5 and
    ``"High Stress"`` to 0.0.

    Raises
    ------
    ValueError
        If ``x`` is not one of the three known labels. Failing loudly keeps
        typos, missing values, or values that were already converted (for
        example when the calling cell is run twice) from being silently
        scored as 0.0.
    """
    scores = {"Healthy": 1.0, "Moderate Stress": 0.5, "High Stress": 0.0}
    try:
        return scores[x]
    except (KeyError, TypeError):
        # KeyError: unknown label; TypeError: unhashable value.
        raise ValueError(f"Unknown plant health status: {x!r}") from None

# Replace the text labels with their numeric scores, then preview the result.
health_scores = df["Plant_Health_Status"].apply(change_plant_health_status)
df["Plant_Health_Status"] = health_scores
df.head(n=5)

Unnamed: 0,Soil_Moisture,Humidity,Light_Intensity,Plant_Health_Status
0,27.521109,55.291904,556.172805,0.0
1,14.835566,63.949181,596.136721,0.0
2,17.086362,67.837956,591.124627,0.0
3,15.336156,58.190811,241.412476,0.0
4,39.822216,63.772036,444.49383,0.0


In [40]:
# Preview the last five rows to see the converted scores at the end of the frame.
df.tail(n=5)

Unnamed: 0,Soil_Moisture,Humidity,Light_Intensity,Plant_Health_Status
1195,29.66578,54.668196,650.536854,0.5
1196,15.490782,61.243143,768.760787,0.0
1197,23.495723,40.693671,293.643366,0.5
1198,30.971675,41.387107,492.952014,1.0
1199,24.406357,47.505394,304.201566,0.5


In [41]:
# Separate the numeric health score (target) from the sensor features.
y = df["Plant_Health_Status"]
X = df.drop(columns="Plant_Health_Status")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

In [42]:
# Fit a random-forest regressor (100 trees, fixed seed) on the training split.
model = RandomForestRegressor(random_state=42, n_estimators=100)
model.fit(X=X_train, y=y_train)

In [43]:
# Evaluate on the held-out split using root-mean-squared error.
y_pred = model.predict(X=X_test)
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
f"RMSE: {rmse:.2f}"

'RMSE: 0.23'

In [44]:
# Persist the fitted plant-health estimator to disk.
joblib.dump(value=model, filename="Model/random_forest_plant_health.pkl")

['Model/random_forest_plant_health.pkl']

In [51]:
# NOTE: joblib.load unpickles the file; only load artifacts produced by this
# notebook, never files from an untrusted source.
loaded_model = joblib.load("Model/random_forest_plant_health.pkl")


def predict(input_data):
    """Predict the plant-health score for a single sensor reading.

    Parameters
    ----------
    input_data : dict
        Maps every feature name the model was fitted on to its numeric
        value. Key order does not matter; keys that are not model features
        are ignored.

    Returns
    -------
    str
        The model output multiplied by 100 and formatted with two decimals.
        NOTE(review): the message text reads "survival probability", while
        the target built in this notebook is a health score — confirm the
        wording is intended.

    Raises
    ------
    ValueError
        If a feature the model was fitted on is missing from ``input_data``.
        Without this check the DataFrame constructor would fill the missing
        column with NaN and no error would be raised here.
    """
    expected_features = list(loaded_model.feature_names_in_)
    missing = [name for name in expected_features if name not in input_data]
    if missing:
        raise ValueError(f"Missing input features: {missing}")

    # Build a one-row frame in the column order used at fit time.
    input_df = pd.DataFrame([input_data], columns=expected_features)

    prediction = loaded_model.predict(input_df)[0]
    return f"Probabilitas bertahan hidup: {prediction * 100:.2f}%"

In [48]:
# Show the feature names stored on the loaded model; predict() reads this
# attribute to decide the column order of its input frame.
loaded_model.feature_names_in_

array(['Soil_Moisture', 'Humidity', 'Light_Intensity'], dtype=object)

In [69]:
# Example inference for one sensor reading.
predict(dict(
    Soil_Moisture=100.0,
    Light_Intensity=100.0,
    Humidity=0.0,
))

'Probabilitas bertahan hidup: 64.00%'