In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler
import joblib

In [6]:
df = pd.read_csv('biogas.csv')

In [7]:
df

Unnamed: 0,timestamp,ph,biogas_production,anomaly,cause
0,2024-01-01 08:00:00,6.87,34.81,1,
1,2024-01-01 10:00:00,7.45,50.21,1,
2,2024-01-01 12:00:00,7.23,35.38,1,
3,2024-01-01 14:00:00,7.10,50.82,1,
4,2024-01-01 16:00:00,6.66,36.89,1,
...,...,...,...,...,...
2995,2024-09-06 22:00:00,7.37,58.44,0,
2996,2024-09-07 00:00:00,6.66,37.76,0,
2997,2024-09-07 02:00:00,8.29,15.35,0,pH tinggi
2998,2024-09-07 04:00:00,6.79,42.07,0,


In [9]:
df['timestamp'] = pd.to_datetime(df['timestamp'])

df['hour'] = df['timestamp'].dt.hour
df['day'] = df['timestamp'].dt.day
df['month'] = df['timestamp'].dt.month
df['day_of_week'] = df['timestamp'].dt.dayofweek

In [10]:
X = df[['ph', 'biogas_production', 'hour', 'day', 'month', 'day_of_week']]
y = df[['anomaly', 'cause']]

In [11]:
print("\nMissing values in X:", X.isna().sum())
print("Missing values in y:", y.isna().sum())


Missing values in X: ph                   0
biogas_production    0
hour                 0
day                  0
month                0
day_of_week          0
dtype: int64
Missing values in y: anomaly       0
cause      2700
dtype: int64


In [13]:
for column in X.columns:
    X[column] = X[column].fillna(X[column].mean())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[column] = X[column].fillna(X[column].mean())


In [14]:
rows_before = len(y)
non_na_indices = y.dropna().index
X = X.loc[non_na_indices]
y = y.loc[non_na_indices]
rows_after = len(y)
print(f"{rows_before - rows_after}")


2700


In [15]:
cause_encoder = LabelEncoder()
cause_encoded = cause_encoder.fit_transform(y['cause'])

In [16]:
cause_mapping = dict(zip(cause_encoder.classes_, cause_encoder.transform(cause_encoder.classes_)))
print("Cause mapping:", cause_mapping)

Cause mapping: {'Maintenance': 0, 'Produksi naik drastis': 1, 'Produksi turun': 2, 'pH rendah': 3, 'pH tinggi': 4}


In [17]:
y['cause'] = cause_encoded
print(y)

      anomaly  cause
16          1      4
60          1      1
72          1      0
89          1      4
91          1      4
...       ...    ...
2964        0      2
2972        0      3
2985        0      1
2990        0      4
2997        0      4

[300 rows x 2 columns]


In [18]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled_df = pd.DataFrame(X_scaled, columns=X.columns, index=X.index)

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled_df, y, test_size=0.2, random_state=42)

In [40]:
from sklearn.feature_selection import SelectKBest, f_regression

selector = SelectKBest(f_regression, k=3)
X_train_selected = selector.fit_transform(X_train, y_train['anomaly'])
X_test_selected = selector.transform(X_test)

In [41]:
from sklearn.tree import DecisionTreeRegressor
model = DecisionTreeRegressor(max_depth=3) 
model.fit(X_train_selected, y_train)

In [46]:
model_package = {
    'model': model,
    'scaler': scaler,
    'cause_encoder': cause_encoder,
    'cause_mapping': cause_mapping,
    'feature_selector': selector,
    'feature_columns': X.columns.tolist()
}

In [47]:
joblib.dump(model_package, 'biogas_anomaly_model.pkl')

['biogas_anomaly_model.pkl']

In [48]:
from azureml.core import Workspace
ws = Workspace.from_config()

In [49]:
from azureml.core import Model

registered_model = Model.register(
    model_path="biogas_anomaly_model.pkl",
    model_name="model_biogas",
    workspace=ws
)
print("Model registered: ", registered_model.name)

Registering model model_biogas
Model registered:  model_biogas


In [50]:
from azure.ai.ml import MLClient
from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment
from azure.identity import InteractiveBrowserCredential

credential = InteractiveBrowserCredential()

ml_client = MLClient(
    credential=credential,
    subscription_id="ca50b345-b6d7-4d97-be52-8847c2e0321e",
    resource_group_name="318",  
    workspace_name="bioserde_ml"
)

# endpoint = ManagedOnlineEndpoint(
#     name="biogas-endpoint",
#     description="Biogas anomaly detection endpoint",
#     auth_mode="key"
# )

# ml_client.online_endpoints.begin_create_or_update(endpoint).result()

Overriding of current TracerProvider is not allowed
Overriding of current LoggerProvider is not allowed
Overriding of current MeterProvider is not allowed
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented


In [51]:
from azure.ai.ml.entities import ManagedOnlineDeployment, Environment, CodeConfiguration

env = Environment(
    name="biogas-env",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    conda_file={
        "name": "biogas-project",
        "channels": ["conda-forge"],
        "dependencies": [
            "python=3.8",
            "pip=21.3.1",
            {"pip": [
                "numpy>=1.21.0",
                "scikit-learn>=1.0.0",
                "joblib>=1.1.0",
            ]}
        ]
    }
)

model = ml_client.models.get(name="model_biogas", version="4")
print(f"Using model: {model.name}, version: {model.version}")

deployment = ManagedOnlineDeployment(
    name="biogas-deployment",
    endpoint_name="biogas-endpoint",
    model=model,
    environment=env,
    instance_type="Standard_DS2_v2",
    instance_count=1,
    code_configuration=CodeConfiguration(
        code="./",
        scoring_script="score.py"
    )
)

print("Starting deployment - this might take 5-10 minutes...")
try:
    deployment_result = ml_client.online_deployments.begin_create_or_update(deployment)
    deployment_result.result(timeout=900)
    
    print("Deployment successful!")
    
    endpoint = ml_client.online_endpoints.get("biogas-endpoint")
    ml_client.online_endpoints.begin_create_or_update(
        ManagedOnlineEndpoint(
            name="biogas-endpoint",
            traffic={"biogas-deployment": 100}
        )
    ).result()
    
    print("Traffic updated successfully!")
except Exception as e:
    print(f"Deployment error: {str(e)}")
    print("\nProceeding with local mock server solution...")

Instance type Standard_DS2_v2 may be too small for compute resources. Minimum recommended compute SKU is Standard_DS3_v2 for general purpose endpoints. Learn more about SKUs here: https://learn.microsoft.com/azure/machine-learning/referencemanaged-online-endpoints-vm-sku-list
Check: endpoint biogas-endpoint exists


Using model: model_biogas, version: 4
Starting deployment - this might take 5-10 minutes...


[32mUploading Bioserde Landing (1) (58.79 MBs): 100%|##########| 58791315/58791315 [00:27<00:00, 2104026.24it/s]
[39m



.............................Deployment error: (ResourceNotReady) User container has crashed or terminated. Please see troubleshooting guide, available here: https://aka.ms/oe-tsg#error-resourcenotready
Code: ResourceNotReady
Message: User container has crashed or terminated. Please see troubleshooting guide, available here: https://aka.ms/oe-tsg#error-resourcenotready

Proceeding with local mock server solution...


In [None]:
from azure.ai.ml import MLClient
from azure.identity import InteractiveBrowserCredential

# Authenticate
credential = InteractiveBrowserCredential()
ml_client = MLClient(
    credential=credential,
    subscription_id="ca50b345-b6d7-4d97-be52-8847c2e0321e",
    resource_group_name="318",
    workspace_name="bioserde_ml"
)

# Get endpoint
endpoint = ml_client.online_endpoints.get("biogas-model-endpoint")

# Get API keys
keys = ml_client.online_endpoints.get_keys("biogas-model-endpoint")
print("Primary Key:", keys.primary_key)
print("Secondary Key:", keys.secondary_key)

Overriding of current TracerProvider is not allowed
Overriding of current LoggerProvider is not allowed
Overriding of current MeterProvider is not allowed
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented


Primary Key: B4ZJoXXnCQdk5P1XArm69g7k4nYpfNmoH1XNE1faXZu2c8MinICIJQQJ99BFAAAAAAAAAAAAINFRAZMLraH3
Secondary Key: CLe32xdHyf5d49WPYQua4iY3Kyw8VsChRkqrx3Kfack0a0EwUssVJQQJ99BFAAAAAAAAAAAAINFRAZML1e88
