In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Daten einlesen
data = pd.read_csv('test.csv')

data['datetime'] = pd.to_datetime(data['datetime'])

data = data.sort_values(by=['machineID', 'datetime'])
data['comp_replaced'] = data['comp'].notnull().astype(int)

import numpy as np

data['time_to_next_replacement'] = np.nan

for machine in data['machineID'].unique():
    machine_data = data[data['machineID'] == machine]
    replacement_times = machine_data[machine_data['comp_replaced'] == 1]['datetime']

    if replacement_times.empty:
        continue

    replacement_times = pd.to_datetime(replacement_times)
    next_replacement = replacement_times.iloc[0]

    for idx, row in machine_data.iterrows():
        if row['datetime'] >= next_replacement:
            future_replacements = replacement_times[replacement_times > row['datetime']]
            if not future_replacements.empty:
                next_replacement = future_replacements.iloc[0]
            else:
                break  # Kein weiterer Austausch
        time_delta = (next_replacement - row['datetime']).total_seconds() / 3600  # in Stunden
        data.loc[idx, 'time_to_next_replacement'] = time_delta

features = ['volt', 'rotate', 'pressure', 'vibration', 'age']
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X = scaler.fit_transform(data[features])


In [21]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error


# Set NaN values in the target variable to -1
data['time_to_next_replacement'].fillna(-1, inplace=True)

y = data['time_to_next_replacement']
X_clean = scaler.transform(data[features])  # Use the same scaler to transform the data

X_train, X_test, y_train, y_test = train_test_split(X_clean, y, test_size=0.2)

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f'Mean Absolute Error (MAE): {mae}')
print(f'Root Mean Squared Error (RMSE): {rmse}')


Mean Absolute Error (MAE): 102.3808888888889
Root Mean Squared Error (RMSE): 128.43358751042393


In [22]:
import joblib

# Annahme: Ihr trainiertes Modell ist in der Variable 'model' gespeichert
joblib.dump(model, 'rul_model.pkl')

# Falls Sie einen Skalierer verwendet haben, speichern Sie diesen ebenfalls
joblib.dump(scaler, 'scaler.pkl')


['scaler.pkl']

In [None]:
import pandas as pd

# Aktuelle Sensordaten erfassen (z.B. aus einer Datenbank oder direkt von den Sensoren)
# Hier ein Beispiel-Datenpunkt
current_data = pd.DataFrame({
    'volt': [170.0],
    'rotate': [420.0],
    'pressure': [100.0],
    'vibration': [40.0],
    'age': [18]
})

# Die gleichen Vorverarbeitungsschritte anwenden wie beim Training
X_current = scaler.transform(current_data[features])

# Vorhersage der verbleibenden Nutzungsdauer
rul_prediction = model.predict(X_current)

print(f'Vorhergesagte verbleibende Nutzungsdauer: {rul_prediction[0]:.2f} Stunden')


Vorhergesagte verbleibende Nutzungsdauer: 220.25 Stunden
