In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score

In [18]:
# Load the laboratory task records from the CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer='laboratory_tasks.csv')

In [19]:
# Feature engineering: turn the categorical columns into integer codes.
# Each column gets its own LabelEncoder. Refitting one shared encoder would
# discard the first column's category -> code mapping, which makes it
# impossible to decode values or to encode new inputs consistently later on.
encoders = {}
for column in ('TaskStatus', 'ResourceAllocated'):
    encoders[column] = LabelEncoder()
    df[f'{column}_encoded'] = encoders[column].fit_transform(df[column])

# Backward compatibility: `le` previously ended up fitted on 'ResourceAllocated'.
le = encoders['ResourceAllocated']

# Define the feature columns and the target variable.
features = ['TaskStatus_encoded', 'ResourceAllocated_encoded', 'Reward']
target = 'TimeSpent'

X = df[features]
y = df[target]

# Split the data into training and test sets (80/20, fixed seed so the split
# is reproducible).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [20]:
# Build the random forest regressor and fit it on the training split in one
# step (fit() returns the fitted estimator itself).
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict the target for the held-out test split.
y_pred = rf_model.predict(X_test)

# Evaluate the model: mean squared error and coefficient of determination.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared Score: {r2}")

# Feature importance: pair each feature name with the importance reported by
# the fitted forest, then order the table from most to least important.
feature_importance = pd.DataFrame(
    {'feature': features, 'importance': rf_model.feature_importances_}
)
feature_importance = feature_importance.sort_values(by='importance', ascending=False)

print("\nFeature Importance:")
print(feature_importance)

Mean Squared Error: 0.017583333333333343
R-squared Score: 0.9882777777777778

Feature Importance:
                     feature  importance
1  ResourceAllocated_encoded    0.535791
2                     Reward    0.456335
0         TaskStatus_encoded    0.007874


In [21]:
# Example prediction: a single hand-written observation, using the same
# column names and order as the training features.
sample_record = {
    'TaskStatus_encoded': 1,
    'ResourceAllocated_encoded': 2,
    'Reward': 1.5,
}
sample_input = pd.DataFrame([sample_record])

# predict() returns one value per input row; only one row is supplied here.
predicted_time = rf_model.predict(sample_input)
print(f"\nVorhergesagte Zeit für die Beispieleingabe: {predicted_time[0]:.2f}")


Vorhergesagte Zeit für die Beispieleingabe: 2.23
