# In [None]:
import pandas as pd
import random
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Load dataset: read the CSV at a hard-coded absolute path into a DataFrame.
# NOTE(review): the path is machine-specific, and the code further down expects
# the sensor columns named in `features` to exist in this file — confirm both
# before running elsewhere.
file_path = '/home/Smart_Fleet_management_data.csv'
df = pd.read_csv(file_path)

# Canned advice texts, keyed by condition label ('good', 'moderate', 'poor').
# Each label offers two alternative phrasings to pick from.
recommendations = {
    # Nothing wrong with the vehicle
    'good': [
        "The vehicle is in optimal condition. No immediate actions are required.",
        "All systems are functioning well. Regular maintenance is recommended to keep performance consistent.",
    ],
    # Some readings are drifting
    'moderate': [
        "Some parameters indicate moderate performance. Consider inspecting the cooling system and checking fuel consumption.",
        "Vehicle is performing adequately, but areas like engine temperature and braking could be improved.",
    ],
    # Urgent attention needed
    'poor': [
        "Multiple issues detected! Immediate action required for engine temperature, tire pressure, and braking force.",
        "Critical issues found in fuel consumption and CO2 emissions. Urgent maintenance needed.",
    ],
}

# Rule-based labelling of a single record from combined sensor thresholds
def generate_target(row):
    """Classify one record as 'poor', 'moderate' or 'good'.

    ``row`` is any object indexable by column name (for example a DataFrame
    row). 'poor' needs one of three paired conditions to hold simultaneously;
    'moderate' is triggered by any single softer threshold; everything else
    is 'good'.
    """
    # Severe: two related readings are out of range at the same time.
    if row['Engine_Temperature_C'] > 100 and row['Fuel_Consumption_km_l'] < 3.5:
        return 'poor'
    if row['Braking_Force_Percent'] < 50 and row['Vehicle_Speed_kph'] > 90:
        return 'poor'
    if row['Tire_Pressure_PSI'] < 85 and row['CO2_Emissions_g_km'] > 250:
        return 'poor'

    # Mild: any single reading crosses its softer threshold.
    if row['Engine_Temperature_C'] > 95:
        return 'moderate'
    if row['Fuel_Consumption_km_l'] < 4:
        return 'moderate'
    if row['Braking_Force_Percent'] < 60:
        return 'moderate'
    if row['Tire_Pressure_PSI'] < 90:
        return 'moderate'

    return 'good'

# Label every record by running the rule-based classifier over it, row by row
df['Condition'] = df.apply(generate_target, axis=1)

# Columns fed to the Random Forest model as input features
features = [
    'Fuel_Consumption_km_l',
    'Engine_Temperature_C',
    'Vehicle_Speed_kph',
    'GPS_Routing_Deviation',
    'Tire_Pressure_PSI',
    'Braking_Force_Percent',
    'Vibration_g',
    'Eye_Closure_Interval_s',
    'Vehicle_Load_Percent',
    'CO2_Emissions_g_km',
]

# Feature matrix and target vector
X, y = df[features], df['Condition']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Hyperparameter search space: 3 * 3 * 3 * 3 * 3 * 2 = 486 candidate settings
param_grid = {
    # Number of trees in the forest
    'n_estimators': [100, 200, 500],
    # Maximum depth of each tree
    'max_depth': [10, 20, None],
    # Minimum samples required to split a node
    'min_samples_split': [2, 10, 20],
    # Minimum samples required in a leaf node
    'min_samples_leaf': [1, 5, 10],
    # Number of features considered at each split
    'max_features': ['sqrt', 'log2', None],
    # Whether trees are built on bootstrap samples
    'bootstrap': [True, False],
}

# Base estimator handed to the search; seeded for repeatable fits
rf_model = RandomForestClassifier(random_state=42)

# Exhaustive grid search with 5-fold cross-validation and progress output
grid_search = GridSearchCV(
    estimator=rf_model,
    param_grid=param_grid,
    cv=5,
    verbose=2,
    n_jobs=-1,
)

# Run the search on the training split only
grid_search.fit(X_train, y_train)

# Report the winning hyperparameter combination
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

# Score the held-out test split with the best estimator found by the search
best_rf_model = grid_search.best_estimator_
y_pred = best_rf_model.predict(X_test)

# Per-class evaluation metrics on the test split
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Pair each feature name with its importance, most influential first
feature_importances = pd.DataFrame(
    {
        'Feature': features,
        'Importance': best_rf_model.feature_importances_,
    }
)
feature_importances = feature_importances.sort_values(by='Importance', ascending=False)

print("Feature Importances:")
print(feature_importances)

# Generate final recommendations based on the tuned model
def _pick_recommendation(prediction):
    """Return a randomly chosen message for a predicted condition label.

    Any label other than 'good' or 'moderate' falls back to the 'poor'
    messages.
    """
    return random.choice(recommendations.get(prediction, recommendations['poor']))


def generate_final_recommendation(row):
    """Return a recommendation message for a single record.

    Args:
        row: A pandas Series holding at least the columns listed in
            ``features``.

    Returns:
        One of the texts in ``recommendations`` for the condition predicted
        by ``best_rf_model``. The choice between the alternative texts is
        random, so repeated calls may return different messages.
    """
    # Feed the model a one-row DataFrame rather than a bare list: the model was
    # fitted on a DataFrame, and keeping the column names avoids scikit-learn's
    # "X does not have valid feature names" warning on every call.
    # astype(float) normalises rows that arrive with object dtype.
    sample = row[features].to_frame().T.astype(float)
    prediction = best_rf_model.predict(sample)[0]
    return _pick_recommendation(prediction)


# Score the whole table with a single predict() call instead of one model
# invocation per row, then draw one message per predicted label (in row order).
predicted_conditions = best_rf_model.predict(df[features])
df['Final_Recommendation'] = [
    _pick_recommendation(label) for label in predicted_conditions
]

# Save the updated DataFrame with recommendations to CSV
output_file_path = '/home/final_output_with_tuning.csv'
df.to_csv(output_file_path, index=False)

print(f"CSV file saved as {output_file_path}")
print(df[['Final_Recommendation']].head())