In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import OneHotEncoder
# Load the dataset from the Excel workbook.
# NOTE(review): this is an absolute, machine-specific path; point it at a
# configurable data directory before sharing the notebook.
data = pd.read_excel('/Users/aravindryali/Desktop/Studies/ML_projects/car_data.xlsx')

# Categorical feature columns that are one-hot encoded below
categorical_cols = ['infotainment_system_status', 'navigation_system_status']

# One-hot encode the categorical columns. The fitted encoder stays at module
# level because predict_comfortdistraction() reuses it for new samples.
encoder = OneHotEncoder()
encoded_data = encoder.fit_transform(data[categorical_cols])
# Reuse data's index so the concat below lines up row-for-row even when the
# frame does not carry a default RangeIndex.
encoded_df = pd.DataFrame(
    encoded_data.toarray(),
    columns=encoder.get_feature_names_out(),
    index=data.index,
)

# Append the indicator columns, then drop the raw categorical columns
data = pd.concat([data, encoded_df], axis=1)
data = data.drop(categorical_cols, axis=1)

# Numerical feature columns that are standardised below
numerical_cols = ['temperature', 'humidity', 'noise_level', 'vibration_level',
                  'driver_seat_position', 'passenger_seat_position']

# Separate the features from the two targets and hold out 20% for testing
X = data.drop(['comfort', 'distraction'], axis=1)
y = data[['comfort', 'distraction']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply the same transform to
# the test rows. Fitting on the full dataset before splitting would leak the
# test set's mean/variance into training.
scaler = StandardScaler()
X_train = X_train.copy()  # explicit copies: avoid writing into slices of X
X_test = X_test.copy()
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

# Train one random forest classifier per target variable and keep the fitted
# estimators in `models`, keyed by target name.
# Other estimators that can be swapped in for RandomForestClassifier here:
#   SVC(kernel='linear', C=1)
#   LogisticRegression(max_iter=100)
models = {}
for target in ['comfort', 'distraction']:
    # A fixed seed makes the fitted forest, and therefore the metrics printed
    # by the evaluation step, repeatable from run to run.
    clf_1 = RandomForestClassifier(n_estimators=50, random_state=42)
    clf_1.fit(X_train, y_train[target])
    models[target] = clf_1

# Report held-out accuracy and the per-class metrics for every fitted model
for target, model in models.items():
    y_true = y_test[target]
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_true, y_pred)
    report = classification_report(y_true, y_pred)

    summary_lines = [
        f"Target: {target}",
        f"Accuracy: {accuracy:.3f}",
        f"Classification Report:\n{report}",
    ]
    for line in summary_lines:
        print(line)
    print()  # blank separator between targets



# Now, you can use the trained models to make predictions on new data
def predict_comfortdistraction(temp, humidity, noise, vibration, driver_seat, passenger_seat, infotainment, navigation):
    """Predict the comfort and distraction labels for a single new sample.

    The sample is pushed through the same preprocessing as the training data:
    the module-level ``encoder`` one-hot encodes the two categorical inputs and
    the module-level ``scaler`` standardises ``numerical_cols``. The resulting
    row is then scored by ``models['comfort']`` and ``models['distraction']``.

    Parameters
    ----------
    temp, humidity, noise, vibration, driver_seat, passenger_seat
        Values for the ``temperature``, ``humidity``, ``noise_level``,
        ``vibration_level``, ``driver_seat_position`` and
        ``passenger_seat_position`` features, in that order.
    infotainment, navigation
        Values for ``infotainment_system_status`` and
        ``navigation_system_status``. ``encoder`` is built with its default
        settings, so a value it was not fitted on is expected to make
        ``encoder.transform`` raise.

    Returns
    -------
    tuple
        ``(comfort_prediction, distraction_prediction)`` for the sample.

    Raises
    ------
    ValueError
        If a model expects a feature column that this function does not build.
    """
    # Build a one-row frame using the training column names
    new_data = pd.DataFrame({
        'temperature': [temp],
        'humidity': [humidity],
        'noise_level': [noise],
        'vibration_level': [vibration],
        'driver_seat_position': [driver_seat],
        'passenger_seat_position': [passenger_seat],
        'infotainment_system_status': [infotainment],
        'navigation_system_status': [navigation]
    })

    # One-hot encode with the already-fitted encoder, reusing the shared
    # column list so training and inference cannot drift apart.
    encoded_data = encoder.transform(new_data[categorical_cols])
    encoded_df = pd.DataFrame(
        encoded_data.toarray(),
        columns=encoder.get_feature_names_out(),
        index=new_data.index,
    )
    new_data = pd.concat([new_data, encoded_df], axis=1)
    new_data = new_data.drop(categorical_cols, axis=1)

    # Scale the numerical features with the already-fitted scaler
    new_data[numerical_cols] = scaler.transform(new_data[numerical_cols])

    predictions = {}
    for target in ('comfort', 'distraction'):
        model = models[target]
        # Estimators fitted on a DataFrame remember the training column names
        # and check them (including order) at predict time. Align explicitly
        # rather than relying on the dict order above matching the spreadsheet.
        expected = getattr(model, 'feature_names_in_', None)
        if expected is None:
            features = new_data
        else:
            missing = [col for col in expected if col not in new_data.columns]
            if missing:
                raise ValueError(
                    f"Model for '{target}' expects feature columns that were not provided: {missing}"
                )
            features = new_data[list(expected)]
        predictions[target] = model.predict(features)[0]

    return predictions['comfort'], predictions['distraction']

# Example usage: score one hand-written sample with the trained models
temp, humidity, noise, vibration = 20, 50, 40, 20
driver_seat, passenger_seat = 5, 3
infotainment, navigation = 'on', 'navigating'

comfort, distraction = predict_comfortdistraction(
    temp=temp,
    humidity=humidity,
    noise=noise,
    vibration=vibration,
    driver_seat=driver_seat,
    passenger_seat=passenger_seat,
    infotainment=infotainment,
    navigation=navigation,
)
print(f"Predicted comfort: {comfort}, distraction: {distraction}")

Target: comfort
Accuracy: 0.990
Classification Report:
               precision    recall  f1-score   support

  comfortable       1.00      0.97      0.98        59
uncomfortable       0.99      1.00      0.99       141

     accuracy                           0.99       200
    macro avg       0.99      0.98      0.99       200
 weighted avg       0.99      0.99      0.99       200


Target: distraction
Accuracy: 0.810
Classification Report:
              precision    recall  f1-score   support

          No       0.68      0.87      0.76        70
         Yes       0.92      0.78      0.84       130

    accuracy                           0.81       200
   macro avg       0.80      0.82      0.80       200
weighted avg       0.83      0.81      0.81       200


Predicted comfort: comfortable, distraction: No
