In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import files

In [None]:
# Ask for a file through the Colab upload widget; the result is keyed by filename.
uploaded = files.upload()
file_name = list(uploaded)[0]  # only the first uploaded file is used

# Column names are supplied here, so the file is read as having no header row.
column_names = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']
df = pd.read_csv(file_name, header=None, names=column_names)
df.head()

Saving car_evaluation.csv to car_evaluation.csv


Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [None]:
# Encode on a copy so the raw frame `df` keeps its original string categories.
df_processed = df.copy()

# All six input columns are categorical. Each one gets its own fitted
# LabelEncoder, stored in `label_encoders` so the same category -> code
# mapping can be reapplied to new inputs later.
categorical_features = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']
label_encoders = {}

for feature in categorical_features:
    le = LabelEncoder().fit(df_processed[feature])
    label_encoders[feature] = le
    df_processed[feature] = le.transform(df_processed[feature])

    # Show the category -> integer code mapping learned for this column.
    code_by_category = dict(zip(le.classes_, le.transform(le.classes_)))
    print(f"{feature}: {code_by_category}")

# The encoded target goes into a new column; the original 'class' labels stay.
le_target = LabelEncoder()
df_processed['class_encoded'] = le_target.fit_transform(df_processed['class'])

buying: {'high': np.int64(0), 'low': np.int64(1), 'med': np.int64(2), 'vhigh': np.int64(3)}
maint: {'high': np.int64(0), 'low': np.int64(1), 'med': np.int64(2), 'vhigh': np.int64(3)}
doors: {'2': np.int64(0), '3': np.int64(1), '4': np.int64(2), '5more': np.int64(3)}
persons: {'2': np.int64(0), '4': np.int64(1), 'more': np.int64(2)}
lug_boot: {'big': np.int64(0), 'med': np.int64(1), 'small': np.int64(2)}
safety: {'high': np.int64(0), 'low': np.int64(1), 'med': np.int64(2)}


In [None]:
# Features are every column except the raw and the encoded target.
target_columns = ['class', 'class_encoded']
X = df_processed.drop(columns=target_columns)
y = df_processed['class_encoded']

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)


In [None]:
# Candidate classifiers, keyed by the display name used when reporting results.
models = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    # `multi_class` is intentionally not passed: it is deprecated since
    # scikit-learn 1.5 (emits a FutureWarning and is scheduled for removal).
    # With the default solver the model is already fitted as multinomial for
    # a target with more than two classes, so behaviour is unchanged.
    'Logistic Regression': LogisticRegression(random_state=42, max_iter=1000),
    # probability=True so the fitted SVC exposes predict_proba.
    'Support Vector Machine': SVC(kernel='rbf', random_state=42, probability=True)
}

# Standardize features for the models that are trained on scaled data.
# The scaler is fitted on the training split only and then applied to the
# test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Filled in by the training loop: display name -> fitted model and metrics.
results = {}

In [None]:
for name, model in models.items():
    # Logistic regression and the SVM are trained on the standardized
    # matrices; the random forest uses the encoded columns as they are.
    uses_scaled = name in ('Logistic Regression', 'Support Vector Machine')
    if uses_scaled:
        train_features, test_features = X_train_scaled, X_test_scaled
    else:
        train_features, test_features = X_train, X_test

    model.fit(train_features, y_train)
    y_pred = model.predict(test_features)
    y_pred_proba = model.predict_proba(test_features)

    accuracy = accuracy_score(y_test, y_pred)

    # Keep the fitted model and its test-set outputs for later cells.
    results[name] = dict(
        model=model,
        accuracy=accuracy,
        predictions=y_pred,
        probabilities=y_pred_proba,
    )

    print(f"\n{name}:")
    print(f"Accuracy: {accuracy:.4f}")


Random Forest:
Accuracy: 0.9827

Logistic Regression:
Accuracy: 0.6879





Support Vector Machine:
Accuracy: 0.9046


In [None]:
def predict_car_acceptability(buying, maint, doors, persons, lug_boot, safety, model, scaler, model_name):
    """
    Predict the acceptability class of a single car.

    The six feature values must be given as the raw category strings the
    label encoders were fitted on (note: no hyphens, and `doors`/`persons`
    are strings, not integers):

    buying:   'vhigh', 'high', 'med', 'low'
    maint:    'vhigh', 'high', 'med', 'low'
    doors:    '2', '3', '4', '5more'
    persons:  '2', '4', 'more'
    lug_boot: 'small', 'med', 'big'
    safety:   'low', 'med', 'high'

    Parameters:
    model: fitted classifier exposing `predict` and `predict_proba`
    scaler: fitted scaler; only applied when `model_name` is
        'Logistic Regression' or 'Support Vector Machine'
    model_name: display name of `model`, used to decide whether to scale

    Returns:
    (predicted_label, class_probabilities) where `predicted_label` is the
    decoded class name and `class_probabilities` maps every class name in
    `le_target.classes_` to the model's predicted probability.

    Relies on the notebook-level `categorical_features`, `label_encoders`
    and `le_target`. A value the encoder has not seen is rejected by the
    encoder's `transform` (scikit-learn raises ValueError).
    """

    # Single-row frame; key order matches the training feature columns.
    input_df = pd.DataFrame([{
        'buying': buying,
        'maint': maint,
        'doors': doors,
        'persons': persons,
        'lug_boot': lug_boot,
        'safety': safety
    }])

    # Reapply the exact category -> code mapping learned during training.
    for feature in categorical_features:
        input_df[feature] = label_encoders[feature].transform(input_df[feature])

    # Models trained on standardized features must be queried the same way.
    if model_name in ['Logistic Regression', 'Support Vector Machine']:
        model_input = scaler.transform(input_df)
    else:
        model_input = input_df

    prediction = model.predict(model_input)[0]
    probability = model.predict_proba(model_input)[0]

    # Decode the predicted class code back to its label.
    predicted_label = le_target.inverse_transform([prediction])[0]

    # Pair each class label with its probability. This deliberately does not
    # reuse `predicted_label` as a loop variable: doing so overwrote the
    # prediction and made the function always return the last class.
    class_probabilities = dict(zip(le_target.classes_, probability))

    return predicted_label, class_probabilities

# Smoke-test the prediction helper on a few hand-picked cars.
# Each row is [buying, maint, doors, persons, lug_boot, safety]; every value
# is a string so it matches the categories the encoders were fitted on.
# The notes describe what the Random Forest reported in the recorded run.
test_cases = [
    ['med', 'med', '2', '2', 'med', 'high'],      # two-seater: probability 1.0 on 'unacc'
    ['vhigh', 'vhigh', '2', '2', 'small', 'low'], # expensive, low safety: probability 1.0 on 'unacc'
    ['low', 'low', '4', 'more', 'big', 'high']    # cheap, roomy, safe: probability 0.99 on 'vgood'
]

banner = "="*60
print("\n" + banner)
print("TEST PREDICTIONS")
print(banner)

# Run every test case through every model and print the decoded prediction
# followed by the per-class probabilities.
for name, model_obj in models.items():
    print(f"\n--- Testing with {name} ---")
    for i, test_case in enumerate(test_cases, 1):
        buying, maint, doors, persons, lug_boot, safety = test_case
        prediction, probabilities = predict_car_acceptability(*test_case, model_obj, scaler, name)

        print(f"\nTest Case {i}:")
        print(f"Features: buying={buying}, maint={maint}, doors={doors}, persons={persons}, lug_boot={lug_boot}, safety={safety}")
        print(f"Predicted Acceptability ({name}): {prediction}")
        print("Probabilities:")
        for class_name, prob in probabilities.items():
            print(f"  {class_name}: {prob:.4f}")


TEST PREDICTIONS

--- Testing with Random Forest ---

Test Case 1:
Features: buying=med, maint=med, doors=2, persons=2, lug_boot=med, safety=high
Predicted Acceptability (Random Forest): vgood
Probabilities:
  acc: 0.0000
  good: 0.0000
  unacc: 1.0000
  vgood: 0.0000

Test Case 2:
Features: buying=vhigh, maint=vhigh, doors=2, persons=2, lug_boot=small, safety=low
Predicted Acceptability (Random Forest): vgood
Probabilities:
  acc: 0.0000
  good: 0.0000
  unacc: 1.0000
  vgood: 0.0000

Test Case 3:
Features: buying=low, maint=low, doors=4, persons=more, lug_boot=big, safety=high
Predicted Acceptability (Random Forest): vgood
Probabilities:
  acc: 0.0100
  good: 0.0000
  unacc: 0.0000
  vgood: 0.9900

--- Testing with Logistic Regression ---

Test Case 1:
Features: buying=med, maint=med, doors=2, persons=2, lug_boot=med, safety=high
Predicted Acceptability (Logistic Regression): vgood
Probabilities:
  acc: 0.0655
  good: 0.0095
  unacc: 0.9190
  vgood: 0.0061

Test Case 2:
Features: bu