In [3]:
# Step 1: Install scikit-learn if not already installed
!pip install scikit-learn

# Step 2: Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_extraction.text import CountVectorizer

# Step 3: Load your dataset
# Replace 'your_dataset.csv' with your actual dataset file
df = pd.read_csv('road_condition.csv')

# Step 4: Preprocess the data
# Replace 'target_column', 'categorical_column', and 'text_column' with your actual column names
X_numeric = df.drop(['Road_Surface', 'Weather_Conditions', 'Road_Surface'], axis=1)

# Handle categorical data with one-hot encoding
X_categorical = pd.get_dummies(df['Weather_Conditions'], prefix='category')

# Handle text data with CountVectorizer
vectorizer = CountVectorizer()
X_text = vectorizer.fit_transform(df['Road_Surface'])
X_text_df = pd.DataFrame(X_text.toarray(), columns=vectorizer.get_feature_names_out())

# Concatenate numeric, categorical, and text features
X = pd.concat([X_numeric, X_categorical, X_text_df], axis=1)

y = df['Road_Surface']

# Step 5: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 6: Create and train the Decision Tree model
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)

# Step 7: Make predictions and evaluate the model
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')
print('\nClassification Report:')
print(classification_report(y_test, y_pred))

Accuracy: 1.00

Classification Report:
              precision    recall  f1-score   support

         Dry       1.00      1.00      1.00        24
 Frost / Ice       1.00      1.00      1.00         3
  Wet / Damp       1.00      1.00      1.00        13

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40

