In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [11]:
import pandas as pd
from sklearn.model_selection import GridSearchCV,train_test_split
from sklearn.preprocessing import  LabelEncoder,MinMaxScaler,StandardScaler
from sklearn.tree import  DecisionTreeClassifier
from sklearn.metrics import confusion_matrix,f1_score,precision_score,recall_score,accuracy_score,classification_report

In [3]:
df=pd.read_csv('/content/drive/MyDrive/Datasets/car_evaluation.csv')
df.head()

Unnamed: 0,vhigh,vhigh.1,2,2.1,small,low,unacc
0,vhigh,vhigh,2,2,small,med,unacc
1,vhigh,vhigh,2,2,small,high,unacc
2,vhigh,vhigh,2,2,med,low,unacc
3,vhigh,vhigh,2,2,med,med,unacc
4,vhigh,vhigh,2,2,med,high,unacc


In [4]:
df.columns=['buying','maint','doors','persons','lug_boot','safety','class']
df.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,med,unacc
1,vhigh,vhigh,2,2,small,high,unacc
2,vhigh,vhigh,2,2,med,low,unacc
3,vhigh,vhigh,2,2,med,med,unacc
4,vhigh,vhigh,2,2,med,high,unacc


In [None]:
df.shape
df.dtypes
df.duplicated().sum()
df.nunique()
df['buying'].value_counts()
df['maint'].value_counts()
df['doors'].value_counts()
df['persons'].value_counts()
df['lug_boot'].value_counts()
df['safety'].value_counts()
df['class'].value_counts()

df.info()
df.describe()
df.isnull().sum()

### LabelEncoder

In [5]:
label_encoders= {}
for column in df.columns:
    le=LabelEncoder()
    df[column]=le.fit_transform(df[column])
    label_encoders[column]=le
df.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,3,3,0,0,2,2,2
1,3,3,0,0,2,0,2
2,3,3,0,0,1,1,2
3,3,3,0,0,1,2,2
4,3,3,0,0,1,0,2


In [6]:
X=df.drop(columns=['class'])
y=df['class']
X.shape,y.shape

((1727, 6), (1727,))

In [7]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((1381, 6), (346, 6), (1381,), (346,))

### fit_transform Vs transform

#### Standardization

In [None]:
scaler= StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)
print(f"Mean: ",X_train.mean(axis=0))
print(f"Standard Deviation: ",X_train.std(axis=0))

Mean:  [-7.46044147e-17  3.60159243e-17  7.71769807e-17  9.77575089e-17
  1.47922546e-16  1.28628301e-16]
Standard Deviation:  [1. 1. 1. 1. 1. 1.]


### Decision_Tree

In [21]:
dt_classifier=DecisionTreeClassifier(random_state=42)
dt_classifier.fit(X_train,y_train)
y_pred=dt_classifier.predict(X_test)
print(f"confution_matrix:\n",confusion_matrix(y_test,y_pred))
print(f"accuracy_score:\n",accuracy_score(y_test,y_pred))
print(f"recall_score:\n",recall_score(y_test,y_pred,average='micro'))
print(f"precision_score:\n",precision_score(y_test,y_pred,average='micro'))
print(f"f1_score:\n",f1_score(y_test,y_pred,average='micro'))
print(f"\nclassification_report:\n{classification_report(y_test,y_pred,target_names=label_encoders['class'].classes_)}")

confution_matrix:
 [[ 73   2   0   2]
 [  2  12   0   1]
 [  1   0 236   0]
 [  0   2   0  15]]
accuracy_score:
 0.9710982658959537
recall_score:
 0.9710982658959537
precision_score:
 0.9710982658959537
f1_score:
 0.9710982658959537

classification_report:
              precision    recall  f1-score   support

         acc       0.96      0.95      0.95        77
        good       0.75      0.80      0.77        15
       unacc       1.00      1.00      1.00       237
       vgood       0.83      0.88      0.86        17

    accuracy                           0.97       346
   macro avg       0.89      0.91      0.90       346
weighted avg       0.97      0.97      0.97       346



### Gread_Searchcv

In [27]:
dt_classifier=DecisionTreeClassifier(random_state=42)
param_grid={
    'max_depth' : [5,10,15,20,25],
    'min_samples_split' : [2,5,10,20],
    'min_samples_leaf' : [1,2,4,8],
    'max_features' : [None,'auto','sqrt','log2']
}

In [28]:
grid_search=GridSearchCV(estimator=dt_classifier,param_grid=param_grid,cv=5,scoring='accuracy',n_jobs=-1)
grid_search.fit(X_train,y_train)

400 fits failed out of a total of 1600.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
106 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1145, in wrapper
    estimator._validate_params()
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/utils/_param_validation.py", line 96, in validate_parameter_constraints
    raise InvalidParameterError(
s

In [29]:
best_params=grid_search.best_params_
print(f"Best_Param : {best_params}")

Best_Param : {'max_depth': 15, 'max_features': None, 'min_samples_leaf': 1, 'min_samples_split': 2}


In [32]:
best_model=grid_search.best_estimator_
y_pred=best_model.predict(X_test)
report=classification_report(y_test,y_pred,target_names=label_encoders['class'].classes_)
print(report)

              precision    recall  f1-score   support

         acc       0.96      0.95      0.95        77
        good       0.75      0.80      0.77        15
       unacc       1.00      1.00      1.00       237
       vgood       0.83      0.88      0.86        17

    accuracy                           0.97       346
   macro avg       0.89      0.91      0.90       346
weighted avg       0.97      0.97      0.97       346

