# Viva Credits

**Problem Statement:**  
Build and optimize a **Decision Tree Classifier** to predict binary class labels using **cross-validation** and **hyperparameter tuning**.

---

*By:-*  **Mowlick Armstrong**

## Hyperparameter Tuning

### Importing Libraries

In [None]:
from sklearn.datasets import make_classification
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
import numpy as np

In [None]:
# Generate a synthetic binary-classification dataset: 1000 samples, 5 features.
# A fixed random_state makes the generated data (and therefore every score
# computed from it) reproducible across notebook runs.
X, y = make_classification(n_samples=1000,
                           n_features=5,
                           n_classes=2,
                           random_state=42)

In [None]:
# Show the Python type of the feature container X.
type(X)

In [None]:
# Show the Python type of the label container y.
type(y)

In [None]:
# Print the dimensions of the feature matrix X.
print(X.shape)

In [None]:
# Print the dimensions of the label vector y.
print(y.shape)

In [None]:
# Wrap the raw arrays in DataFrames so they can be inspected and combined.
# pandas is already imported as `pd` in the imports cell at the top of the
# notebook, so it is not re-imported here.
# Column names (col1, col2, ...) are derived from X's width instead of being
# hard-coded, so this cell keeps working if the number of features changes.
df1 = pd.DataFrame(data=X,
                   columns=['col' + str(i) for i in range(1, X.shape[1] + 1)])
df2 = pd.DataFrame(data=y, columns=['Target Variable'])

In [None]:
# Place the feature columns and the target column side by side in one frame.
df = pd.concat(objs=[df1, df2], axis='columns')

In [None]:
# Preview the first five rows of the combined frame.
df.head(n=5)

In [None]:
# Separate the frame into the feature matrix (every column except the target)
# and the target, which is kept as a one-column DataFrame.
X = df.drop(columns='Target Variable')
y = df.loc[:, ['Target Variable']]

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Applying Decision Tree

In [None]:
# Baseline: an untuned decision tree trained on the training split and scored
# on the held-out test split.
# random_state is fixed because tree construction shuffles candidate features
# at each split, so an unseeded tree can give a different accuracy on each run.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)
# accuracy_score returns a fraction in [0, 1]; multiply by 100 to show a percentage.
print("Accuracy score of the Decision Tree is", accuracy_score(y_test, y_pred_dt)*100)

## Applying Hyperparameter Tuning

In [None]:
# Search space for the grid search: 3 depths x 3 split thresholds x 2 impurity
# criteria = 18 candidate parameter combinations.
param_grid = dict(
    max_depth=[3, 5, 10],
    min_samples_split=[2, 5, 10],
    criterion=['gini', 'entropy'],
)

In [None]:
# Exhaustively evaluate every combination in param_grid with 5-fold
# cross-validation on the training split, ranking candidates by accuracy.
grid_search = GridSearchCV(
    estimator=dt,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(X_train, y_train)

In [None]:
# Display the estimator configured with the best-scoring parameter combination.
grid_search.best_estimator_

In [None]:
# Display the cross-validated accuracy achieved by the best parameter combination.
grid_search.best_score_

In [None]:
# Display the parameter combination that achieved the best score.
grid_search.best_params_

In [None]:
# Keep a handle on the winning estimator for later predictions, then report
# the parameter combination it was built with.
best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)

In [None]:
# Predict with the tuned model on the held-out test split and report its
# accuracy, so the effect of tuning can be compared with the baseline tree.
y_pred = best_model.predict(X_test)
print("Accuracy score of the tuned Decision Tree is", accuracy_score(y_test, y_pred)*100)

In [None]:
# 1. Generate a fresh synthetic binary dataset (1000 samples, 10 features).
#    NOTE: this rebinds the notebook-level X and y used by the earlier cells.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    random_state=42,
)

# 2. Create the (untuned) decision tree with a fixed seed
clf = DecisionTreeClassifier(random_state=42)

# 3. Score it with 5-fold cross-validation, one accuracy value per fold
scores = cross_val_score(estimator=clf, X=X, y=y, scoring='accuracy', cv=5)

# 4. Report the per-fold scores and their spread
print("Cross-validation scores:", scores)
print("Mean accuracy:", scores.mean())
print("Standard deviation:", scores.std())