In [5]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the datasets with updated file paths
train_data = pd.read_csv('/kaggle/input/fds-assignment/trainfile.csv')
test_data = pd.read_csv('/kaggle/input/fds-assignment/testfile.csv')

# Handle missing values
train_data['OwnHouse'].fillna(train_data['OwnHouse'].mode()[0], inplace=True)
train_data['HasCrCard'].fillna(train_data['HasCrCard'].mode()[0], inplace=True)
test_data['OwnHouse'].fillna(test_data['OwnHouse'].mode()[0], inplace=True)

# Encode categorical variables
label_encoder = LabelEncoder()
categorical_columns = ['Geography', 'Gender', 'Occupation']
for column in categorical_columns:
    train_data[column] = label_encoder.fit_transform(train_data[column])
    test_data[column] = label_encoder.transform(test_data[column])

# Scale numerical features
scaler = StandardScaler()
numerical_columns = ['CreditScore', 'Age', 'Tenure', 'Balance', 'EstimatedSalary']
train_data[numerical_columns] = scaler.fit_transform(train_data[numerical_columns])
test_data[numerical_columns] = scaler.transform(test_data[numerical_columns])

# Prepare training data
X = train_data.drop(columns=['row ID', 'CustomerId', 'Surname', 'Exited'])
y = train_data['Exited']

# Train-test split for validation
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Perform 20 iterations of Decision Tree with varying parameters
best_model = None
best_accuracy = 0
results = []

for i in range(1, 21):
    print(f"Iteration {i}")
    
    # Randomly select hyperparameters
    max_depth = np.random.randint(3, 20)
    min_samples_split = np.random.randint(2, 10)
    min_samples_leaf = np.random.randint(1, 10)
    
    # Train Decision Tree
    dt_model = DecisionTreeClassifier(
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        random_state=42
    )
    dt_model.fit(X_train, y_train)
    
    # Predict on validation data
    y_pred = dt_model.predict(X_val)
    accuracy = accuracy_score(y_val, y_pred)
    
    print(f"Parameters: max_depth={max_depth}, min_samples_split={min_samples_split}, min_samples_leaf={min_samples_leaf}")
    print(f"Accuracy: {accuracy}\n")
    
    # Save the best model
    if accuracy > best_accuracy:
        best_model = dt_model
        best_accuracy = accuracy
    
    results.append((max_depth, min_samples_split, min_samples_leaf, accuracy))

# Output the best model's results
print("Best Decision Tree Model:")
print(f"Accuracy: {best_accuracy}")
print("Confusion Matrix:")
print(confusion_matrix(y_val, best_model.predict(X_val)))
print("Classification Report:")
print(classification_report(y_val, best_model.predict(X_val)))

# Generate predictions for submission using the best model
test_features = test_data.drop(columns=['row ID', 'CustomerId', 'Surname'])
test_predictions = best_model.predict(test_features)

# Prepare submission file
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission.csv', index=False)

print("Submission file created successfully: submission.csv")


  train_data = pd.read_csv('/kaggle/input/fds-assignment/trainfile.csv')
  test_data = pd.read_csv('/kaggle/input/fds-assignment/testfile.csv')
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_data['OwnHouse'].fillna(train_data['OwnHouse'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_data

Iteration 1
Parameters: max_depth=18, min_samples_split=3, min_samples_leaf=2
Accuracy: 0.8506005193681021

Iteration 2
Parameters: max_depth=6, min_samples_split=9, min_samples_leaf=6
Accuracy: 0.8580123349924259

Iteration 3
Parameters: max_depth=8, min_samples_split=6, min_samples_leaf=1
Accuracy: 0.8662627136983337

Iteration 4
Parameters: max_depth=17, min_samples_split=4, min_samples_leaf=6
Accuracy: 0.850871023587968

Iteration 5
Parameters: max_depth=17, min_samples_split=8, min_samples_leaf=3
Accuracy: 0.8507628219000216

Iteration 6
Parameters: max_depth=5, min_samples_split=3, min_samples_leaf=2
Accuracy: 0.855280242371781

Iteration 7
Parameters: max_depth=14, min_samples_split=8, min_samples_leaf=2
Accuracy: 0.8563352088292577

Iteration 8
Parameters: max_depth=10, min_samples_split=7, min_samples_leaf=1
Accuracy: 0.8640175286734473

Iteration 9
Parameters: max_depth=12, min_samples_split=6, min_samples_leaf=6
Accuracy: 0.8590673014499026

Iteration 10
Parameters: max_dept

In [6]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Decision Tree with specific parameters
dt_model = DecisionTreeClassifier(max_depth=5, min_samples_split=2, min_samples_leaf=1, random_state=42)
dt_model.fit(X_train, y_train)
y_pred = dt_model.predict(X_val)

print("Accuracy:", accuracy_score(y_val, y_pred))

test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission1.csv', index=False)


Accuracy: 0.855280242371781


In [7]:
dt_model = DecisionTreeClassifier(max_depth=10, min_samples_split=5, min_samples_leaf=2, random_state=42)
dt_model.fit(X_train, y_train)
y_pred = dt_model.predict(X_val)

print("Accuracy:", accuracy_score(y_val, y_pred))

test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission2.csv', index=False)

# Iteration 3
dt_model = DecisionTreeClassifier(max_depth=15, min_samples_split=3, min_samples_leaf=3, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission3.csv', index=False)

# Iteration 4
dt_model = DecisionTreeClassifier(max_depth=7, min_samples_split=4, min_samples_leaf=1, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission4.csv', index=False)

# Iteration 5
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=6, min_samples_leaf=5, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission5.csv', index=False)

# Iteration 6
dt_model = DecisionTreeClassifier(max_depth=6, min_samples_split=2, min_samples_leaf=4, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission6.csv', index=False)

# Iteration 7
dt_model = DecisionTreeClassifier(max_depth=12, min_samples_split=7, min_samples_leaf=2, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission7.csv', index=False)

# Iteration 8
dt_model = DecisionTreeClassifier(max_depth=8, min_samples_split=3, min_samples_leaf=3, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission8.csv', index=False)

# Iteration 9
dt_model = DecisionTreeClassifier(max_depth=18, min_samples_split=5, min_samples_leaf=1, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission9.csv', index=False)

# Iteration 10
dt_model = DecisionTreeClassifier(max_depth=4, min_samples_split=8, min_samples_leaf=6, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission10.csv', index=False)

# Iteration 11
dt_model = DecisionTreeClassifier(max_depth=13, min_samples_split=9, min_samples_leaf=7, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission11.csv', index=False)

# Iteration 12
dt_model = DecisionTreeClassifier(max_depth=14, min_samples_split=4, min_samples_leaf=2, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission12.csv', index=False)

# Iteration 13
dt_model = DecisionTreeClassifier(max_depth=9, min_samples_split=2, min_samples_leaf=1, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission13.csv', index=False)

# Iteration 14
dt_model = DecisionTreeClassifier(max_depth=17, min_samples_split=6, min_samples_leaf=4, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission14.csv', index=False)

# Iteration 15
dt_model = DecisionTreeClassifier(max_depth=5, min_samples_split=3, min_samples_leaf=3, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission15.csv', index=False)

# Iteration 16
dt_model = DecisionTreeClassifier(max_depth=16, min_samples_split=8, min_samples_leaf=2, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission16.csv', index=False)

# Iteration 17
dt_model = DecisionTreeClassifier(max_depth=11, min_samples_split=5, min_samples_leaf=5, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission17.csv', index=False)

# Iteration 18
dt_model = DecisionTreeClassifier(max_depth=19, min_samples_split=7, min_samples_leaf=3, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission18.csv', index=False)

# Iteration 19
dt_model = DecisionTreeClassifier(max_depth=15, min_samples_split=9, min_samples_leaf=1, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission19.csv', index=False)

# Iteration 20
dt_model = DecisionTreeClassifier(max_depth=10, min_samples_split=3, min_samples_leaf=4, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission20.csv', index=False)

# Iteration 21
dt_model = DecisionTreeClassifier(max_depth=6, min_samples_split=2, min_samples_leaf=6, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission21.csv', index=False)

# Iteration 22
dt_model = DecisionTreeClassifier(max_depth=8, min_samples_split=4, min_samples_leaf=5, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission22.csv', index=False)

# Iteration 23
dt_model = DecisionTreeClassifier(max_depth=7, min_samples_split=6, min_samples_leaf=3, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission23.csv', index=False)

# Iteration 24
dt_model = DecisionTreeClassifier(max_depth=12, min_samples_split=7, min_samples_leaf=4, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission24.csv', index=False)



# Iteration 50
dt_model = DecisionTreeClassifier(max_depth=25, min_samples_split=2, min_samples_leaf=1, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission25.csv', index=False)

# Iteration 51
dt_model = DecisionTreeClassifier(max_depth=30, min_samples_split=5, min_samples_leaf=2, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission26.csv', index=False)

# Iteration 52
dt_model = DecisionTreeClassifier(max_depth=28, min_samples_split=3, min_samples_leaf=1, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission27.csv', index=False)

# Iteration 53
dt_model = DecisionTreeClassifier(max_depth=26, min_samples_split=4, min_samples_leaf=2, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission28.csv', index=False)

# Iteration 54
dt_model = DecisionTreeClassifier(max_depth=27, min_samples_split=3, min_samples_leaf=1, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission29.csv', index=False)



Accuracy: 0.8636117723436486


In [8]:
# Iteration 76
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=10, min_samples_leaf=3, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission76.csv', index=False)

# Iteration 77
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=9, min_samples_leaf=2, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission77.csv', index=False)

# Iteration 78
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=8, min_samples_leaf=3, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission78.csv', index=False)

# Iteration 79
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=7, min_samples_leaf=4, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission79.csv', index=False)

# Iteration 80
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=6, min_samples_leaf=5, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission80.csv', index=False)

# Iteration 81
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=5, min_samples_leaf=6, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission81.csv', index=False)

# Iteration 82
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=4, min_samples_leaf=7, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission82.csv', index=False)

# Iteration 83
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=3, min_samples_leaf=8, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission83.csv', index=False)

# Iteration 84
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=2, min_samples_leaf=9, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission84.csv', index=False)

# Iteration 85
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=10, min_samples_leaf=4, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission85.csv', index=False)

# Iteration 86
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=9, min_samples_leaf=5, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission86.csv', index=False)

# Iteration 87
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=8, min_samples_leaf=6, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission87.csv', index=False)

# Iteration 88
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=7, min_samples_leaf=7, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission88.csv', index=False)

# Iteration 89
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=6, min_samples_leaf=8, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission89.csv', index=False)

# Iteration 90
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=5, min_samples_leaf=9, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission90.csv', index=False)

# Iteration 91
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=4, min_samples_leaf=2, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission91.csv', index=False)

# Iteration 92
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=3, min_samples_leaf=3, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission92.csv', index=False)

# Iteration 93
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=2, min_samples_leaf=4, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission93.csv', index=False)

# Iteration 94
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=10, min_samples_leaf=5, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission94.csv', index=False)

# Iteration 95
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=9, min_samples_leaf=6, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission95.csv', index=False)

# Iteration 96
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=8, min_samples_leaf=7, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission96.csv', index=False)

# Iteration 97
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=7, min_samples_leaf=8, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission97.csv', index=False)

# Iteration 98
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=6, min_samples_leaf=9, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission98.csv', index=False)


# Iteration 100
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=9, min_samples_leaf=3, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission100.csv', index=False)

# Iteration 101
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=8, min_samples_leaf=2, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission101.csv', index=False)

# Iteration 102
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=7, min_samples_leaf=4, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission102.csv', index=False)

# Iteration 103
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=6, min_samples_leaf=5, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission103.csv', index=False)

# Iteration 104
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=5, min_samples_leaf=6, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission104.csv', index=False)

# Iteration 105
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=4, min_samples_leaf=7, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission105.csv', index=False)

# Iteration 106
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=3, min_samples_leaf=8, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission106.csv', index=False)

# Iteration 107
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=2, min_samples_leaf=9, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission107.csv', index=False)

# Iteration 108
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=7, min_samples_leaf=2, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission108.csv', index=False)

# Iteration 109
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=6, min_samples_leaf=3, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission109.csv', index=False)

# Iteration 110
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=5, min_samples_leaf=4, random_state=42)
dt_model.fit(X_train, y_train)
test_predictions = dt_model.predict(test_features)
submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
submission.to_csv('submission110.csv', index=False)

