# GaussianNB

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, log_loss

# Load the data
df = pd.read_csv('ds3.csv')

# Convert categorical variables to numeric variables
le = LabelEncoder()
categorical_cols = df.select_dtypes(include=['object']).columns
df[categorical_cols] = df[categorical_cols].apply(lambda col: le.fit_transform(col))

# Separate features and target
X = df.drop('Response', axis=1)
y = df['Response']

# Split data into training, validation, and test sets
X_train, X_val_test, y_train, y_val_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_val_test, y_val_test, test_size=0.5, random_state=42)

# Initialize and fit the model on the training set
model = GaussianNB()
model.fit(X_train, y_train)

# Make predictions on the training set and calculate loss
y_pred_train = model.predict(X_train)
y_pred_train_proba = model.predict_proba(X_train)
train_accuracy = accuracy_score(y_train, y_pred_train)
train_loss = log_loss(y_train, y_pred_train_proba)

# Make predictions on the validation set and calculate loss
y_pred_val = model.predict(X_val)
y_pred_val_proba = model.predict_proba(X_val)
val_accuracy = accuracy_score(y_val, y_pred_val)
val_loss = log_loss(y_val, y_pred_val_proba)

# Print training and validation loss and accuracy
print("Training Loss: %.2f" % train_loss)
print("Training Accuracy: %.2f%%" % (train_accuracy * 100.0))
print("Validation Loss: %.2f" % val_loss)
print("Validation Accuracy: %.2f%%" % (val_accuracy * 100.0))

# Print precision, recall, and F1 score for the validation set
print(classification_report(y_val, y_pred_val))


Training Loss: 0.38
Training Accuracy: 85.95%
Validation Loss: 0.41
Validation Accuracy: 83.92%
              precision    recall  f1-score   support

           0       0.84      1.00      0.91      1007
           1       0.00      0.00      0.00       193

    accuracy                           0.84      1200
   macro avg       0.42      0.50      0.46      1200
weighted avg       0.70      0.84      0.77      1200



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Logistic Regression

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, log_loss

# Load the data
df = pd.read_csv('ds3.csv')

# Convert categorical variables to numeric variables
le = LabelEncoder()
categorical_cols = df.select_dtypes(include=['object']).columns
df[categorical_cols] = df[categorical_cols].apply(lambda col: le.fit_transform(col))

# Separate features and target
X = df.drop('Response', axis=1)
y = df['Response']

# Split data into training, validation, and test sets
X_train, X_val_test, y_train, y_val_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_val_test, y_val_test, test_size=0.5, random_state=42)

# Initialize and fit the model on the training set
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Make predictions on the training set and calculate loss
y_pred_train = model.predict(X_train)
y_pred_train_proba = model.predict_proba(X_train)
train_accuracy = accuracy_score(y_train, y_pred_train)
train_loss = log_loss(y_train, y_pred_train_proba)

# Make predictions on the validation set and calculate loss
y_pred_val = model.predict(X_val)
y_pred_val_proba = model.predict_proba(X_val)
val_accuracy = accuracy_score(y_val, y_pred_val)
val_loss = log_loss(y_val, y_pred_val_proba)

# Print training and validation loss and accuracy
print("Training Loss: %.2f" % train_loss)
print("Training Accuracy: %.2f%%" % (train_accuracy * 100.0))
print("Validation Loss: %.2f" % val_loss)
print("Validation Accuracy: %.2f%%" % (val_accuracy * 100.0))

# Print precision, recall, and F1 score for the validation set
print(classification_report(y_val, y_pred_val))


Training Loss: 0.39
Training Accuracy: 85.95%
Validation Loss: 0.43
Validation Accuracy: 83.92%
              precision    recall  f1-score   support

           0       0.84      1.00      0.91      1007
           1       0.00      0.00      0.00       193

    accuracy                           0.84      1200
   macro avg       0.42      0.50      0.46      1200
weighted avg       0.70      0.84      0.77      1200



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# generalized_linear_model

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, log_loss
import statsmodels.api as sm

# Load the data
df = pd.read_csv('ds3.csv')

# Convert categorical variables to numeric variables
le = LabelEncoder()
categorical_cols = df.select_dtypes(include=['object']).columns
df[categorical_cols] = df[categorical_cols].apply(lambda col: le.fit_transform(col))

# Separate features and target
X = df.drop('Response', axis=1)
y = df['Response']

# Split data into training, validation, and test sets
X_train, X_val_test, y_train, y_val_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_val_test, y_val_test, test_size=0.5, random_state=42)

# Add a constant term for the intercept
X_train = sm.add_constant(X_train)
X_val = sm.add_constant(X_val)
X_test = sm.add_constant(X_test)

# Initialize and fit the GLM model on the training set
model = sm.GLM(y_train, X_train, family=sm.families.Binomial())
result = model.fit()

# Make predictions on the training set and calculate loss
y_pred_train_proba = result.predict(X_train)
y_pred_train = (y_pred_train_proba > 0.5).astype(int)
train_accuracy = accuracy_score(y_train, y_pred_train)
train_loss = log_loss(y_train, y_pred_train_proba)

# Make predictions on the validation set and calculate loss
y_pred_val_proba = result.predict(X_val)
y_pred_val = (y_pred_val_proba > 0.5).astype(int)
val_accuracy = accuracy_score(y_val, y_pred_val)
val_loss = log_loss(y_val, y_pred_val_proba)

# Print training and validation loss and accuracy
print("Training Loss: %.2f" % train_loss)
print("Training Accuracy: %.2f%%" % (train_accuracy * 100.0))
print("Validation Loss: %.2f" % val_loss)
print("Validation Accuracy: %.2f%%" % (val_accuracy * 100.0))

# Print precision, recall, and F1 score for the validation set
print(classification_report(y_val, y_pred_val))


Training Loss: 0.38
Training Accuracy: 85.89%
Validation Loss: 0.43
Validation Accuracy: 83.92%
              precision    recall  f1-score   support

           0       0.84      1.00      0.91      1007
           1       0.00      0.00      0.00       193

    accuracy                           0.84      1200
   macro avg       0.42      0.50      0.46      1200
weighted avg       0.70      0.84      0.77      1200



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# LogisticRegression

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, log_loss
import statsmodels.api as sm

# Load the data
df = pd.read_csv('ds3.csv')

# Convert categorical variables to numeric variables
le = LabelEncoder()
categorical_cols = df.select_dtypes(include=['object']).columns
df[categorical_cols] = df[categorical_cols].apply(lambda col: le.fit_transform(col))

# Separate features and target
X = df.drop('Response', axis=1)
y = df['Response']

# Split data into training, validation, and test sets
X_train, X_val_test, y_train, y_val_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_val_test, y_val_test, test_size=0.5, random_state=42)

# Add a constant term for the intercept
X_train = sm.add_constant(X_train)
X_val = sm.add_constant(X_val)
X_test = sm.add_constant(X_test)

# Initialize and fit the Logistic Regression model on the training set
model = sm.Logit(y_train, X_train)
result = model.fit()

# Make predictions on the training set and calculate loss
y_pred_train_proba = result.predict(X_train)
y_pred_train = (y_pred_train_proba > 0.5).astype(int)
train_accuracy = accuracy_score(y_train, y_pred_train)
train_loss = log_loss(y_train, y_pred_train_proba)

# Make predictions on the validation set and calculate loss
y_pred_val_proba = result.predict(X_val)
y_pred_val = (y_pred_val_proba > 0.5).astype(int)
val_accuracy = accuracy_score(y_val, y_pred_val)
val_loss = log_loss(y_val, y_pred_val_proba)

# Print training and validation loss and accuracy
print("Training Loss: %.2f" % train_loss)
print("Training Accuracy: %.2f%%" % (train_accuracy * 100.0))
print("Validation Loss: %.2f" % val_loss)
print("Validation Accuracy: %.2f%%" % (val_accuracy * 100.0))

# Print precision, recall, and F1 score for the validation set
print(classification_report(y_val, y_pred_val))


Optimization terminated successfully.
         Current function value: 0.377464
         Iterations 7
Training Loss: 0.38
Training Accuracy: 85.89%
Validation Loss: 0.43
Validation Accuracy: 83.92%
              precision    recall  f1-score   support

           0       0.84      1.00      0.91      1007
           1       0.00      0.00      0.00       193

    accuracy                           0.84      1200
   macro avg       0.42      0.50      0.46      1200
weighted avg       0.70      0.84      0.77      1200



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Deep Learning

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report, log_loss
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

# Load the data
df = pd.read_csv('ds3.csv')

# Convert categorical variables to numeric variables
le = LabelEncoder()
categorical_cols = df.select_dtypes(include=['object']).columns
df[categorical_cols] = df[categorical_cols].apply(lambda col: le.fit_transform(col))

# Separate features and target
X = df.drop('Response', axis=1)
y = df['Response']

# One-hot encode the target variable if it's categorical
y = to_categorical(y)

# Split data into training, validation, and test sets
X_train, X_val_test, y_train, y_val_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_val_test, y_val_test, test_size=0.5, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Initialize and build the neural network model
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(y_train.shape[1], activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=50, batch_size=32)

# Make predictions on the training set and calculate loss
y_pred_train_proba = model.predict(X_train)
y_pred_train = y_pred_train_proba.argmax(axis=1)
train_accuracy = accuracy_score(y_train.argmax(axis=1), y_pred_train)
train_loss = log_loss(y_train, y_pred_train_proba)

# Make predictions on the validation set and calculate loss
y_pred_val_proba = model.predict(X_val)
y_pred_val = y_pred_val_proba.argmax(axis=1)
val_accuracy = accuracy_score(y_val.argmax(axis=1), y_pred_val)
val_loss = log_loss(y_val, y_pred_val_proba)

# Print training and validation loss and accuracy
print("Training Loss: %.2f" % train_loss)
print("Training Accuracy: %.2f%%" % (train_accuracy * 100.0))
print("Validation Loss: %.2f" % val_loss)
print("Validation Accuracy: %.2f%%" % (val_accuracy * 100.0))

# Print precision, recall, and F1 score for the validation set
print(classification_report(y_val.argmax(axis=1), y_pred_val))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.8144 - loss: 0.4501 - val_accuracy: 0.8392 - val_loss: 0.4130
Epoch 2/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8589 - loss: 0.3544 - val_accuracy: 0.8392 - val_loss: 0.4039
Epoch 3/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8602 - loss: 0.3373 - val_accuracy: 0.8400 - val_loss: 0.3808
Epoch 4/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8632 - loss: 0.3140 - val_accuracy: 0.8367 - val_loss: 0.3646
Epoch 5/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8706 - loss: 0.2868 - val_accuracy: 0.8408 - val_loss: 0.3451
Epoch 6/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8735 - loss: 0.2745 - val_accuracy: 0.8458 - val_loss: 0.3420
Epoch 7/50
[1m175/175[0m 

# KNeighborsClassifier

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, log_loss

# Load the data
df = pd.read_csv('ds3.csv')

# Convert categorical variables to numeric variables
le = LabelEncoder()
categorical_cols = df.select_dtypes(include=['object']).columns
df[categorical_cols] = df[categorical_cols].apply(lambda col: le.fit_transform(col))

# Separate features and target
X = df.drop('Response', axis=1)
y = df['Response']

# Split data into training, validation, and test sets
X_train, X_val_test, y_train, y_val_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_val_test, y_val_test, test_size=0.5, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Initialize and fit the KNeighborsClassifier model on the training set
model = KNeighborsClassifier(n_neighbors=5)
model.fit(X_train, y_train)

# Make predictions on the training set and calculate loss
y_pred_train = model.predict(X_train)
y_pred_train_proba = model.predict_proba(X_train)
train_accuracy = accuracy_score(y_train, y_pred_train)
train_loss = log_loss(y_train, y_pred_train_proba)

# Make predictions on the validation set and calculate loss
y_pred_val = model.predict(X_val)
y_pred_val_proba = model.predict_proba(X_val)
val_accuracy = accuracy_score(y_val, y_pred_val)
val_loss = log_loss(y_val, y_pred_val_proba)

# Print training and validation loss and accuracy
print("Training Loss: %.2f" % train_loss)
print("Training Accuracy: %.2f%%" % (train_accuracy * 100.0))
print("Validation Loss: %.2f" % val_loss)
print("Validation Accuracy: %.2f%%" % (val_accuracy * 100.0))

# Print precision, recall, and F1 score for the validation set
print(classification_report(y_val, y_pred_val))


Training Loss: 0.17
Training Accuracy: 94.71%
Validation Loss: 0.29
Validation Accuracy: 88.50%
              precision    recall  f1-score   support

           0       0.93      0.94      0.93      1007
           1       0.65      0.61      0.63       193

    accuracy                           0.89      1200
   macro avg       0.79      0.77      0.78      1200
weighted avg       0.88      0.89      0.88      1200



# LinearDiscriminantAnalysis

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, classification_report, log_loss

# Load the data
df = pd.read_csv('ds3.csv')

# Convert categorical variables to numeric variables
le = LabelEncoder()
categorical_cols = df.select_dtypes(include=['object']).columns
df[categorical_cols] = df[categorical_cols].apply(lambda col: le.fit_transform(col))

# Separate features and target
X = df.drop('Response', axis=1)
y = df['Response']

# Split data into training, validation, and test sets
X_train, X_val_test, y_train, y_val_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_val_test, y_val_test, test_size=0.5, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Initialize and fit the LinearDiscriminantAnalysis model on the training set
model = LinearDiscriminantAnalysis()
model.fit(X_train, y_train)

# Make predictions on the training set and calculate loss
y_pred_train = model.predict(X_train)
y_pred_train_proba = model.predict_proba(X_train)
train_accuracy = accuracy_score(y_train, y_pred_train)
train_loss = log_loss(y_train, y_pred_train_proba)

# Make predictions on the validation set and calculate loss
y_pred_val = model.predict(X_val)
y_pred_val_proba = model.predict_proba(X_val)
val_accuracy = accuracy_score(y_val, y_pred_val)
val_loss = log_loss(y_val, y_pred_val_proba)

# Print training and validation loss and accuracy
print("Training Loss: %.2f" % train_loss)
print("Training Accuracy: %.2f%%" % (train_accuracy * 100.0))
print("Validation Loss: %.2f" % val_loss)
print("Validation Accuracy: %.2f%%" % (val_accuracy * 100.0))

# Print precision, recall, and F1 score for the validation set
print(classification_report(y_val, y_pred_val))


Training Loss: 0.38
Training Accuracy: 85.91%
Validation Loss: 0.43
Validation Accuracy: 83.92%
              precision    recall  f1-score   support

           0       0.84      1.00      0.91      1007
           1       0.00      0.00      0.00       193

    accuracy                           0.84      1200
   macro avg       0.42      0.50      0.46      1200
weighted avg       0.70      0.84      0.77      1200



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# QuadraticDiscriminantAnalysis

In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score, classification_report, log_loss

# Load the data
df = pd.read_csv('ds3.csv')

# Convert categorical variables to numeric variables
le = LabelEncoder()
categorical_cols = df.select_dtypes(include=['object']).columns
df[categorical_cols] = df[categorical_cols].apply(lambda col: le.fit_transform(col))

# Separate features and target
X = df.drop('Response', axis=1)
y = df['Response']

# Split data into training, validation, and test sets
X_train, X_val_test, y_train, y_val_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_val_test, y_val_test, test_size=0.5, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Initialize and fit the QuadraticDiscriminantAnalysis model on the training set
model = QuadraticDiscriminantAnalysis()
model.fit(X_train, y_train)

# Make predictions on the training set and calculate loss
y_pred_train = model.predict(X_train)
y_pred_train_proba = model.predict_proba(X_train)
train_accuracy = accuracy_score(y_train, y_pred_train)
train_loss = log_loss(y_train, y_pred_train_proba)

# Make predictions on the validation set and calculate loss
y_pred_val = model.predict(X_val)
y_pred_val_proba = model.predict_proba(X_val)
val_accuracy = accuracy_score(y_val, y_pred_val)
val_loss = log_loss(y_val, y_pred_val_proba)

# Print training and validation loss and accuracy
print("Training Loss: %.2f" % train_loss)
print("Training Accuracy: %.2f%%" % (train_accuracy * 100.0))
print("Validation Loss: %.2f" % val_loss)
print("Validation Accuracy: %.2f%%" % (val_accuracy * 100.0))

# Print precision, recall, and F1 score for the validation set
print(classification_report(y_val, y_pred_val))


Training Loss: 0.31
Training Accuracy: 86.75%
Validation Loss: 0.37
Validation Accuracy: 83.67%
              precision    recall  f1-score   support

           0       0.87      0.94      0.91      1007
           1       0.49      0.28      0.36       193

    accuracy                           0.84      1200
   macro avg       0.68      0.61      0.63      1200
weighted avg       0.81      0.84      0.82      1200



# GaussianProcessClassifier

In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.metrics import accuracy_score, classification_report, log_loss

# Load the data
df = pd.read_csv('ds3.csv')

# Convert categorical variables to numeric variables
le = LabelEncoder()
categorical_cols = df.select_dtypes(include=['object']).columns
df[categorical_cols] = df[categorical_cols].apply(lambda col: le.fit_transform(col))

# Separate features and target
X = df.drop('Response', axis=1)
y = df['Response']

# Split data into training, validation, and test sets
X_train, X_val_test, y_train, y_val_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_val_test, y_val_test, test_size=0.5, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Initialize and fit the GaussianProcessClassifier model on the training set
kernel = 1.0 * RBF(1.0)
model = GaussianProcessClassifier(kernel=kernel, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the training set and calculate loss
y_pred_train = model.predict(X_train)
y_pred_train_proba = model.predict_proba(X_train)
train_accuracy = accuracy_score(y_train, y_pred_train)
train_loss = log_loss(y_train, y_pred_train_proba)

# Make predictions on the validation set and calculate loss
y_pred_val = model.predict(X_val)
y_pred_val_proba = model.predict_proba(X_val)
val_accuracy = accuracy_score(y_val, y_pred_val)
val_loss = log_loss(y_val, y_pred_val_proba)

# Print training and validation loss and accuracy
print("Training Loss: %.2f" % train_loss)
print("Training Accuracy: %.2f%%" % (train_accuracy * 100.0))
print("Validation Loss: %.2f" % val_loss)
print("Validation Accuracy: %.2f%%" % (val_accuracy * 100.0))

# Print precision, recall, and F1 score for the validation set
print(classification_report(y_val, y_pred_val))


KeyboardInterrupt: 

# AdaBoostClassifier

In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report, log_loss

# Load the data
df = pd.read_csv('ds3.csv')

# Convert categorical variables to numeric variables
le = LabelEncoder()
categorical_cols = df.select_dtypes(include=['object']).columns
df[categorical_cols] = df[categorical_cols].apply(lambda col: le.fit_transform(col))

# Separate features and target
X = df.drop('Response', axis=1)
y = df['Response']

# Split data into training, validation, and test sets
X_train, X_val_test, y_train, y_val_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_val_test, y_val_test, test_size=0.5, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Initialize and fit the AdaBoostClassifier model on the training set
model = AdaBoostClassifier(n_estimators=50, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the training set and calculate loss
y_pred_train = model.predict(X_train)
y_pred_train_proba = model.predict_proba(X_train)
train_accuracy = accuracy_score(y_train, y_pred_train)
train_loss = log_loss(y_train, y_pred_train_proba)

# Make predictions on the validation set and calculate loss
y_pred_val = model.predict(X_val)
y_pred_val_proba = model.predict_proba(X_val)
val_accuracy = accuracy_score(y_val, y_pred_val)
val_loss = log_loss(y_val, y_pred_val_proba)

# Print training and validation loss and accuracy
print("Training Loss: %.2f" % train_loss)
print("Training Accuracy: %.2f%%" % (train_accuracy * 100.0))
print("Validation Loss: %.2f" % val_loss)
print("Validation Accuracy: %.2f%%" % (val_accuracy * 100.0))

# Print precision, recall, and F1 score for the validation set
print(classification_report(y_val, y_pred_val))


Training Loss: 0.63
Training Accuracy: 87.29%
Validation Loss: 0.64
Validation Accuracy: 84.83%
              precision    recall  f1-score   support

           0       0.86      0.98      0.92      1007
           1       0.63      0.14      0.23       193

    accuracy                           0.85      1200
   macro avg       0.74      0.56      0.57      1200
weighted avg       0.82      0.85      0.81      1200



# DecisionTreeClassifier

In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, log_loss

# Load the data
df = pd.read_csv('ds3.csv')

# Convert categorical variables to numeric variables
le = LabelEncoder()
categorical_cols = df.select_dtypes(include=['object']).columns
df[categorical_cols] = df[categorical_cols].apply(lambda col: le.fit_transform(col))

# Separate features and target
X = df.drop('Response', axis=1)
y = df['Response']

# Split data into training, validation, and test sets
X_train, X_val_test, y_train, y_val_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_val_test, y_val_test, test_size=0.5, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Initialize and fit the DecisionTreeClassifier model on the training set
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions on the training set and calculate loss
y_pred_train = model.predict(X_train)
y_pred_train_proba = model.predict_proba(X_train)
train_accuracy = accuracy_score(y_train, y_pred_train)
train_loss = log_loss(y_train, y_pred_train_proba)

# Make predictions on the validation set and calculate loss
y_pred_val = model.predict(X_val)
y_pred_val_proba = model.predict_proba(X_val)
val_accuracy = accuracy_score(y_val, y_pred_val)
val_loss = log_loss(y_val, y_pred_val_proba)

# Print training and validation loss and accuracy
print("Training Loss: %.2f" % train_loss)
print("Training Accuracy: %.2f%%" % (train_accuracy * 100.0))
print("Validation Loss: %.2f" % val_loss)
print("Validation Accuracy: %.2f%%" % (val_accuracy * 100.0))

# Print precision, recall, and F1 score for the validation set
print(classification_report(y_val, y_pred_val))


Training Loss: 0.00
Training Accuracy: 100.00%
Validation Loss: 1.50
Validation Accuracy: 95.83%
              precision    recall  f1-score   support

           0       1.00      0.95      0.97      1007
           1       0.80      0.99      0.88       193

    accuracy                           0.96      1200
   macro avg       0.90      0.97      0.93      1200
weighted avg       0.97      0.96      0.96      1200



# XGBClassifier

In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report, log_loss
from xgboost import XGBClassifier

# Load the data
df = pd.read_csv('ds3.csv')

# Convert categorical variables to numeric variables
le = LabelEncoder()
categorical_cols = df.select_dtypes(include=['object']).columns
df[categorical_cols] = df[categorical_cols].apply(lambda col: le.fit_transform(col))

# Separate features and target
X = df.drop('Response', axis=1)
y = df['Response']

# Split data into training, validation, and test sets
X_train, X_val_test, y_train, y_val_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_val_test, y_val_test, test_size=0.5, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Initialize and fit the XGBClassifier model on the training set
model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
model.fit(X_train, y_train)

# Make predictions on the training set and calculate loss
y_pred_train = model.predict(X_train)
y_pred_train_proba = model.predict_proba(X_train)
train_accuracy = accuracy_score(y_train, y_pred_train)
train_loss = log_loss(y_train, y_pred_train_proba)

# Make predictions on the validation set and calculate loss
y_pred_val = model.predict(X_val)
y_pred_val_proba = model.predict_proba(X_val)
val_accuracy = accuracy_score(y_val, y_pred_val)
val_loss = log_loss(y_val, y_pred_val_proba)

# Print training and validation loss and accuracy
print("Training Loss: %.2f" % train_loss)
print("Training Accuracy: %.2f%%" % (train_accuracy * 100.0))
print("Validation Loss: %.2f" % val_loss)
print("Validation Accuracy: %.2f%%" % (val_accuracy * 100.0))

# Print precision, recall, and F1 score for the validation set
print(classification_report(y_val, y_pred_val))


Training Loss: 0.01
Training Accuracy: 100.00%
Validation Loss: 0.05
Validation Accuracy: 99.17%
              precision    recall  f1-score   support

           0       1.00      0.99      1.00      1007
           1       0.95      1.00      0.97       193

    accuracy                           0.99      1200
   macro avg       0.98      1.00      0.98      1200
weighted avg       0.99      0.99      0.99      1200



Parameters: { "use_label_encoder" } are not used.

