**CODE FOR THE MODELS/ALGORITHMS**

In [None]:
# Importing necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, jaccard_score, roc_auc_score, confusion_matrix, classification_report
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

In [None]:
# Load the Excel workbook into a DataFrame and preview its first rows.
# (An earlier revision read "data/dataset with dummies.csv" with pd.read_csv instead.)
DATASET_PATH = "/content/DATASET2020/21.xlsx"
dataset = pd.read_excel(DATASET_PATH)
dataset.head()

In [None]:
# Convert the 'Povert status' column to pandas' categorical dtype, then list every
# column's dtype so the conversion can be checked.
# NOTE(review): a later cell reads 'Povert status/Clustered poverty' -- confirm which of
# the two column names actually exists in the workbook.
dataset['Povert status'] = dataset['Povert status'].astype('category')
dataset.dtypes

In [None]:
# Check for null values: count the missing entries in every column of the dataset
dataset.isnull().sum()

In [None]:
# Frequency of each category in the 'Povert status/Clustered poverty' column.
# NOTE(review): the neighbouring cells call the target 'Povert status' -- confirm the exact
# column name; if this column is absent the lookup raises KeyError.
dataset['Povert status/Clustered poverty'].value_counts()

In [None]:
# Independent variables: every column except the target 'Povert status'
X=dataset.loc[:, dataset.columns !='Povert status']
columns=list(X.columns)
# Dependent variable, kept as a one-column DataFrame
y=dataset[['Povert status']]
# Put the independent variables on a common scale
# FEATURE SCALING with StandardScaler (fitted on the full feature table)
sc = StandardScaler()
X = sc.fit_transform(X)
# Convert the scaled values back to a DataFrame carrying the original column names
X = pd.DataFrame(X, columns=columns)
# NOTE(review): every model section that follows reassigns X and y directly from
# `dataset` (dataset.iloc[...]), so this scaled X is not what the models are trained on --
# confirm whether scaling was meant to be applied.

**RBF Kernel**

In [None]:
# Importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA, KernelPCA
from sklearn.metrics import classification_report, roc_auc_score, jaccard_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVC
from sklearn.metrics import roc_curve

In [None]:
# Split the dataset into features (every column but the last) and labels (the last column),
# both taken as raw values directly from `dataset`.
# NOTE(review): this replaces the standardised X / y built in the preprocessing cell --
# confirm that training on unscaled features is intended.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [None]:
# Split the dataset into training (70%) and testing (30%) sets, stratified on the labels.
# random_state pins the shuffle so that Restart & Run All reproduces the same split and metrics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

In [None]:
# Fit PCA on the training features only and keep their projection onto the
# leading principal components
n_components = 32
pca = PCA(n_components=n_components)
X_train_pca = pca.fit_transform(X_train)

In [None]:
# Perform hyperparameter tuning using GridSearchCV (5-fold CV over every C / gamma pair).
# The previous lists were malformed: "0.6 1" lacked a comma (SyntaxError) and "0,2" was a
# mistyped 0.2 that silently put gamma=0 and gamma=2 into the grid.
param_grid = {
    'C': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 1],
    'gamma': [0.1, 0.2, 0.4, 0.6, 0.8, 1, 4, 8, 10],
}
grid = GridSearchCV(SVC(kernel='rbf'), param_grid=param_grid, cv=5)
grid.fit(X_train_pca, y_train)

In [None]:
# Refit an RBF-kernel SVC on the PCA-reduced training data using the C / gamma pair
# selected by the grid search
best = grid.best_params_
clf = SVC(kernel='rbf', C=best['C'], gamma=best['gamma'])
clf.fit(X_train_pca, y_train)

In [None]:
# 5-fold cross-validation of the tuned classifier on the PCA-reduced training data
scores = cross_val_score(estimator=clf, X=X_train_pca, y=y_train, cv=5)

In [None]:
# Class-weighted RBF SVC.
# The previous weights {0: 1.0, 1: 0} gave class 1 a weight of zero, i.e. every class-1
# training sample was ignored. 'balanced' weights each class inversely to its frequency
# in y_train instead.
class_weights = 'balanced'
clf_weighted = SVC(kernel='rbf', C=grid.best_params_['C'], gamma=grid.best_params_['gamma'], class_weight=class_weights)
clf_weighted.fit(X_train_pca, y_train)

In [None]:
# Project the test set with the PCA fitted on the training data, then predict its labels
# with both the unweighted and the class-weighted classifier
X_test_pca = pca.transform(X_test)
y_pred, y_pred_weighted = (fitted.predict(X_test_pca) for fitted in (clf, clf_weighted))

In [None]:
# ROC AUC of the class-weighted model on the test set.
# NOTE(review): this is computed from hard label predictions rather than decision scores --
# confirm that is intended.
roc_auc_score(y_true=y_test, y_score=y_pred_weighted)

In [None]:
# ROC curve of the class-weighted model on the test set
fpr, tpr, thresholds = roc_curve(y_test, y_pred_weighted)
fig, ax = plt.subplots()
ax.plot(fpr, tpr, label="Data 1")
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.legend(loc=4)
plt.show()

# Per-class precision / recall / F1 summary
report = classification_report(y_test, y_pred_weighted)
print(report)

# Jaccard score (last expression, so it is shown as the cell output)
jaccard_score(y_test, y_pred_weighted)

In [None]:
# Jaccard score of the class-weighted predictions (same value as at the end of the previous cell)
jaccard_score(y_true=y_test, y_pred=y_pred_weighted)

**Linear Kernel**

In [None]:
# Importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA, KernelPCA
from sklearn.metrics import classification_report, roc_auc_score, jaccard_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVC

In [None]:
# Split the dataset into features and labels
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [None]:
# Split the dataset into training (70%) and testing (30%) sets, stratified on the labels.
# random_state pins the shuffle so that Restart & Run All reproduces the same split and metrics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

In [None]:
# Perform PCA on the training set
pca = PCA(n_components=32)
X_train_pca = pca.fit_transform(X_train)

In [None]:
# Perform hyperparameter tuning using GridSearchCV (5-fold CV over every C / gamma pair).
# The previous lists were malformed: "0.6 1" lacked a comma (SyntaxError) and "0,2" was a
# mistyped 0.2 that silently put gamma=0 and gamma=2 into the grid.
# 'gamma' stays in the grid because the following cells read grid.best_params_['gamma'];
# NOTE(review): SVC documents gamma as a coefficient for the rbf/poly/sigmoid kernels only,
# so for kernel='linear' these candidates should all score the same -- consider dropping it.
param_grid = {
    'C': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 1],
    'gamma': [0.1, 0.2, 0.4, 0.6, 0.8, 1, 4, 8, 10],
}
grid = GridSearchCV(SVC(kernel='linear'), param_grid=param_grid, cv=5)
grid.fit(X_train_pca, y_train)

In [None]:
# Train the model with the best hyperparameters
clf = SVC(kernel='linear', C=grid.best_params_['C'], gamma=grid.best_params_['gamma'])
clf.fit(X_train_pca, y_train)

In [None]:
# Perform cross-validation on the training set
scores = cross_val_score(clf, X_train_pca, y_train, cv=5)

In [None]:
# Class-weighted linear SVC.
# The previous weights {0: 1.0, 1: 0} gave class 1 a weight of zero, i.e. every class-1
# training sample was ignored. 'balanced' weights each class inversely to its frequency
# in y_train instead.
class_weights = 'balanced'
clf_weighted = SVC(kernel='linear', C=grid.best_params_['C'], gamma=grid.best_params_['gamma'], class_weight=class_weights)
clf_weighted.fit(X_train_pca, y_train)

In [None]:
# Predict the labels of the testing set using the trained model
X_test_pca = pca.transform(X_test)
y_pred = clf.predict(X_test_pca)
y_pred_weighted = clf_weighted.predict(X_test_pca)

In [None]:
# Plot AUC score for binary classification in Python
roc_auc_score(y_test, y_pred_weighted)

In [None]:
# Plot AUC score for binary classification in Python
roc_auc_score(y_test, y_pred_weighted)

# Plot ROC curve in Python for binary classification
fpr, tpr, thresholds = roc_curve(y_test, y_pred_weighted)
plt.plot(fpr,tpr,label="data 1")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc=4)
plt.show()

# Plot Classification report in Python for binary classification
print(classification_report(y_test,y_pred_weighted))

# Plot Jaccard score in Python for binary classification
jaccard_score(y_test,y_pred_weighted)

**Polynomial Kernel**

In [None]:
# Importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA, KernelPCA
from sklearn.metrics import classification_report, roc_auc_score, jaccard_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVC

In [None]:
# Split the dataset into features and labels
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [None]:
# Split the dataset into training (70%) and testing (30%) sets, stratified on the labels.
# random_state pins the shuffle so that Restart & Run All reproduces the same split and metrics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

In [None]:
# Perform PCA on the training set
pca = PCA(n_components=32)
X_train_pca = pca.fit_transform(X_train)

In [None]:
# Perform hyperparameter tuning using GridSearchCV (5-fold CV over every C / gamma pair).
# The previous lists were malformed: "0.6 1" lacked a comma (SyntaxError) and "0,2" was a
# mistyped 0.2 that silently put gamma=0 and gamma=2 into the grid.
param_grid = {
    'C': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 1],
    'gamma': [0.1, 0.2, 0.4, 0.6, 0.8, 1, 4, 8, 10],
}
grid = GridSearchCV(SVC(kernel='poly'), param_grid=param_grid, cv=5)
grid.fit(X_train_pca, y_train)

In [None]:
# Train the model with the best hyperparameters
clf = SVC(kernel='poly', C=grid.best_params_['C'], gamma=grid.best_params_['gamma'])
clf.fit(X_train_pca, y_train)

In [None]:
# Perform cross-validation on the training set
scores = cross_val_score(clf, X_train_pca, y_train, cv=5)

In [None]:
# Class-weighted polynomial SVC.
# The previous weights {0: 1.0, 1: 0} gave class 1 a weight of zero, i.e. every class-1
# training sample was ignored. 'balanced' weights each class inversely to its frequency
# in y_train instead.
class_weights = 'balanced'
clf_weighted = SVC(kernel='poly', C=grid.best_params_['C'], gamma=grid.best_params_['gamma'], class_weight=class_weights)
clf_weighted.fit(X_train_pca, y_train)

In [None]:
# Predict the labels of the testing set using the trained model
X_test_pca = pca.transform(X_test)
y_pred = clf.predict(X_test_pca)
y_pred_weighted = clf_weighted.predict(X_test_pca)

In [None]:
# Plot AUC score for binary classification in Python
roc_auc_score(y_test, y_pred_weighted)

In [None]:
# Plot ROC curve in Python for binary classification
fpr, tpr, thresholds = roc_curve(y_test, y_pred_weighted)
plt.plot(fpr,tpr,label="data 1")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc=4)
plt.show()

# Plot Classification report in Python for binary classification
print(classification_report(y_test,y_pred_weighted))

# Plot Jaccard score in Python for binary classification
jaccard_score(y_test,y_pred_weighted)

**XGBoosting**

In [None]:
# Importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, roc_auc_score, jaccard_score, matthews_corrcoef, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from xgboost import XGBClassifier

In [None]:
# Split the dataset into features and labels
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [None]:
# Split the dataset into training (70%) and testing (30%) sets, stratified on the labels.
# random_state pins the shuffle so that Restart & Run All reproduces the same split and metrics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

In [None]:
# Perform PCA on the training set
pca = PCA(n_components=32)
X_train_pca = pca.fit_transform(X_train)

In [None]:
# Perform hyperparameter tuning using GridSearchCV
param_grid = {'max_depth': [2,3, 4, 5, 6, 7,8], 'learning_rate': [0.1, 0.01, 0.2,0.3], 'n_estimators': [50, 100, 200, 250]}
grid = GridSearchCV(XGBClassifier(), param_grid=param_grid, cv=5)
grid.fit(X_train_pca, y_train)

In [None]:
# Train the model with the best hyperparameters
clf = XGBClassifier(max_depth=grid.best_params_['max_depth'], learning_rate=grid.best_params_['learning_rate'], n_estimators=grid.best_params_['n_estimators'])
clf.fit(X_train_pca, y_train)

In [None]:
# Perform cross-validation on the training set
scores = cross_val_score(clf, X_train_pca, y_train, cv=5)

In [None]:
# Class-weighted XGBoost model.
# The previous cell passed scale_pos_weight=0 (class_weights[1]), which removes the positive
# class from the loss. Use the negative/positive ratio of the training labels instead, the
# value the XGBoost documentation suggests for imbalanced binary problems.
# Assumes the labels are coded 0 (negative) / 1 (positive), as the original weight keys imply
# -- TODO confirm.
y_train_arr = np.asarray(y_train)
n_negative = np.sum(y_train_arr == 0)
n_positive = np.sum(y_train_arr == 1)
class_weights = {0: 1.0, 1: float(n_negative) / float(n_positive)}
clf_weighted = XGBClassifier(max_depth=grid.best_params_['max_depth'], learning_rate=grid.best_params_['learning_rate'], n_estimators=grid.best_params_['n_estimators'], scale_pos_weight=class_weights[1])
clf_weighted.fit(X_train_pca, y_train)

In [None]:
# Predict the labels of the testing set using the trained model
X_test_pca = pca.transform(X_test)
y_pred = clf.predict(X_test_pca)
y_pred_weighted = clf_weighted.predict(X_test_pca)

In [None]:
# Plot AUC score for binary classification in Python
roc_auc_score(y_test, y_pred_weighted)

In [None]:
# Plot ROC curve in Python for binary classification
fpr, tpr, thresholds = roc_curve(y_test, y_pred_weighted)
plt.plot(fpr,tpr,label="data 1")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc=4)
plt.show()

In [None]:
# Plot Classification report in Python for binary classification
print(classification_report(y_test,y_pred_weighted))

# Plot Jaccard score in Python for binary classification
jaccard_score(y_test,y_pred_weighted)

# Plot Matthews Correlation Coefficient in Python for binary classification
matthews_corrcoef(y_test,y_pred_weighted)

# Plot confusion matrix in Python for binary classification
confusion_matrix(y_test,y_pred_weighted)

In [None]:
# Plot Jaccard score in Python for binary classification
jaccard_score(y_test,y_pred_weighted)

In [None]:
# Plot Matthews Correlation Coefficient in Python for binary classification
matthews_corrcoef(y_test,y_pred_weighted)

In [None]:
# Plot confusion matrix in Python for binary classification
confusion_matrix(y_test,y_pred_weighted)

**Naive Bayesian model**

In [None]:
# Importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, roc_auc_score, jaccard_score, matthews_corrcoef, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.naive_bayes import GaussianNB

In [None]:
# Split the dataset into features and labels
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [None]:
# Split the dataset into training (70%) and testing (30%) sets, stratified on the labels.
# random_state pins the shuffle so that Restart & Run All reproduces the same split and metrics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

In [None]:
# Perform PCA on the training set
pca = PCA(n_components=32)
X_train_pca = pca.fit_transform(X_train)

In [None]:
# Perform hyperparameter tuning using GridSearchCV
param_grid = {'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]}
grid = GridSearchCV(GaussianNB(), param_grid=param_grid, cv=5)
grid.fit(X_train_pca, y_train)

In [None]:
# Train the model with the best hyperparameters
clf = GaussianNB(var_smoothing=grid.best_params_['var_smoothing'])
clf.fit(X_train_pca, y_train)

In [None]:
# Perform cross-validation on the training set
scores = cross_val_score(clf, X_train_pca, y_train, cv=5)

In [None]:
# Class-weighted Gaussian Naive Bayes.
# Previously class_weights ({0: 1.0, 1: 0}) was defined but never used, so clf_weighted was
# an exact copy of clf. GaussianNB has no class_weight option, so the weighting is applied
# through per-sample weights: each class gets n_samples / (n_classes * class_count).
labels, label_idx, label_counts = np.unique(np.asarray(y_train), return_inverse=True, return_counts=True)
balanced = len(label_idx) / (len(labels) * label_counts)
class_weights = dict(zip(labels, balanced))
sample_weights = balanced[label_idx]  # weight of each training row = weight of its class
clf_weighted = GaussianNB(var_smoothing=grid.best_params_['var_smoothing'])
clf_weighted.fit(X_train_pca, y_train, sample_weight=sample_weights)

In [None]:
# Predict the labels of the testing set using the trained model
X_test_pca = pca.transform(X_test)
y_pred = clf.predict(X_test_pca)
y_pred_weighted = clf_weighted.predict(X_test_pca)

In [None]:
# Plot AUC score for binary classification in Python
roc_auc_score(y_test, y_pred_weighted)

In [None]:
# Plot ROC curve in Python for binary classification
fpr, tpr, thresholds = roc_curve(y_test, y_pred_weighted)
plt.plot(fpr,tpr,label="data 1")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc=4)
plt.show()

In [None]:
# Plot Classification report in Python for binary classification
print(classification_report(y_test,y_pred_weighted))

In [None]:
# Plot Jaccard score in Python for binary classification
jaccard_score(y_test,y_pred_weighted)

In [None]:
# Plot Matthews Correlation Coefficient in Python for binary classification
matthews_corrcoef(y_test,y_pred_weighted)

In [None]:
# Plot confusion matrix in Python for binary classification
confusion_matrix(y_test,y_pred_weighted)

**K-Nearest neighbors classifier**

In [None]:
# Importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, roc_auc_score, jaccard_score, matthews_corrcoef, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Split the dataset into features and labels
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [None]:
# Split the dataset into training (70%) and testing (30%) sets, stratified on the labels.
# random_state pins the shuffle so that Restart & Run All reproduces the same split and metrics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

In [None]:
# Perform PCA on the training set
pca = PCA(n_components=32)
X_train_pca = pca.fit_transform(X_train)

In [None]:
# Perform hyperparameter tuning using GridSearchCV
param_grid = {'n_neighbors': [3,4, 5,6,7], 'weights': ['uniform', 'distance']}
grid = GridSearchCV(KNeighborsClassifier(), param_grid=param_grid, cv=5)
grid.fit(X_train_pca, y_train)

In [None]:
# "Weighted" KNN model, fitted with the n_neighbors / weights selected by the grid search.
# NOTE(review): class_weights is defined but never passed to the classifier, so
# clf_weighted is configured exactly like clf in the next cell. The weight of 0 for
# class 1 also looks unintended -- confirm what weighting was meant here.
class_weights = {0: 1.0, 1: 0}
clf_weighted = KNeighborsClassifier(n_neighbors=grid.best_params_['n_neighbors'], weights=grid.best_params_['weights'])
clf_weighted.fit(X_train_pca, y_train)

In [None]:
# Train the model with the best hyperparameters
clf = KNeighborsClassifier(n_neighbors=grid.best_params_['n_neighbors'], weights=grid.best_params_['weights'])
clf.fit(X_train_pca, y_train)

In [None]:
# Perform cross-validation on the training set
scores = cross_val_score(clf, X_train_pca, y_train, cv=5)

In [None]:
# Predict the labels of the testing set using the trained model
X_test_pca = pca.transform(X_test)
y_pred = clf.predict(X_test_pca)
y_pred_weighted = clf_weighted.predict(X_test_pca)

In [None]:
# Plot AUC score for binary classification in Python
roc_auc_score(y_test, y_pred_weighted)

In [None]:
# Plot ROC curve in Python for binary classification
fpr, tpr, thresholds = roc_curve(y_test, y_pred_weighted)
plt.plot(fpr,tpr,label="data 1")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc=4)
plt.show()

In [None]:
# Plot Classification report in Python for binary classification
print(classification_report(y_test,y_pred_weighted))

In [None]:
# Plot Jaccard score in Python for binary classification
jaccard_score(y_test,y_pred_weighted)

In [None]:
# Plot Matthews Correlation Coefficient in Python for binary classification
matthews_corrcoef(y_test,y_pred_weighted)

In [None]:
# Plot confusion matrix in Python for binary classification
confusion_matrix(y_test,y_pred_weighted)

**Artificial Neural Network**

In [None]:
# Importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, roc_auc_score, jaccard_score, matthews_corrcoef, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

In [None]:
# Split the dataset into features and labels
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [None]:
# Split the dataset into training (70%) and testing (30%) sets, stratified on the labels.
# random_state pins the shuffle so that Restart & Run All reproduces the same split and metrics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

In [None]:
# Perform PCA on the training set
pca = PCA(n_components=32)
X_train_pca = pca.fit_transform(X_train)

In [None]:
# Define the model architecture: 12-8-1 fully connected network with a sigmoid output
# for binary classification.
# The input width follows the PCA output; the previous hard-coded input_dim=2 did not match
# the 32 components produced by the PCA cell.
model = Sequential()
model.add(Dense(12, input_dim=X_train_pca.shape[1], activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [None]:
# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Define (but do not run) a grid search over batch size and number of epochs.
# The fit call below is commented out, so no search happens in this cell.
# NOTE(review): `model` is a plain Keras Sequential, which does not provide the
# scikit-learn estimator interface (get_params / set_params) GridSearchCV needs to clone
# candidates -- fitting this grid as-is is expected to fail; confirm before re-enabling.
param_grid = {'batch_size': [10, 20], 'epochs': [50, 100]}
grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring='accuracy')
#grid.fit(X_train_pca, y_train)

In [None]:
# Build and compile the classifier network (training itself is disabled in this cell).
# The input width follows the PCA output; the previous hard-coded input_dim=2 did not match
# the 32 components produced by the PCA cell.
clf = Sequential()
clf.add(Dense(12, input_dim=X_train_pca.shape[1], activation='relu'))
clf.add(Dense(8, activation='relu'))
clf.add(Dense(1, activation='sigmoid'))
clf.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
#clf.fit(X_train_pca, y_train, epochs=grid.best_params_['epochs'], batch_size=grid.best_params_['batch_size'])

In [None]:


# Hyperparameter search, training, cross-validation and evaluation of the ANN.
#
# Fixes relative to the previous version of this cell:
#   * a bare Keras model is not a scikit-learn estimator, so GridSearchCV / cross_val_score
#     cannot clone or score it -- the search and the cross-validation are done with an
#     explicit StratifiedKFold loop instead;
#   * the first Dense layer declared input_dim=2 although the PCA cell produces 32
#     components -- the input width now follows X_train_pca;
#   * class weight 0 for class 1 removed that class from the loss -- balanced weights are used;
#   * Sequential.predict_classes no longer exists in current Keras -- the sigmoid output is
#     thresholded at 0.5 instead.
from sklearn.model_selection import StratifiedKFold


def build_ann(n_features):
    """Return a freshly compiled 12-8-1 binary classifier expecting `n_features` inputs."""
    net = Sequential()
    net.add(Dense(12, input_dim=n_features, activation='relu'))
    net.add(Dense(8, activation='relu'))
    net.add(Dense(1, activation='sigmoid'))
    net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return net


def ann_cv_accuracy(X_arr, y_arr, epochs, batch_size, n_splits=5):
    """Train a fresh network on each stratified fold and return the held-out accuracies."""
    folds = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    fold_accuracy = []
    for fit_idx, val_idx in folds.split(X_arr, y_arr):
        net = build_ann(X_arr.shape[1])
        net.fit(X_arr[fit_idx], y_arr[fit_idx], epochs=epochs, batch_size=batch_size, verbose=0)
        # evaluate() returns [loss, accuracy] because the model is compiled with metrics=['accuracy']
        fold_accuracy.append(net.evaluate(X_arr[val_idx], y_arr[val_idx], verbose=0)[1])
    return np.array(fold_accuracy)


# binary_crossentropy needs numeric targets -- assumes the labels are already coded 0/1
# (as the original class-weight keys imply); TODO confirm.
y_train_ann = np.asarray(y_train).astype('float32')
n_features = X_train_pca.shape[1]

# Perform hyperparameter tuning: 5-fold CV accuracy for every batch_size / epochs pair
param_grid = {'batch_size': [10, 20], 'epochs': [50, 100]}
cv_results = {}
for batch_size in param_grid['batch_size']:
    for epochs in param_grid['epochs']:
        cv_results[(batch_size, epochs)] = ann_cv_accuracy(X_train_pca, y_train_ann, epochs, batch_size)
best_batch_size, best_epochs = max(cv_results, key=lambda combo: cv_results[combo].mean())
best_params = {'batch_size': best_batch_size, 'epochs': best_epochs}
print(f"Best parameters: {best_params}")

# Cross-validation scores of the selected configuration on the training set
scores = cv_results[(best_batch_size, best_epochs)]

# Train the model with the best hyperparameters
clf = build_ann(n_features)
clf.fit(X_train_pca, y_train_ann, epochs=best_epochs, batch_size=best_batch_size, verbose=0)

# Class-weighted model: each class is weighted by n_samples / (n_classes * class_count)
class_counts = np.bincount(y_train_ann.astype(int))
class_weights = {
    label: float(len(y_train_ann)) / (len(class_counts) * count)
    for label, count in enumerate(class_counts)
}
clf_weighted = build_ann(n_features)
clf_weighted.fit(X_train_pca, y_train_ann, epochs=best_epochs, batch_size=best_batch_size, class_weight=class_weights, verbose=0)

# Predict the labels of the testing set using the trained model
X_test_pca = pca.transform(X_test)
y_pred_weighted = (clf_weighted.predict(X_test_pca) > 0.5).astype(int).ravel()

# Plot AUC score for binary classification in Python
print(f"AUC Score: {roc_auc_score(y_test,y_pred_weighted)}")

# Plot ROC curve in Python for binary classification
fpr,tpr,_=roc_curve(y_test,y_pred_weighted)
plt.plot(fpr,tpr,label="data 1")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc=4)
plt.show()

# Plot Classification report in Python for binary classification
print(classification_report(y_test,y_pred_weighted))

# Plot Jaccard score in Python for binary classification
print(f"Jaccard Score: {jaccard_score(y_test,y_pred_weighted)}")

# Plot Matthews Correlation Coefficient in Python for binary classification
print(f"Matthews Correlation Coefficient: {matthews_corrcoef(y_test,y_pred_weighted)}")

# Plot confusion matrix in Python for binary classification
print(f"Confusion Matrix:\n{confusion_matrix(y_test,y_pred_weighted)}")


**Gradient boosting**

In [None]:
# Importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, roc_auc_score, jaccard_score, matthews_corrcoef, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.ensemble import GradientBoostingClassifier

In [None]:

# Split the dataset into features and labels
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [None]:
# Split the dataset into training (70%) and testing (30%) sets, stratified on the labels.
# random_state pins the shuffle so that Restart & Run All reproduces the same split and metrics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

In [None]:

# Perform PCA on the training set
pca = PCA(n_components=32)
X_train_pca = pca.fit_transform(X_train)

In [None]:
# Perform hyperparameter tuning using GridSearchCV
param_grid = {'n_estimators': [100, 200, 250, 230,300, 400], 'max_depth': [3, 5, 6,7,8], 'learning_rate': [0.01, 0.1, 0.2,0.3]}
grid = GridSearchCV(GradientBoostingClassifier(), param_grid=param_grid, cv=5)
grid.fit(X_train_pca, y_train)

In [None]:
# Train the model with the best hyperparameters
clf = GradientBoostingClassifier(n_estimators=grid.best_params_['n_estimators'], max_depth=grid.best_params_['max_depth'], learning_rate=grid.best_params_['learning_rate'])
clf.fit(X_train_pca, y_train)

In [None]:
# Perform cross-validation on the training set
scores = cross_val_score(clf, X_train_pca, y_train, cv=5)

In [None]:
# Class-weighted gradient boosting model.
# Previously class_weights ({0: 1.0, 1: 0}) was defined but never used, so clf_weighted was
# trained exactly like clf. GradientBoostingClassifier has no class_weight option, so the
# weighting is applied through per-sample weights: each class gets
# n_samples / (n_classes * class_count).
labels, label_idx, label_counts = np.unique(np.asarray(y_train), return_inverse=True, return_counts=True)
balanced = len(label_idx) / (len(labels) * label_counts)
class_weights = dict(zip(labels, balanced))
sample_weights = balanced[label_idx]  # weight of each training row = weight of its class
clf_weighted = GradientBoostingClassifier(n_estimators=grid.best_params_['n_estimators'], max_depth=grid.best_params_['max_depth'], learning_rate=grid.best_params_['learning_rate'])
clf_weighted.fit(X_train_pca, y_train, sample_weight=sample_weights)

In [None]:
# Predict the labels of the testing set using the trained model
X_test_pca = pca.transform(X_test)
y_pred_weighted = clf_weighted.predict(X_test_pca)

In [None]:
# Plot AUC score for binary classification in Python
print(f"AUC Score: {roc_auc_score(y_test,y_pred_weighted)}")

In [None]:
# Plot ROC curve in Python for binary classification
fpr,tpr,_=roc_curve(y_test,y_pred_weighted)
plt.plot(fpr,tpr,label="data 1")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc=4)
plt.show()

In [None]:
# Plot Classification report in Python for binary classification
print(classification_report(y_test,y_pred_weighted))

In [None]:
# Plot Jaccard score in Python for binary classification
print(f"Jaccard Score: {jaccard_score(y_test,y_pred_weighted)}")

In [None]:
# Plot Matthews Correlation Coefficient in Python for binary classification
print(f"Matthews Correlation Coefficient: {matthews_corrcoef(y_test,y_pred_weighted)}")

In [None]:
# Plot confusion matrix in Python for binary classification
print(f"Confusion Matrix:\n{confusion_matrix(y_test,y_pred_weighted)}")

**Logistic regression**

In [None]:
# Importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, roc_auc_score, jaccard_score, matthews_corrcoef, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.linear_model import LogisticRegression

In [None]:
# Split the dataset into features and labels
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [None]:
# Split the dataset into training (70%) and testing (30%) sets, stratified on the labels.
# random_state pins the shuffle so that Restart & Run All reproduces the same split and metrics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

In [None]:
# Perform PCA on the training set
pca = PCA(n_components=32)
X_train_pca = pca.fit_transform(X_train)

In [None]:
# Perform hyperparameter tuning using GridSearchCV (5-fold CV over the regularisation strength C).
# 'l1' was removed from the penalty candidates: LogisticRegression() uses the default
# 'lbfgs' solver, which rejects an L1 penalty, so every 'l1' candidate failed to fit and
# could never be selected. 'penalty' stays in the grid because the later cells read
# grid.best_params_['penalty'].
# To really search L1, pass solver='liblinear' (or 'saga') here AND in the
# LogisticRegression constructors of the following cells.
param_grid = {'C': [0.1, 0.2, 0.3, 0.6, 0.8, 1, 4, 6, 8, 10], 'penalty': ['l2']}
grid = GridSearchCV(LogisticRegression(), param_grid=param_grid, cv=5)
grid.fit(X_train_pca, y_train)

In [None]:
# Train the model with the best hyperparameters
clf = LogisticRegression(C=grid.best_params_['C'], penalty=grid.best_params_['penalty'])
clf.fit(X_train_pca, y_train)

In [None]:
# Perform cross-validation on the training set
scores = cross_val_score(clf, X_train_pca, y_train, cv=5)

In [None]:
# Class-weighted logistic regression.
# The previous weights {0: 1.0, 1: 0} gave class 1 a weight of zero, i.e. every class-1
# training sample was ignored. 'balanced' weights each class inversely to its frequency
# in y_train instead.
class_weights = 'balanced'
clf_weighted = LogisticRegression(C=grid.best_params_['C'], penalty=grid.best_params_['penalty'], class_weight=class_weights)
clf_weighted.fit(X_train_pca, y_train)

In [None]:
# Predict the labels of the testing set using the trained model
X_test_pca = pca.transform(X_test)
y_pred_weighted = clf_weighted.predict(X_test_pca)

In [None]:
# Plot AUC score for binary classification in Python
print(f"AUC Score: {roc_auc_score(y_test,y_pred_weighted)}")

In [None]:
# Plot ROC curve in Python for binary classification
fpr,tpr,_=roc_curve(y_test,y_pred_weighted)
plt.plot(fpr,tpr,label="data 1")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc=4)
plt.show()

In [None]:
# Plot Classification report in Python for binary classification
print(classification_report(y_test,y_pred_weighted))

In [None]:
# Plot Jaccard score in Python for binary classification
print(f"Jaccard Score: {jaccard_score(y_test,y_pred_weighted)}")

In [None]:
# Plot Matthews Correlation Coefficient in Python for binary classification
print(f"Matthews Correlation Coefficient: {matthews_corrcoef(y_test,y_pred_weighted)}")

In [None]:
# Plot confusion matrix in Python for binary classification
print(f"Confusion Matrix:\n{confusion_matrix(y_test,y_pred_weighted)}")

**Decision tree**

In [None]:
# Importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, roc_auc_score, jaccard_score, matthews_corrcoef, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Split the dataset into features and labels
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [None]:
# Split the dataset into training (70%) and testing (30%) sets, stratified on the labels.
# random_state pins the shuffle so that Restart & Run All reproduces the same split and metrics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

In [None]:
# Perform PCA on the training set
pca = PCA(n_components=32)
X_train_pca = pca.fit_transform(X_train)

In [None]:
# Perform hyperparameter tuning using GridSearchCV
param_grid = {'max_depth': [3, 4, 5, 6, 8, 9], 'min_samples_split': [2,3, 4, 5, 6, 7]}
grid = GridSearchCV(DecisionTreeClassifier(), param_grid=param_grid, cv=5)
grid.fit(X_train_pca, y_train)

In [None]:
# Train the model with the best hyperparameters
clf = DecisionTreeClassifier(max_depth=grid.best_params_['max_depth'], min_samples_split=grid.best_params_['min_samples_split'])
clf.fit(X_train_pca, y_train)

In [None]:
# Perform cross-validation on the training set
scores = cross_val_score(clf, X_train_pca, y_train, cv=5)

In [None]:
# Class-weighted decision tree.
# The previous weights {0: 1.0, 1: 0} gave class 1 a weight of zero, i.e. every class-1
# training sample was ignored. 'balanced' weights each class inversely to its frequency
# in y_train instead.
class_weights = 'balanced'
clf_weighted = DecisionTreeClassifier(max_depth=grid.best_params_['max_depth'], min_samples_split=grid.best_params_['min_samples_split'], class_weight=class_weights)
clf_weighted.fit(X_train_pca, y_train)

In [None]:
# Predict the labels of the testing set using the trained model
X_test_pca = pca.transform(X_test)
y_pred_weighted = clf_weighted.predict(X_test_pca)

In [None]:
# Plot AUC score for binary classification in Python
print(f"AUC Score: {roc_auc_score(y_test,y_pred_weighted)}")

In [None]:
# Plot ROC curve in Python for binary classification
fpr,tpr,_=roc_curve(y_test,y_pred_weighted)
plt.plot(fpr,tpr,label="data 1")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc=4)
plt.show()

In [None]:
# Plot Classification report in Python for binary classification
print(classification_report(y_test,y_pred_weighted))

In [None]:
# Plot Jaccard score in Python for binary classification
print(f"Jaccard Score: {jaccard_score(y_test,y_pred_weighted)}")

In [None]:
# Plot Matthews Correlation Coefficient in Python for binary classification
print(f"Matthews Correlation Coefficient: {matthews_corrcoef(y_test,y_pred_weighted)}")

In [None]:
# Plot confusion matrix in Python for binary classification
print(f"Confusion Matrix:\n{confusion_matrix(y_test,y_pred_weighted)}")

**Random forest**

In [None]:
# Importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, roc_auc_score, jaccard_score, matthews_corrcoef, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Split the dataset into features and labels
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [None]:
# Split the dataset into training (70%) and testing (30%) sets, stratified on the labels.
# random_state pins the shuffle so that Restart & Run All reproduces the same split and metrics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

In [None]:
# Perform PCA on the training set
pca = PCA(n_components=32)
X_train_pca = pca.fit_transform(X_train)

In [None]:
# Perform hyperparameter tuning using GridSearchCV (5-fold CV over forest size, tree depth
# and class-weighting scheme).
# The candidate {0: 1.0, 1: 0} was replaced: a weight of 0 for class 1 makes the forest
# ignore every class-1 training sample. 'balanced_subsample' is the per-tree (bootstrap)
# variant of 'balanced'.
param_grid = {
    'n_estimators': [100, 200, 250, 300, 400],
    'max_depth': [3, 4, 5, 6, 7, 8],
    'class_weight': ['balanced', 'balanced_subsample'],
}
grid = GridSearchCV(RandomForestClassifier(), param_grid=param_grid, cv=5)
grid.fit(X_train_pca, y_train)

In [None]:
# Train the model with the best hyperparameters
clf = RandomForestClassifier(n_estimators=grid.best_params_['n_estimators'], max_depth=grid.best_params_['max_depth'], class_weight=grid.best_params_['class_weight'])
clf.fit(X_train_pca, y_train)

In [None]:
# Perform cross-validation on the training set
scores = cross_val_score(clf, X_train_pca, y_train, cv=5)

In [None]:
# Predict the labels of the testing set using the trained model
X_test_pca = pca.transform(X_test)
y_pred_weighted = clf.predict(X_test_pca)

In [None]:
# Plot AUC score for binary classification in Python
print(f"AUC Score: {roc_auc_score(y_test,y_pred_weighted)}")

In [None]:
# Plot ROC curve in Python for binary classification
fpr,tpr,_=roc_curve(y_test,y_pred_weighted)
plt.plot(fpr,tpr,label="data 1")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc=4)
plt.show()

In [None]:
# Plot Classification report in Python for binary classification
print(classification_report(y_test,y_pred_weighted))

In [None]:
# Plot Jaccard score in Python for binary classification
print(f"Jaccard Score: {jaccard_score(y_test,y_pred_weighted)}")

In [None]:
# Plot Matthews Correlation Coefficient in Python for binary classification
print(f"Matthews Correlation Coefficient: {matthews_corrcoef(y_test,y_pred_weighted)}")

In [None]:
# Plot confusion matrix in Python for binary classification
print(f"Confusion Matrix:\n{confusion_matrix(y_test,y_pred_weighted)}")