In [7]:

from torch import nn, optim
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
# Load the dataset
# Assuming the dataset is in a CSV file named 'ecg_data.csv'
# df = pd.read_csv('audit_risk.csv')
# trial = pd.read_csv('trial.csv')

# X = df.iloc[:, :-1]
# y = df.iloc[:, -1]

# X_test = trial.iloc[:, :-1]
# y_test = trial.iloc[:, -1]

class CNNClassifier(nn.Module):
    def __init__(self):
        super(CNNClassifier, self).__init__()
        
        self.CNN = nn.Conv2d(10, 32, kernel_size = 3, stride=1, padding = 1)
        self.CNN2 = nn.Conv2d(32, 64, kernel_size = 3, stride=1, padding=1)
        
      
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 64)
        self.fc4 = nn.Linear(64, 10)

        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(0.5)
           
        
    def forward(self, x):
        # x shape: (batch, seq_len, input_size)
        x = x.unsqueeze(1)
        
        # x shape: (batch, 1, seq_len, input_size)
        x = self.CNN(x)
        
        x = F.relu(x)
        x = self.CNN2(x)
        
        x = F.relu(x)
        
        x = x.reshape(1, -1)
        
        # Flatten the output
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.fc3(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.fc4(x)
        x = F.relu(x)
        
        
        x = x.reshape(10, 1)
        
        # Final output
        
       
        x = self.sigmoid(x)
        return x


# Custom Dataset for Numerical Arrays
class NumericalDataset(Dataset):
    def __init__(self, X, y=None):
        X = X.values
        y = y.values
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32) if y is not None else None
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        if self.y is not None:
            return self.X[idx], self.y[idx]
        return self.X[idx]
    
    def return_X_y(self):
        return self.X, self.y

# X = X.apply(pd.to_numeric, errors='coerce')
# y = y.apply(pd.to_numeric, errors='coerce')
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

NumericalData = NumericalDataset(X_train, y_train)
NumericalDataset_test = NumericalDataset(X_test, y_test)
X_train, y_train = NumericalData.return_X_y()
X_test, y_test = NumericalDataset_test.return_X_y()



class Transformer(nn.Module):
    def __init__(self, num_features, dim=32, depth=4, heads=8, num_classes=1):
        super(Transformer, self).__init__()
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.feature_embed = nn.Linear(num_features, dim)
        
        # Transformer Encoder
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=dim,
                nhead=heads,
                dim_feedforward=dim*4,
                dropout=0.1,
                activation='gelu',
                batch_first=True
            ),
            num_layers=depth
        )
        
        # Output Head
        self.mlp = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, dim//2),
            nn.GELU(),
            nn.Linear(dim//2, num_classes)
        )
        
    def forward(self, x):
        # x shape: (batch, num_features)
        x = self.feature_embed(x)  # (batch, num_features, dim)
        x = x.unsqueeze(1)  # (batch, 1, num_features, dim)
        
        x = x.unsqueeze(2)
        # Add CLS token
        cls_tokens = self.cls_token.expand(x.size(0), -1, -1)
        
        x = torch.mean(x, dim=2)  # (batch, num_features, dim)
        x = torch.cat((cls_tokens, x), dim=1)  # (batch, num_features+1, dim)
        
        # Transformer processing
        x = self.transformer(x)
        
        # Use CLS token for prediction
        cls_output = x[:, 0]
        
          # (batch, 1, dim)
        
          # Sigmoid for binary classification
        x = self.mlp(cls_output)
        x = torch.sigmoid(x)  # (batch, num_classes)
        return cls_output, x



In [8]:
MIN_DELTA = 0.0001
MIN_LR = 1e-5
class LRScheduler():
    """
    Learning rate scheduler. If the validation loss does not decrease for the 
    given number of `patience` epochs, then the learning rate will decrease by
    by given `factor`.
    """
    def __init__(self, optimizer, patience=3, min_lr=MIN_LR, factor=0.1):
        """
        new_lr = old_lr * factor
        :param optimizer: the optimizer we are using
        :param patience: how many epochs to wait before updating the lr
        :param min_lr: least lr value to reduce to while updating
        :param factor: factor by which the lr should be updated
        """
        self.optimizer = optimizer
        self.patience = patience
        self.min_lr = min_lr
        self.factor = factor
        self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( 
                self.optimizer,
                mode='min',
                patience=self.patience,
                factor=self.factor,
                min_lr=self.min_lr
            )
    def __call__(self, val_loss):
        # take one step of the learning rate scheduler while providing the validation loss as the argument
        self.lr_scheduler.step(val_loss)
    
model = Transformer(num_features=X.shape[-1], dim=32, depth=4, heads=8, num_classes=1)
model2 = CNNClassifier()

# model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
model2 = model2.to('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.BCELoss()
criterion2 = nn.BCELoss()
# optimizer = optim.Adam(model.parameters(), lr=0.001)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
optimizer2 = optim.Adam(model2.parameters(), lr=0.001, weight_decay=1e-5)

def scheduler(optimizer, step_size=10, gamma=0.1):
    """Step learning rate scheduler."""
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
    return scheduler

# Split data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# # Convert to PyTorch tensors
# X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to('cuda' if torch.cuda.is_available() else 'cpu')
# y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to('cuda' if torch.cuda.is_available() else 'cpu')
# X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to('cuda' if torch.cuda.is_available() else 'cpu')
# y_test_tensor = torch.tensor(y_test, dtype=torch.float32).to('cuda' if torch.cuda.is_available() else 'cpu')
# Training loop
num_epochs = 5
batch_size = 10
train_loss_lst = []
test_loss_lst = []
# lr_schedule = LRScheduler(optimizer)
num_batches = len(X) // batch_size
for epoch in range(num_epochs):
    model2.train()
    
    inputs = X_train
    labels = y_train
    for i in range(0, len(inputs), batch_size):    
        # optimizer.zero_grad()
        # outputs= model.forward(inputs)
    
        try: 
            optimizer.zero_grad()
            
            
            outputs, y_pred = model.forward(inputs[i:i+batch_size, :].to('cuda' if torch.cuda.is_available() else 'cpu'))
            
            label = labels[i:i+batch_size].to('cuda' if torch.cuda.is_available() else 'cpu')
            label = label.unsqueeze(1)
            
            
            
        
            loss = criterion(y_pred, label)
            loss.backward()
            optimizer.step()
            optimizer2.zero_grad()
            
            outputs2 = outputs.clone().detach()
            
            y_pred2 = model2.forward(outputs2)
            
            
            loss2 = criterion2(y_pred2, label)
            loss2.backward()
            optimizer2.step()
                
            # print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss2.item():.4f}")
            train_loss_lst.append(loss2.item())
        except:
            continue
    # Evaluation
        model2.eval()
        with torch.no_grad():
            loss = []
            preds = {}
            # test_outputs, x = model(X_test_tensor)
            for i in range(0, len(X_test), batch_size):
                try: 
            # test_outputs = test_outputs.view(-1, 32, 1)
                    
                    test_outputs2, y_pred2 = model.forward(X_test[i:i+batch_size, :].to('cuda' if torch.cuda.is_available() else 'cpu'))
                    
                   
                    test_loss = criterion(y_pred2, y_test[i:i+batch_size].unsqueeze(1))
                    test_outputs2 = torch.where(test_outputs2.isnan(), fill, test_outputs2) 
                    pred = model2.forward(test_outputs2)
                    
                    
                    
                    loss_2 = criterion2(pred, y_test[i:i+batch_size].to('cuda' if torch.cuda.is_available() else 'cpu').unsqueeze(1))
                    loss.append(loss_2.item())
                    for j in range(55, 80):
                        if j ==0:
                            preds[j] = []
                        pre = i*0.01
                        pre = torch.tensor(pre, dtype=torch.float32).to('cuda' if torch.cuda.is_available() else 'cpu')
                        pred = torch.where(pred>pre, 1, 0)
                    
                        mask = list(pred==y_test[i:i+batch_size])
                        preds[i].extend(mask)
                    
                except:
                    continue
            loss = torch.tensor(loss)
            
            print(torch.mean(loss))
            for key, value in preds.items():
                total = []
                for v in value:
                    for tr in v: 
                        if tr == True:
                            total.append(1)
                        else:
                            total.append(0)
                try: 
                    print(f"Accuracy for batch {key}: {sum(total)/len(total):.4f}")
                except:
                    continue
        # Sa    ve the model

Epoch [1/5], Loss: 0.7000
tensor(nan)
Epoch [1/5], Loss: 0.6967
tensor(nan)
Epoch [1/5], Loss: 0.6991
tensor(nan)
Epoch [1/5], Loss: 0.6892
tensor(nan)
Epoch [1/5], Loss: 0.6876
tensor(nan)
Epoch [1/5], Loss: 0.6841
tensor(nan)
Epoch [1/5], Loss: 0.7095
tensor(nan)
Epoch [1/5], Loss: 0.7126
tensor(nan)
Epoch [1/5], Loss: 0.7073
tensor(nan)
Epoch [1/5], Loss: 0.7015
tensor(nan)
Epoch [1/5], Loss: 0.6982
tensor(nan)
Epoch [1/5], Loss: 0.6902
tensor(nan)
Epoch [1/5], Loss: 0.6951
tensor(nan)
Epoch [1/5], Loss: 0.6963
tensor(nan)
Epoch [1/5], Loss: 0.6957
tensor(nan)
Epoch [1/5], Loss: 0.6952
tensor(nan)
Epoch [1/5], Loss: 0.6934
tensor(nan)
Epoch [1/5], Loss: 0.6934
tensor(nan)
Epoch [1/5], Loss: 0.6928
tensor(nan)
Epoch [1/5], Loss: 0.6926
tensor(nan)
Epoch [1/5], Loss: 0.6930
tensor(nan)
Epoch [1/5], Loss: 0.6966
tensor(nan)
Epoch [1/5], Loss: 0.6957
tensor(nan)
Epoch [1/5], Loss: 0.6932
tensor(nan)
Epoch [1/5], Loss: 0.6962
tensor(nan)
Epoch [1/5], Loss: 0.6953
tensor(nan)
Epoch [1/5],

In [1]:
from sklearn.svm import SVC
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
import pandas as pd
from sklearn.model_selection import train_test_split
df = pd.read_csv('audit_risk.csv')
trial = pd.read_csv('trial.csv')
for col in df.columns:
    if df[col].isna().any():
        print(col)
        print(df[df[col].isna()])
        print(df[df[col].isna()].index)
        print(col)
        df = df.loc[~df[col].isna(), :]
for col in df.columns:
    try:
       
    
        df = df.loc[~(df[col] == 'LOHARU'), :]
        df = df.loc[~(df[col] == str), :]
    except:
        continue


# iter_impute = IterativeImputer(max_iter=10, random_state=42)
# df = iter_impute.fit_transform(df)

print(df)

X = df.iloc[:, 2:-2]
X = X.apply(pd.to_numeric, errors='coerce')
y = df.iloc[:, -1]
y = y.apply(pd.to_numeric, errors='coerce')
print(X, y)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)





Money_Value
     Sector_score LOCATION_ID  PARA_A  Score_A  Risk_A  PARA_B  Score_B  \
642         55.57           4    0.23      0.2   0.046     0.0      0.2   

     Risk_B  TOTAL  numbers  ...  RiSk_E  History  Prob  Risk_F  Score  \
642     0.0   0.23      5.0  ...     0.4        0   0.2     0.0    2.0   

     Inherent_Risk  CONTROL_RISK  Detection_Risk  Audit_Risk  Risk  
642          1.446           0.4             0.5      0.2892     0  

[1 rows x 27 columns]
Index([642], dtype='int64')
Money_Value
     Sector_score LOCATION_ID  PARA_A  Score_A  Risk_A  PARA_B  Score_B  \
0            3.89          23    4.18      0.6   2.508    2.50      0.2   
1            3.89           6    0.00      0.2   0.000    4.83      0.2   
2            3.89           6    0.51      0.2   0.102    0.23      0.2   
3            3.89           6    0.00      0.2   0.000   10.80      0.6   
4            3.89           6    0.00      0.2   0.000    0.08      0.2   
..            ...         ...     ...

In [2]:
for i in range(1, 10):
    pre = i*0.1
    class_SVC = SVC(kernel='linear', C=pre, random_state=42)
    class_SVC.fit(X_train, y_train)

    print(class_SVC.score(X_test, y_test))

0.9935483870967742
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0


In [11]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.metrics import average_precision_score, precision_recall_curve
from sklearn.naive_bayes import GaussianNB
from sklearn.inspection import permutation_importance
gauss_model = GaussianNB()
gauss_model.fit(X_train, y_train)
imps = permutation_importance(gauss_model, X_test, y_test, n_repeats=10, random_state=42)
print(imps.importances_mean)
print(X_test.columns[imps.importances_mean.argsort()[::-1]])
y_pred = gauss_model.score(X_test, y_test)
print("Gaussian Naive Bayes Accuracy:", y_pred)
print("Classification Report:\n", classification_report(y_test, gauss_model.predict(X_test)))
print("Confusion Matrix:\n", confusion_matrix(y_test, gauss_model.predict(X_test)))
print("Accuracy:", accuracy_score(y_test, gauss_model.predict(X_test)))
print("Precision:", precision_score(y_test, gauss_model.predict(X_test), average='weighted'))
print("Recall:", recall_score(y_test, gauss_model.predict(X_test), average='weighted'))
print("F1 Score:", f1_score(y_test, gauss_model.predict(X_test), average='weighted'))
print("ROC AUC Score:", roc_auc_score(y_test, gauss_model.predict(X_test)))
print("Average Precision Score:", average_precision_score(y_test, gauss_model.predict(X_test)))


[ 0.02774194  0.00258065  0.03290323  0.08774194 -0.00580645  0.0883871
  0.09032258  0.04064516  0.04064516  0.04064516  0.10322581  0.11032258
  0.11354839  0.00258065  0.00516129  0.01096774  0.01677419  0.01677419
  0.01677419 -0.00129032  0.13870968  0.0116129   0.        ]
Index(['Inherent_Risk', 'Risk_D', 'Score_MV', 'Money_Value', 'TOTAL', 'Risk_B',
       'PARA_B', 'numbers', 'Score_B.1', 'Risk_C', 'Risk_A', 'PARA_A',
       'History', 'Prob', 'Risk_F', 'CONTROL_RISK', 'RiSk_E', 'PROB',
       'District_Loss', 'Score_A', 'Detection_Risk', 'Score', 'Score_B'],
      dtype='object')
Gaussian Naive Bayes Accuracy: 0.9612903225806452
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.94      0.97        95
           1       0.91      1.00      0.95        60

    accuracy                           0.96       155
   macro avg       0.95      0.97      0.96       155
weighted avg       0.96      0.96      0.96       155

Co

In [None]:
from sklearn.ensemble import RandomForestClassifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.score(X_test, y_test)
print("Random Forest Accuracy:", y_pred_rf)
print("Classification Report:\n", classification_report(y_test, rf_model.predict(X_test)))
print("Confusion Matrix:\n", confusion_matrix(y_test, rf_model.predict(X_test)))
print("Accuracy:", accuracy_score(y_test, rf_model.predict(X_test)))
print("Precision:", precision_score(y_test, rf_model.predict(X_test), average='weighted'))
print("Recall:", recall_score(y_test, rf_model.predict(X_test), average='weighted'))
print("F1 Score:", f1_score(y_test, rf_model.predict(X_test), average='weighted'))
print("ROC AUC Score:", roc_auc_score(y_test, rf_model.predict(X_test)))
print("Average Precision Score:", average_precision_score(y_test, rf_model.predict(X_test)))

Random Forest Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        95
           1       1.00      1.00      1.00        60

    accuracy                           1.00       155
   macro avg       1.00      1.00      1.00       155
weighted avg       1.00      1.00      1.00       155

Confusion Matrix:
 [[95  0]
 [ 0 60]]
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
ROC AUC Score: 1.0
Average Precision Score: 1.0


In [6]:
from sklearn.neural_network import MLPClassifier
mlp_model = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42)
mlp_model.fit(X_train, y_train)
y_pred_mlp = mlp_model.score(X_test, y_test)
print("MLP Classifier Accuracy:", y_pred_mlp)
print("Classification Report:\n", classification_report(y_test, mlp_model.predict(X_test)))
print("Confusion Matrix:\n", confusion_matrix(y_test, mlp_model.predict(X_test)))
print("Accuracy:", accuracy_score(y_test, mlp_model.predict(X_test)))
print("Precision:", precision_score(y_test, mlp_model.predict(X_test), average='weighted'))
print("Recall:", recall_score(y_test, mlp_model.predict(X_test), average='weighted'))
print("F1 Score:", f1_score(y_test, mlp_model.predict(X_test), average='weighted'))



MLP Classifier Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        95
           1       1.00      1.00      1.00        60

    accuracy                           1.00       155
   macro avg       1.00      1.00      1.00       155
weighted avg       1.00      1.00      1.00       155

Confusion Matrix:
 [[95  0]
 [ 0 60]]
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0


In [12]:
from sklearn.neighbors import KNeighborsClassifier
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train, y_train)
y_pred_knn = knn_model.score(X_test, y_test)
print("KNN Classifier Accuracy:", y_pred_knn)
print("Classification Report:\n", classification_report(y_test, knn_model.predict(X_test)))
print("Confusion Matrix:\n", confusion_matrix(y_test, knn_model.predict(X_test)))
print("Accuracy:", accuracy_score(y_test, knn_model.predict(X_test)))
print("Precision:", precision_score(y_test, knn_model.predict(X_test), average='weighted'))
print("Recall:", recall_score(y_test, knn_model.predict(X_test), average='weighted'))
print("F1 Score:", f1_score(y_test, knn_model.predict(X_test), average='weighted'))

KNN Classifier Accuracy: 0.9870967741935484
Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99        95
           1       1.00      0.97      0.98        60

    accuracy                           0.99       155
   macro avg       0.99      0.98      0.99       155
weighted avg       0.99      0.99      0.99       155

Confusion Matrix:
 [[95  0]
 [ 2 58]]
Accuracy: 0.9870967741935484
Precision: 0.987362820086465
Recall: 0.9870967741935484
F1 Score: 0.9870546291233825
