In [1]:
import pandas as pd
from glob import glob
from sklearn.naive_bayes import GaussianNB
import numpy as np

In [2]:
# The path to the new_data folder
csv_folder_path = "../new_data/"

# Fetch all the csv files from the folder
csv_files = glob(csv_folder_path + "*.csv")

# Initialize lists to hold training and test data
train_data = []
test_data = []

# Process each CSV file
for file in csv_files:
    df = pd.read_csv(file)
    
    # Drop the 'timestamp' attribute
    df = df.drop(columns=['timestamp'])
    
    # Get 2200 rows from the start
    start_df = df.head(2200)
    
    # Get 2200 rows from the end
    end_df = df.tail(2200)
    
    # Get 2200 rows from the middle
    middle_index = len(df) // 2
    middle_df = df.iloc[middle_index - 1100: middle_index + 1100]
    
    # Combine the start, middle, and end data for training
    train_df = pd.concat([start_df, middle_df, end_df])
    
    # Get the rows from position 4000 to 5400 for testing
    test_df = df.iloc[4000:5400]
    
    # Append to the lists
    train_data.append(train_df)
    test_data.append(test_df)

# Concatenate all training and test data
train_data = pd.concat(train_data, ignore_index=True)
test_data = pd.concat(test_data, ignore_index=True)

# Separate the input data from the output data (label)
x_train = train_data.drop(columns=['label']).values
y_train = train_data['label'].values
x_test = test_data.drop(columns=['label']).values
y_test = test_data['label'].values

# Creation and training of the Gaussian Naive Bayes Classifier
model = GaussianNB()
model.fit(x_train, y_train)

# Predictions with the usage of the training data set
y_pred = model.predict(x_test)

# Predict the label for 10 random records
num_samples = 10

# Get 10 random indices from the x_test data set
random_indices = np.random.randint(0, len(x_test), size=num_samples)

for i, random_index in enumerate(random_indices):
    random_sample = x_test[random_index].reshape(1, -1)  # Reshape to match model's expected input shape
    actual_label = y_test[random_index]
    
    # Make the prediction
    predicted_label = model.predict(random_sample)
    
    # Display the random sample and the label prediction
    print(f'\nSample {i+1}:')
    print(f'Random Sample Index: {random_index}')
    print(f'Input Features: {random_sample}')
    print(f'Actual Label: {actual_label}')
    print(f'Predicted Label: {predicted_label}')



Sample 1:
Random Sample Index: 8919
Input Features: [[-0.9886  0.0519 -0.0506 -0.9019  0.1991 -0.3319]]
Actual Label: 5
Predicted Label: [5]

Sample 2:
Random Sample Index: 15186
Input Features: [[ 0.2014 -0.0881 -0.1975 -1.5381 -1.1477 -1.395 ]]
Actual Label: 1
Predicted Label: [1]

Sample 3:
Random Sample Index: 28946
Input Features: [[-0.6418  0.0218 -0.7673 -0.234   0.0576  0.9722]]
Actual Label: 6
Predicted Label: [6]

Sample 4:
Random Sample Index: 8835
Input Features: [[-0.8003  0.0151  0.5511 -0.3717 -0.0049  0.586 ]]
Actual Label: 8
Predicted Label: [6]

Sample 5:
Random Sample Index: 6186
Input Features: [[-1.0028  0.1895 -0.127  -0.9163  0.1898 -0.3412]]
Actual Label: 5
Predicted Label: [5]

Sample 6:
Random Sample Index: 10833
Input Features: [[-0.9991 -0.0199 -0.1158 -0.3705 -0.2635  0.9029]]
Actual Label: 6
Predicted Label: [6]

Sample 7:
Random Sample Index: 25693
Input Features: [[-1.0339  0.005   0.0162 -0.9856  0.034   0.0605]]
Actual Label: 5
Predicted Label: [5]

S

In [3]:
from evaluation import general_evaluation

general_evaluation(y_test,y_pred)

Accuracy: 0.73

Classification Report:
               precision    recall  f1-score   support

           0       0.60      0.51      0.55      6670
           1       0.74      0.72      0.73      2223
           2       0.15      0.13      0.14       983
           3       0.01      0.00      0.01       245
           4       0.00      0.00      0.00       571
           5       0.46      0.94      0.62      2233
           6       0.94      0.93      0.93     15174
           7       0.00      0.00      0.00         0
           8       0.53      0.43      0.48      2373
           9       0.42      0.11      0.18       221
          10       0.02      0.04      0.03        95
          11       0.00      0.00      0.00        12

    accuracy                           0.73     30800
   macro avg       0.32      0.32      0.31     30800
weighted avg       0.73      0.73      0.72     30800

Confusion Matrix:
[[ 3422   468   548     8     0  1569   545     1   103     1     5     0]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [4]:
from evaluation import get_roc_curve

get_roc_curve(rf_classifier,X_test,y_test)

NameError: name 'rf_classifier' is not defined