In [21]:
import pandas as pd
from glob import glob
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
import numpy as np

In [28]:
# The path to the new_data folder
csv_folder_path = "new_data/"

# Fetch all the csv files from the folder
csv_files = glob(csv_folder_path + "*.csv")

# Initialize lists to hold training and test data
train_data = []
test_data = []

# Process each CSV file
for file in csv_files:
    df = pd.read_csv(file)
    
    # Drop unnecessary columns
    df = df.drop(columns=['index', 'Unnamed: 0'], errors='ignore')
    
    # Drop the 'timestamp' attribute
    df = df.drop(columns=['timestamp'])
    
    # Get 2200 rows from the start
    start_df = df.head(2200)
    
    # Get 2200 rows from the end
    end_df = df.tail(2200)
    
    # Get 2200 rows from the middle
    middle_index = len(df) // 2
    middle_df = df.iloc[middle_index - 1100: middle_index + 1100]
    
    # Combine the start, middle, and end data for training
    train_df = pd.concat([start_df, middle_df, end_df])
    
    # Get the rows from position 4000 to 5400 for testing
    test_df = df.iloc[4000:5400]
    
    # Append to the lists
    train_data.append(train_df)
    test_data.append(test_df)

# Concatenate all training and test data
train_data = pd.concat(train_data, ignore_index=True)
test_data = pd.concat(test_data, ignore_index=True)

# Separate the input data from the output data (label)
x_train = train_data.drop(columns=['label'])
y_train = train_data['label']
x_test = test_data.drop(columns=['label'])
y_test = test_data['label']

# Creation and training of the Gaussian Naive Bayes Classifier
model = GaussianNB()
model.fit(x_train, y_train)

# Predictions with the usage of the training data set
y_pred = model.predict(x_test)

# Calculation of the model's accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.3f}')

# Predict the label for 10 random records
num_samples = 10

# Get 10 random indices from the x_test data set
random_indices = np.random.randint(0, len(x_test), size=num_samples)

for i, random_index in enumerate(random_indices):
    random_sample = x_test.iloc[random_index].values.reshape(1, -1)  # Reshape to match model's expected input shape
    actual_label = y_test.iloc[random_index]
    
    # Make the prediction
    predicted_label = model.predict(random_sample)
    
    # Display the random sample and the label prediction
    print(f'\nSample {i+1}:')
    print(f'Random Sample Index: {random_index}')
    print(f'Input Features: {random_sample}')
    print(f'Actual Label: {actual_label}')
    print(f'Predicted Label: {predicted_label}')


Accuracy: 0.729

Sample 1:
Random Sample Index: 26471
Input Features: [[-0.8821 -0.0852 -0.8145 -1.3335 -0.3738 -1.4788]]
Actual Label: 8
Predicted Label: [0]

Sample 2:
Random Sample Index: 21317
Input Features: [[-1.0015  0.2681  0.1365 -1.2913 -0.0159 -0.3958]]
Actual Label: 0
Predicted Label: [2]

Sample 3:
Random Sample Index: 11021
Input Features: [[-0.9973  0.0544 -0.026  -0.3562  0.1042  0.935 ]]
Actual Label: 6
Predicted Label: [6]

Sample 4:
Random Sample Index: 15313
Input Features: [[-1.7241 -0.7457  0.0703 -4.2167  2.0449 -0.8011]]
Actual Label: 1
Predicted Label: [1]

Sample 5:
Random Sample Index: 26689
Input Features: [[-0.0933  0.1643 -0.5574  0.6338  0.3501  1.3499]]
Actual Label: 1
Predicted Label: [1]

Sample 6:
Random Sample Index: 26757
Input Features: [[ 0.5471 -0.075  -0.7229 -0.6306 -2.2129 -3.4436]]
Actual Label: 1
Predicted Label: [1]

Sample 7:
Random Sample Index: 17020
Input Features: [[-0.9066  0.0831 -0.4679 -0.4379  0.2326  0.868 ]]
Actual Label: 6
Pred

