In [82]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
import time
import psutil
import numpy as np
from sklearn import datasets

# Remote CSV (stroke.csv in the ml_apis GitHub repository) used as the
# classification dataset for the cells below.
classDataset = (
    "https://raw.githubusercontent.com/"
    "Akshay-De-Silva/ml_apis/main/stroke.csv"
)

# Fetch and parse the CSV into the notebook-wide DataFrame.
df = pd.read_csv(filepath_or_buffer=classDataset)

In [89]:
def _replace_values_with_noise(values, percentage):
    """Return a copy of ``values`` with a random fraction replaced by N(0, 1) draws."""
    if not 0 <= percentage <= 1:
        raise ValueError(
            f"percentage must be a fraction between 0 and 1, got {percentage!r}"
        )

    num_rows = len(values)
    num_replace = int(num_rows * percentage)

    # Draw positions and noise in this order (even when num_replace is 0) so the
    # global NumPy random stream is consumed consistently for seeded runs.
    idx = np.random.choice(np.arange(num_rows), size=num_replace, replace=False)
    noise = np.random.normal(0, 1, num_replace)

    noisy = values.copy()
    if num_replace > 0:
        # Float noise cannot be stored in an integer/boolean Series without an
        # implicit upcast, which newer pandas versions deprecate; upcast explicitly.
        if pd.api.types.is_integer_dtype(noisy.dtype) or pd.api.types.is_bool_dtype(noisy.dtype):
            noisy = noisy.astype("float64")
        noisy.iloc[idx] = noise
    return noisy


def replace_with_noise(data, column_name="stroke", percentage=0):
    """
    Replace a fraction of the values in a single column with Gaussian noise.

    The positions to overwrite are sampled without replacement and filled with
    draws from a standard normal distribution (mean 0, std 1). The input is
    never modified; a noisy copy is returned.

    Args:
        data (pd.DataFrame or pd.Series): The data to modify.
        column_name (str, optional): Column to modify when ``data`` is a
            DataFrame; ignored for a Series. Defaults to "stroke".
        percentage (float, optional): Fraction (0 to 1) of the values to
            replace with noise. Defaults to 0 (no noise).

    Returns:
        pd.DataFrame or pd.Series: A copy of ``data`` with noise added. An
        integer or boolean column becomes float64 when any value is replaced.

    Raises:
        ValueError: If ``percentage`` is outside the range [0, 1].
        KeyError: If ``data`` is a DataFrame without a ``column_name`` column.
    """
    if isinstance(data, pd.DataFrame):
        df_noisy = data.copy()
        df_noisy[column_name] = _replace_values_with_noise(data[column_name], percentage)
        return df_noisy

    return _replace_values_with_noise(data, percentage)

In [90]:
# Fraction of training labels to overwrite with noise (0 disables injection).
noise = 0

features = ['age', 'hypertension', 'heart_disease', 'avg_glucose_level']
target = 'stroke'

X = df[features]
y = df[target]

# Fixed random_state keeps the 70/30 split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Pass the fraction by keyword: the second positional parameter of
# replace_with_noise is `column_name`, so a positional call would leave
# `percentage` at its default of 0 and silently skip the noise injection.
y_train = replace_with_noise(y_train, percentage=noise)

from sklearn.preprocessing import LabelEncoder

# Encode the (possibly noisy) training labels as consecutive integers.
# NOTE(review): with noise > 0 the labels contain continuous values, so every
# distinct noisy value becomes its own class — confirm this is the intended
# experiment rather than flipping labels between the existing classes.
le = LabelEncoder()
y_train = le.fit_transform(y_train)

In [91]:
from sklearn.svm import SVC

# Support vector classifier with scikit-learn's default hyperparameters.
svm_classifier = SVC()

# Use the monotonic high-resolution timer (same clock as the prediction
# timing) instead of the wall clock, which can jump and is coarse for
# sub-second measurements.
start_training_time = time.perf_counter()

svm_classifier.fit(X_train, y_train)

end_training_time = time.perf_counter()
training_time_duration = end_training_time - start_training_time

In [97]:
start_testing_time = time.perf_counter()

# Predict on the test set (predictions are in the LabelEncoder-encoded space
# the classifier was trained on).
y_pred_encoded = svm_classifier.predict(X_test)

end_testing_time = time.perf_counter()
testing_time_duration = end_testing_time - start_testing_time

# Map predictions back to the original label values before scoring: y_test
# was never encoded, so comparing encoded predictions to it is only correct
# when the encoder happens to be the identity mapping. Decoding is done
# outside the timed region so the reported prediction time is unaffected.
y_pred = le.inverse_transform(y_pred_encoded)

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

f1Score = f1_score(y_test, y_pred)
print(f"F1Score: {f1Score:.2f}")

print(f"Training Time: {training_time_duration:.8f} seconds")
print(f"Testing Time: {testing_time_duration:.8f} seconds")


Confusion Matrix:
[[1444    0]
 [  89    0]]
Accuracy: 0.94
F1Score: 0.00
Training Time: 0.07105851 seconds
Testing Time: 0.08111848 seconds
