In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, log_loss

# Read the diabetes CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path -- adjust it when
# running anywhere else.
csv_path = 'C:/Users/Hemanth R/Desktop/multiple-disease-prediction-streamlit-app-main/dataset/diabetes.csv'
diabetes_dataset = pd.read_csv(csv_path)

# 'Outcome' is the target column; every other column is used as a feature.
y = diabetes_dataset['Outcome']
X = diabetes_dataset.drop('Outcome', axis=1)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features. The scaler is fitted on the training rows only and
# then applied to both splits, so the test rows do not influence its statistics.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Per-pass metric histories, appended to by the training loop.
training_accuracy, testing_accuracy = [], []
training_loss, testing_loss = [], []

# Sweep the SVM solver's iteration cap from 1 to 25 and record metrics per pass.
# NOTE: these are not incremental "epochs" -- every pass builds and fits a
# brand-new SVC from scratch with max_iter = epoch + 1; nothing is carried over
# from the previous pass. With caps this low, scikit-learn is expected to emit
# a ConvergenceWarning whenever the solver is stopped before converging.
for epoch in range(25):
    # probability=True enables predict_proba, which log_loss needs below.
    model = svm.SVC(probability=True, max_iter=epoch + 1, random_state=42)

    # Fit the model to the training data
    model.fit(X_train, y_train)

    # Class-probability estimates for the log-loss calculation
    train_probs = model.predict_proba(X_train)
    test_probs = model.predict_proba(X_test)

    # Hard class predictions for the accuracy calculation
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Calculate accuracy
    train_acc = accuracy_score(y_train, y_train_pred)
    test_acc = accuracy_score(y_test, y_test_pred)

    # Calculate log loss. Pass the model's own class list explicitly: the
    # probability columns are ordered by model.classes_, and without `labels`
    # log_loss infers the classes from y_true, which fails (or misaligns the
    # columns) if a split happens to be missing one of the classes.
    train_loss = log_loss(y_train, train_probs, labels=model.classes_)
    test_loss = log_loss(y_test, test_probs, labels=model.classes_)

    # Append metrics for this pass
    training_accuracy.append(train_acc)
    testing_accuracy.append(test_acc)
    training_loss.append(train_loss)
    testing_loss.append(test_loss)

# Plot accuracy (left) and log loss (right) against the 1-based pass number.
# The x-axis is derived from the number of recorded metrics instead of a
# hard-coded range(1, 26), so the plot stays valid if the number of training
# passes changes (a length mismatch between x and y makes matplotlib raise).
epochs = range(1, len(training_accuracy) + 1)

fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Accuracy panel
accuracy_ax.plot(epochs, training_accuracy, label='Training Accuracy')
accuracy_ax.plot(epochs, testing_accuracy, label='Testing Accuracy')
accuracy_ax.set_title('SVM Model Accuracy')
accuracy_ax.set_xlabel('Epoch')
accuracy_ax.set_ylabel('Accuracy')
accuracy_ax.legend()

# Loss panel
loss_ax.plot(epochs, training_loss, label='Training Loss')
loss_ax.plot(epochs, testing_loss, label='Testing Loss')
loss_ax.set_title('SVM Model Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Log Loss')
loss_ax.legend()

fig.tight_layout()
plt.show()


ModuleNotFoundError: No module named 'pandas'