In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import pickle
import joblib


In [2]:
df = pd.read_csv(r'C:\Users\SreeKeerthiReddyThat\Downloads\framingham.csv')

In [3]:
features = ['male', 'age', 'education', 'currentSmoker', 'cigsPerDay', 'BPMeds',
            'prevalentStroke', 'prevalentHyp', 'diabetes', 'totChol', 'sysBP',
            'diaBP', 'BMI', 'heartRate', 'glucose']
target = 'TenYearCHD'

In [4]:
X = df[features]
y = df[target]

In [5]:
X = X.fillna(X.median())

In [6]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, stratify=y, test_size=0.2, random_state=42)


In [8]:
model = SVC(kernel='rbf', probability=True, random_state=42)
model.fit(X_train, y_train)


In [9]:
y_pred = model.predict(X_test)

print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("✅ Classification Report:\n", classification_report(y_test, y_pred))


✅ Accuracy: 0.8502358490566038
✅ Classification Report:
               precision    recall  f1-score   support

           0       0.85      1.00      0.92       719
           1       0.75      0.02      0.05       129

    accuracy                           0.85       848
   macro avg       0.80      0.51      0.48       848
weighted avg       0.84      0.85      0.79       848



In [10]:
with open('svm_model_bundle.pkl', 'wb') as f:
    pickle.dump({'model': model, 'scaler': scaler}, f)


In [11]:
joblib.dump({'model': model, 'scaler': scaler}, 'svm_model_bundle_joblib.pkl')


['svm_model_bundle_joblib.pkl']

In [12]:
with open('svm_model_bundle.pkl', 'rb') as f:
    bundle = pickle.load(f)


In [13]:
bundle = joblib.load('svm_model_bundle_joblib.pkl')


In [16]:
# Select a sample input (first row of X_test)
loaded_model_pickle = bundle['model']
loaded_scaler_pickle = bundle['scaler']

# Select a sample input (first row of X_test)
sample_input = X_test[0:1]

# Scale the sample input
sample_scaled = loaded_scaler_pickle.transform(sample_input)

# Predict using the loaded model
prediction = loaded_model_pickle.predict(sample_scaled)

# Output the prediction
print("Predicted TenYearCHD (from Pickle):", prediction[0])


Predicted TenYearCHD (from Pickle): 0




In [27]:
coefficients = model.coef_.flatten()

# Calculate the importance as percentage
coefficients_percent = 100 * np.abs(coefficients) / np.sum(np.abs(coefficients))

# Create a DataFrame for better visualization
feature_importance_df = pd.DataFrame({
    'Feature': features,
    'Importance (%)': coefficients_percent
})

# Sort by importance
feature_importance_df = feature_importance_df.sort_values(by='Importance (%)', ascending=True)

# Plot the feature importance
plt.figure(figsize=(10, 6))
plt.barh(feature_importance_df['Feature'], feature_importance_df['Importance (%)'], color='seagreen')
plt.xlabel('Importance (%)')
plt.title('Feature Importance for SVM Model (Linear Kernel)')
plt.grid(axis='x', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()


AttributeError: 'SVC' object has no attribute 'dual_coef_'

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

y_train_pred = loaded_model_pickle.predict(X_train) 
y_test_pred = loaded_model_pickle.predict(X_test)

# Evaluation function for SVM model
def evaluate_model(y_true, y_pred, dataset_name):
    print(f"\n📊 Evaluation Metrics for {dataset_name}:")
    print(f"Accuracy  : {accuracy_score(y_true, y_pred):.4f}")
    print(f"Precision : {precision_score(y_true, y_pred):.4f}")
    print(f"Recall    : {recall_score(y_true, y_pred):.4f}")
    print(f"F1 Score  : {f1_score(y_true, y_pred):.4f}")
    print("\nConfusion Matrix:\n", confusion_matrix(y_true, y_pred))
    print("\nClassification Report:\n", classification_report(y_true, y_pred))


# Evaluate on both datasets
evaluate_model(y_train, y_train_pred, "Training Set")
evaluate_model(y_test, y_test_pred, "Testing Set")



📊 Evaluation Metrics for Training Set:
Accuracy  : 0.8617
Precision : 1.0000
Recall    : 0.0893
F1 Score  : 0.1640

Confusion Matrix:
 [[2877    0]
 [ 469   46]]

Classification Report:
               precision    recall  f1-score   support

           0       0.86      1.00      0.92      2877
           1       1.00      0.09      0.16       515

    accuracy                           0.86      3392
   macro avg       0.93      0.54      0.54      3392
weighted avg       0.88      0.86      0.81      3392


📊 Evaluation Metrics for Testing Set:
Accuracy  : 0.8502
Precision : 0.7500
Recall    : 0.0233
F1 Score  : 0.0451

Confusion Matrix:
 [[718   1]
 [126   3]]

Classification Report:
               precision    recall  f1-score   support

           0       0.85      1.00      0.92       719
           1       0.75      0.02      0.05       129

    accuracy                           0.85       848
   macro avg       0.80      0.51      0.48       848
weighted avg       0.84      0