In [2]:
# model_training.ipynb
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import joblib



In [3]:
# Load dataset
df = pd.read_csv("/content/pronostico_dataset (1) (1).csv", delimiter=";")

In [4]:
# Features & target
X = df[['age', 'systolic_bp', 'diastolic_bp', 'cholesterol']]
y = df['prognosis']

In [5]:
# Encode target
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)


In [6]:
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)

In [7]:
# Initialize models
models = {
    "SVM": SVC(kernel='rbf', probability=True, random_state=42),
    "RandomForest": RandomForestClassifier(n_estimators=100, random_state=42),
    "LogisticRegression": LogisticRegression(max_iter=1000, random_state=42)
}

In [8]:
# Dictionary to store accuracies
model_scores = {}

In [9]:
# Train, evaluate, and save models
for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    acc = accuracy_score(y_test, preds)
    model_scores[name] = acc
    joblib.dump(model, f"{name}_model.pkl")


In [10]:
# Save label encoder and model scores
joblib.dump(label_encoder, "label_encoder.pkl")
joblib.dump(model_scores, "model_scores.pkl")

['model_scores.pkl']

In [11]:
print("All models, label encoder, and scores saved successfully.")
print("Model Accuracies:", model_scores)


All models, label encoder, and scores saved successfully.
Model Accuracies: {'SVM': 0.7533333333333333, 'RandomForest': 0.7366666666666667, 'LogisticRegression': 0.7466666666666667}
