In [1]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load dataset
df = pd.read_csv("Myocardial Infarction.csv")

# Select top 15 relevant features (update based on your data)
selected_columns = [
    'AGE', 'SEX', 'SIM_GIPERT', 'STENOK_AN', 'FK_STENOK', 'IBS_POST',
    'IBS_NASL', 'K_BLOOD', 'L_BLOOD', 'ROE', 'S_AD_KBRIG', 'D_AD_KBRIG',
    'GIPO_K', 'GIPER_NA', 'LET_IS'  # Target variable
]

# Filter the dataset
df = df[selected_columns].dropna(subset=['LET_IS'])  # Drop rows where target is missing

# Define features (X) and target (y)
X = df.drop(columns=['LET_IS'])  # Input features
y = df['LET_IS']  # Target variable (0 = Low Risk, 1 = High Risk)

# Handle missing values
imputer = SimpleImputer(strategy='mean')  # Fill missing values with column mean
X_imputed = imputer.fit_transform(X)

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_imputed, y, test_size=0.2, random_state=42)

# Train a model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Save trained model and imputer
joblib.dump(model, "myocardial_model.pkl")
joblib.dump(imputer, "imputer.pkl")

print("Model and imputer saved successfully!")


Model Accuracy: 0.87
Model and imputer saved successfully!


In [3]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load dataset
file_path = "Myocardial Infarction.csv"
df = pd.read_csv(file_path)

# Select relevant 14 features
selected_columns = [
    'AGE', 'SEX', 'SIM_GIPERT', 'STENOK_AN', 'FK_STENOK', 
    'IBS_POST', 'IBS_NASL', 'K_BLOOD', 'L_BLOOD', 'ROE', 
    'S_AD_KBRIG', 'D_AD_KBRIG', 'GIPO_K', 'GIPER_NA'
]

# Set the correct target column
target_column = "LET_IS"  # Updated based on dataset

# Ensure target column exists
if target_column not in df.columns:
    raise ValueError(f"Target column '{target_column}' not found in dataset!")

# Filter dataset
filtered_df = df[selected_columns + [target_column]].dropna()

# Split into input (X) and output (y)
X = filtered_df[selected_columns]
y = filtered_df[target_column]

# Handle missing values
imputer = SimpleImputer(strategy="mean")
X_imputed = imputer.fit_transform(X)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_imputed, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Save trained model and imputer
joblib.dump(model, "myocardial_model.pkl")
joblib.dump(imputer, "imputer.pkl")
print("Model and imputer saved successfully!")


Model Accuracy: 1.00
Model and imputer saved successfully!
