In [2]:
import pandas as pd
import numpy as np

df = pd.read_csv('iris.csv')
df.columns.tolist()

['sepal.length', 'sepal.width', 'petal.length', 'petal.width', 'species']

In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import VotingClassifier

df = pd.read_csv('iris.csv')

X = df[['sepal.length', 'sepal.width', 'petal.length', 'petal.width']]
y = df['species']

le = LabelEncoder()
y = le.fit_transform(y)

lr_model = LogisticRegression(max_iter=200)
svc_model = SVC(probability=True, kernel='rbf')
knn_model = KNeighborsClassifier(n_neighbors=3)

lr_scores = cross_val_score(lr_model, X, y, cv=10, scoring='accuracy')
svc_scores = cross_val_score(svc_model, X, y, cv=10, scoring='accuracy')
knn_scores = cross_val_score(knn_model, X, y, cv=10, scoring='accuracy')

lr = lr_scores.mean()
svc = svc_scores.mean()
knn = knn_scores.mean()

print(f"Logistic Regression accuracy: {lr:.4f}")
print(f"SVC accuracy: {svc:.4f}")
print(f"KNN accuracy: {knn:.4f}")

hard_voting = VotingClassifier(estimators=[('lr', lr_model), ('svc', svc_model), ('knn', knn_model)], voting='hard')
hard_voting_scores = cross_val_score(hard_voting, X, y, cv=10, scoring='accuracy')
hard_v_cls = hard_voting_scores.mean()
print(f"Voting (Hard) accuracy: {hard_v_cls:.4f}")

soft_voting = VotingClassifier(estimators=[('lr', lr_model), ('svc', svc_model), ('knn', knn_model)], voting='soft')
soft_voting_scores = cross_val_score(soft_voting, X, y, cv=10, scoring='accuracy')
soft_v_cls = soft_voting_scores.mean()
print(f"Voting (Soft) accuracy: {soft_v_cls:.4f}")

weights = []
for i in range(1, 4):
    for j in range(1, 4):
        for k in range(1, 4):
            weights.append((i, j, k))

soft_voting = VotingClassifier(estimators=[('lr', lr_model), ('svc', svc_model), ('knn', knn_model)], voting='soft')

param_grid = {'weights': weights}
grid = GridSearchCV(soft_voting, param_grid, cv=10, scoring='accuracy')
grid.fit(X, y)

accuracy_gridcv = grid.best_score_
print(f"Best weighted voting accuracy: {accuracy_gridcv:.4f}")

# ====== Условия прохождения ======
print("\n--- КРИТЕРИИ ПРОХОЖДЕНИЯ ---")
print(f"lr >= 0.8: {lr >= 0.8}")
print(f"svc >= 0.8: {svc >= 0.8}")
print(f"knn >= 0.75: {knn >= 0.75}")
print(f"hard_v_cls >= 0.8: {hard_v_cls >= 0.8}")
print(f"soft_v_cls >= 0.8: {soft_v_cls >= 0.8}")
print(f"accuracy_gridcv >= 0.8: {accuracy_gridcv >= 0.8}")

Logistic Regression accuracy: 0.9733
SVC accuracy: 0.9733
KNN accuracy: 0.9667
Voting (Hard) accuracy: 0.9733
Voting (Soft) accuracy: 0.9667
Best weighted voting accuracy: 0.9733

--- КРИТЕРИИ ПРОХОЖДЕНИЯ ---
lr >= 0.8: True
svc >= 0.8: True
knn >= 0.75: True
hard_v_cls >= 0.8: True
soft_v_cls >= 0.8: True
accuracy_gridcv >= 0.8: True


In [4]:
import pandas as pd
import numpy as np

df = pd.read_csv('heart.csv')
df.columns.tolist()

['Age',
 'Sex',
 'ChestPainType',
 'RestingBP',
 'Cholesterol',
 'FastingBS',
 'RestingECG',
 'MaxHR',
 'ExerciseAngina',
 'Oldpeak',
 'ST_Slope',
 'HeartDisease']

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import roc_auc_score, roc_curve

DATA_PATH = 'heart.csv'
df = pd.read_csv(DATA_PATH)
print(f"Данные загружены: {df.shape[0]} строк, {df.shape[1]} столбцов")

numeric_cols = df.select_dtypes(include=[np.number]).columns
for col in numeric_cols:
    Q1 = df[col].quantile(0.25)
    Q3 = df[col].quantile(0.75)
    IQR = Q3 - Q1
    lower = Q1 - 1.5 * IQR
    upper = Q3 + 1.5 * IQR
    df = df[(df[col] >= lower) & (df[col] <= upper)]

print(f"После удаления выбросов: {df.shape[0]} строк")

cat_cols = df.select_dtypes(include=['object']).columns
for col in cat_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])

X = df.drop('HeartDisease', axis=1)
y = df['HeartDisease']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

lr_model = LogisticRegression(solver='lbfgs', multi_class='multinomial', max_iter=200)
knn_model = KNeighborsClassifier()
svc_model = SVC(gamma='auto', probability=True)

hard_voting = VotingClassifier(estimators=[('lr', lr_model), ('knn', knn_model), ('svc', svc_model)], voting='hard')
VC_hard_cv_score = cross_val_score(hard_voting, X_train, y_train, cv=5, scoring='recall').mean()
print(f"VotingClassifier (hard) recall (CV=5): {VC_hard_cv_score:.4f}")

soft_voting = VotingClassifier(estimators=[('lr', lr_model), ('knn', knn_model), ('svc', svc_model)], voting='soft')
VC_soft_cv_score = cross_val_score(soft_voting, X_train, y_train, cv=5, scoring='recall').mean()
print(f"VotingClassifier (soft) recall (CV=5): {VC_soft_cv_score:.4f}")

soft_voting.fit(X_train, y_train)
y_pred_proba = soft_voting.predict_proba(X_test)[:, 1]
ROCAUCscore = roc_auc_score(y_test, y_pred_proba)
print(f"ROC AUC Score: {ROCAUCscore:.4f}")


Данные загружены: 918 строк, 12 столбцов
После удаления выбросов: 587 строк




VotingClassifier (hard) recall (CV=5): 0.8300
VotingClassifier (soft) recall (CV=5): 0.8400
ROC AUC Score: 0.9162


