# Zadanie 1

In [None]:
import pandas as pd

df = pd.read_csv('diabetes.csv')
X = df.drop('Outcome', axis=1)
y = df['Outcome']

In [3]:
from sklearn.model_selection import train_test_split
# 2. Podział na zbiory
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.25, random_state=42, stratify=y_train_full)
# Finalny podział: 60% train, 20% val, 20% test

In [4]:
from sklearn.preprocessing import StandardScaler

# 3. Skalowanie cech
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

In [5]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression

# 4. Modele
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'GaussianNB': GaussianNB(),
    'Linear Discriminant Analysis': LinearDiscriminantAnalysis(),
    'Quadratic Discriminant Analysis': QuadraticDiscriminantAnalysis()
}

In [6]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# 5. Trenowanie i walidacja
results = []

for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_val_scaled)
    
    results.append({
        'Model': name,
        'Accuracy': accuracy_score(y_val, y_pred),
        'Precision': precision_score(y_val, y_pred),
        'Recall': recall_score(y_val, y_pred),
        'F1 Score': f1_score(y_val, y_pred)
    })

In [7]:
# 6. Porównanie modeli
results_df = pd.DataFrame(results)
print("Wyniki na zbiorze walidacyjnym:\n", results_df.sort_values('F1 Score', ascending=False))

Wyniki na zbiorze walidacyjnym:
                              Model  Accuracy  Precision    Recall  F1 Score
0              Logistic Regression  0.798701   0.767442  0.611111  0.680412
1                       GaussianNB  0.798701   0.767442  0.611111  0.680412
2     Linear Discriminant Analysis  0.798701   0.767442  0.611111  0.680412
3  Quadratic Discriminant Analysis  0.772727   0.711111  0.592593  0.646465


In [8]:
# 7. Ewaluacja najlepszego modelu na zbiorze testowym
best_model_name = results_df.sort_values('F1 Score', ascending=False).iloc[0]['Model']
best_model = models[best_model_name]

y_test_pred = best_model.predict(X_test_scaled)
print(f"\nNajlepszy model: {best_model_name}")
print("Wyniki na zbiorze testowym:")
print("Accuracy:", accuracy_score(y_test, y_test_pred))
print("Precision:", precision_score(y_test, y_test_pred))
print("Recall:", recall_score(y_test, y_test_pred))
print("F1 Score:", f1_score(y_test, y_test_pred))


Najlepszy model: Logistic Regression
Wyniki na zbiorze testowym:
Accuracy: 0.7272727272727273
Precision: 0.6363636363636364
Recall: 0.5185185185185185
F1 Score: 0.5714285714285714


# Zadanie 2

In [20]:
df = pd.read_csv('agaricus-lepiota.data', na_values='?')
df_clean = df.dropna(axis='rows')

In [21]:
X = df_clean.iloc[:, 1:]
y = df_clean.iloc[:, 0]

In [22]:
from sklearn.preprocessing import OrdinalEncoder

encoder = OrdinalEncoder()
X_encoded = encoder.fit_transform(X)

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

In [24]:
from sklearn.naive_bayes import CategoricalNB

model = CategoricalNB()
model.fit(X_train, y_train)

In [25]:
y_pred = model.predict(X_test)

In [26]:
from sklearn.metrics import confusion_matrix

# 8. Wyniki
print("Unikalne klasy w y_test:", set(y_test))
print("Unikalne klasy w y_pred:", set(y_pred))

print("Macierz omyłek:\n", confusion_matrix(y_test, y_pred))
print("Dokładność:", accuracy_score(y_test, y_pred))
print("F1-miarą:", f1_score(y_test, y_pred, pos_label='p'))

Unikalne klasy w y_test: {'p', 'e'}
Unikalne klasy w y_pred: {np.str_('p'), np.str_('e')}
Macierz omyłek:
 [[702   3]
 [ 35 389]]
Dokładność: 0.9663418954827281
F1-miarą: 0.9534313725490197
