In [None]:
from tensorflow.keras.datasets import mnist
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, log_loss
import matplotlib.pyplot as plt
from IPython.display import clear_output


In [None]:
# Load the MNIST train/test split (images plus their digit labels).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale the pixels by 1/255 as float32 (puts 8-bit pixel values in [0, 1]),
# then flatten each 28x28 image into one 784-long feature vector.
# The -1 asks NumPy to infer that length from the remaining dimensions.
x_train = (x_train.astype("float32") / 255.0).reshape(len(x_train), -1)
x_test = (x_test.astype("float32") / 255.0).reshape(len(x_test), -1)

# Show the resulting shapes as the cell output.
x_train.shape, x_test.shape


In [None]:

# 3. Show the learning curve (log loss), measured on a small training subset
x_subset, y_subset = x_train[:5000], y_train[:5000]

# One log-loss value is appended per optimiser step.
losses = []

# max_iter=1 combined with warm_start=True lets every fit() call continue from
# the previously learned coefficients for one more lbfgs iteration, so the
# loss can be sampled between steps.
model = LogisticRegression(solver="lbfgs", warm_start=True, max_iter=1)

# Author's note on a fully converged reference configuration (it does not
# describe the one-step model above):
#     LogisticRegression(
#         max_iter=1000,
#         C=1.0,
#         solver="lbfgs",  # quasi-Newton method
#         multi_class="multinomial",
#         n_jobs=-1,
#     )
# The model was trained with max_iter=1000 to ensure convergence.
# The regularisation parameter C was set to 1.0 to keep a good balance between
# generalisation and training accuracy. The lbfgs optimiser was used because
# it is well suited to multiclass classification.

for step in range(70):
    # Take one more optimiser step, then score the model on the same subset.
    model.fit(x_subset, y_subset)
    probs = model.predict_proba(x_subset)
    losses.append(log_loss(y_subset, probs))

    # Swap out the previous figure instead of stacking one plot per step.
    clear_output(wait=True)

    fig, ax = plt.subplots(figsize=(8, 4))

    # All recorded points plus the connecting line, so the trend is visible.
    ax.plot(losses, marker="x", label="Log Loss")
    ax.set(
        xlabel="Iteration",
        ylabel="Log Loss",
        title="Inlärningskurva för Logistic Regression",
    )
    ax.grid(True)
    ax.legend()
    plt.show()

# Final model: a fresh LogisticRegression with a far larger iteration budget
# than the step-wise learning-curve model.
model = LogisticRegression(max_iter=1000)

# Fit on the complete training set. Keeping fit() as the last expression
# makes the fitted estimator the cell output.
model.fit(X=x_train, y=y_train)


In [None]:
index = 56  # position of the test image to inspect

# The flattened image (shape (784,)) and its ground-truth label.
single_image = x_test[index]
single_label = y_test[index]

# The estimator expects a 2-D batch, so present the image as a single-row
# matrix of shape (1, 784).
sample = single_image.reshape(1, -1)
y_pred = model.predict(sample)
probs = model.predict_proba(sample)[0]

# With a single sample this is 1.0 for a correct guess and 0.0 otherwise.
acc = accuracy_score([single_label], y_pred)

print("Predicted label:", y_pred[0])
print("True label:", single_label)
print("Predicted probabilities:", probs)
print("Accuracy for this single image:", acc)


# Restore the flat vector to its 28x28 pixel grid so it can be displayed.
image = x_test[index].reshape(28, 28)
label = y_test[index]

# Draw the digit twice: first captioned with the true label only, then with
# the true label next to the model's guess.
captions = (
    f"Rätt siffra: {label}",
    f"Rätt: {single_label} | Gissning: {y_pred[0]}",
)
for caption in captions:
    fig, ax = plt.subplots()
    ax.imshow(image, cmap="gray")
    ax.set_title(caption)
    ax.axis("off")
    plt.show()

# Visualise the learned coefficients as one 28x28 weight map per digit class.
fig, axes = plt.subplots(2, 5, figsize=(12, 6))
weights = model.coef_.reshape(10, 28, 28)

# "seismic" is a diverging colormap: its white midpoint only represents a
# zero weight when the colour limits are symmetric around 0. Left to
# auto-scale, every panel is stretched to its own min/max, which moves the
# midpoint away from zero and makes the panels incomparable. One shared,
# symmetric range fixes both problems.
weight_limit = np.abs(weights).max()

for i, ax in enumerate(axes.flat):
    ax.imshow(weights[i], cmap="seismic", vmin=-weight_limit, vmax=weight_limit)
    ax.set_title(f"Vikt-karta för siffra {i}")
    ax.axis("off")

plt.tight_layout()
plt.show()




In [None]:
# Class probabilities (0-1) for the selected test image.
probs = model.predict_proba([single_image])[0]

# Convert to percentages for plotting.
probs_percent = probs * 100

# One bar per class; the positions follow the length of the probability
# vector instead of a hard-coded 10.
positions = range(len(probs_percent))

fig, ax = plt.subplots(figsize=(8, 4))
bars = ax.bar(positions, probs_percent)

ax.set_xlabel("Siffra")
ax.set_ylabel("Sannolikhet (%)")
ax.set_title("Modellens sannolikhetsfördelning")
ax.set_xticks(positions)

# Print the percentage (2 decimals) just above each bar.
for i, val in enumerate(probs_percent):
    ax.text(i, val + 0.5, f"{val:.2f}%", ha="center", va="bottom", fontsize=9)

# Leave headroom above 100 %: the labels sit 0.5 above their bars, so with the
# axis capped at exactly 100 the label of a confident prediction would be
# drawn outside the axes and run into the title. The ticks still stop at 100.
ax.set_ylim(0, 110)
ax.set_yticks(range(0, 101, 20))
plt.show()


In [None]:
# Predict every image in the test set (not just the single one inspected
# earlier) and report the overall share of correct predictions.
y_pred = model.predict(x_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Total accuracy on test set:", acc)

In [None]:
from sklearn.metrics import classification_report

# Text summary of per-class metrics for the test-set predictions.
report = classification_report(y_test, y_pred)
print(report)


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Count matrix of true labels (rows) against predicted labels (columns),
# matching the axis labels set below.
cm = confusion_matrix(y_test, y_pred)

# Render the counts as a heatmap without per-cell annotations.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=False, cmap="Blues", ax=ax)
ax.set(
    xlabel="Predicted label",
    ylabel="True label",
    title="Confusion Matrix – Logistic Regression",
)
plt.show()
