In [None]:
import os
import json
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.preprocessing import image
from keras.utils import to_categorical
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint,ReduceLROnPlateau, EarlyStopping
from concurrent.futures import ThreadPoolExecutor, as_completed
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read from Colab
from google.colab import drive
drive.mount('/content/drive')


In [None]:
# Make the dataset folder the working directory; the cells below use relative paths
%cd '/content/drive/My Drive/Deep_Learning_homework/Data'

In [None]:
# Data locations (relative to the current working directory)
csv_path = "new_train.csv"
json_path = "label_num_to_disease_map.json"
image_folder = "train_images"

# Class names: the JSON file is parsed into `class_names`
with open(json_path) as map_file:
    class_names = json.load(map_file)

In [None]:
# Load the CSV at csv_path into a DataFrame (later cells read its 'image_id' and 'label' columns)
df = pd.read_csv(csv_path)

# **Etiket Sayısı hesaplama**

In [None]:
# Count how many rows carry each value of the 'label' column
label_counts = df['label'].value_counts()

# Print the count for labels 0..4 (0 when a label does not occur)
for label_id in range(5):
    print(f"Etiket {label_id} olanların sayısı:", label_counts.get(label_id, 0))

In [None]:
# Per-class image counts. The "labelled" count is simply a copy of the image
# count, because both come from the same rows of the CSV.
class_counts = {}
for label, class_name in class_names.items():
    # JSON keys are strings, so convert to int before comparing with the label column
    class_images = int(df['label'].eq(int(label)).sum())
    labeled_images = class_images
    class_counts[class_name] = dict(images=class_images, labeled_images=labeled_images)

    print(f"Sınıf: {class_name}, Toplam Resim Sayısı: {class_images}, Etiketli Resim Sayısı: {labeled_images}")

In [None]:
import os

# Report the on-disk size of the CSV selected by csv_path.
# The message interpolates csv_path instead of hard-coding "train.csv", so the
# printed file name always matches the file that was actually measured.
csv_size = os.path.getsize(csv_path)
print(f"{csv_path} dosyasının boyutu: {csv_size} bytes")

# Count the entries in image_folder (os.listdir returns every entry, not only image files)
image_files = os.listdir(image_folder)
num_images = len(image_files)
print(f"{image_folder} klasöründe {num_images} resim bulunmaktadır.")


In [None]:
import os

# Spot check: is one specific image file present in the image folder?
image_folder = "train_images"
image_id = "1000015157.jpg"

img_path = os.path.join(image_folder, image_id)

presence_on_disk = "bulunuyor" if os.path.exists(img_path) else "bulunmuyor"
print(f"{image_id} dosyası {image_folder} klasöründe {presence_on_disk}.")


In [None]:
import pandas as pd

# NOTE: csv_path is re-pointed to the original train.csv here, and df is reloaded from it
csv_path = "train.csv"
image_id_to_check = "1000015157.jpg"

# Read the CSV file
df = pd.read_csv(csv_path)

# Report whether the given file name occurs in the image_id column
is_listed = df['image_id'].isin([image_id_to_check]).any()
presence_in_csv = "bulunuyor" if is_listed else "bulunmuyor"
print(f"{image_id_to_check} dosyası train.csv dosyasında {presence_in_csv}.")


In [None]:
import pandas as pd
import os

# Keep only the CSV rows whose image file actually exists in image_folder.
#
# The rows are selected in a single filtered indexing step instead of growing a
# DataFrame with pd.concat inside the loop: repeated concat copies the whole
# frame on every iteration (quadratic in the number of rows) and starts from an
# empty frame, which degrades the column dtypes.
df = pd.read_csv(csv_path)

# One boolean per row: True when the referenced image file is present on disk
file_exists = [
    os.path.exists(os.path.join(image_folder, image_id))
    for image_id in df['image_id']
]

# Same two columns and the same fresh 0..n-1 index as the row-by-row version
new_df = df.loc[file_exists, ['image_id', 'label']].reset_index(drop=True)

# Show the result
print(new_df)


In [None]:
# Number of rows in df (displayed as the cell output)
len(df)

In [None]:
# Persist the filtered table as new_train.csv, without writing the index column
new_df.to_csv('new_train.csv', index=False)

# **Veri dengesizliği**

In [None]:
import pandas as pd
from sklearn.utils import resample

In [None]:
# Split the dataset by label: class 0, class 3, and everything else
is_class_0 = df['label'] == 0
is_class_3 = df['label'] == 3

df_class_0 = df[is_class_0]
df_class_3 = df[is_class_3]

# Remaining classes: rows that are neither label 0 nor label 3
df_others = df[~(is_class_0 | is_class_3)]


In [None]:
# Bring class 0 to exactly 2000 rows: sample with replacement only when the
# class has fewer than 2000 rows, otherwise sample without replacement
df_class_0_resampled = resample(
    df_class_0,
    replace=len(df_class_0) < 2000,
    n_samples=2000,
    random_state=123,
)

# Same rule for class 3
df_class_3_resampled = resample(
    df_class_3,
    replace=len(df_class_3) < 2000,
    n_samples=2000,
    random_state=123,
)


In [None]:
# Stack the resampled classes and the untouched ones, then shuffle the rows
# with a fixed seed and renumber the index from 0
resampled_parts = [df_class_0_resampled, df_class_3_resampled, df_others]
df_resampled = (
    pd.concat(resampled_parts)
    .sample(frac=1, random_state=123)
    .reset_index(drop=True)
)

# Check the resulting class distribution
print(df_resampled['label'].value_counts())


Resample all classes to the same number of samples (`target_count`)

In [None]:
import pandas as pd
from sklearn.utils import resample

# Data locations
csv_path = "new_train.csv"
json_path = "label_num_to_disease_map.json"
image_folder = "train_images"

# Load the class names from the JSON mapping file
with open(json_path) as map_file:
    class_names = json.load(map_file)

# Load the dataset table
df = pd.read_csv(csv_path)


In [None]:
# Target number of samples per class
target_count = 1000

# Resample every class to target_count rows; the per-class frames are collected
# in first-appearance order of the labels and concatenated afterwards
resampled_parts = []
for label in df['label'].unique():
    df_class = df[df['label'] == label]

    # Sample with replacement only when the class is smaller than the target
    df_class_resampled = resample(
        df_class,
        replace=len(df_class) < target_count,
        n_samples=target_count,
        random_state=123,
    )
    resampled_parts.append(df_class_resampled)

df_resampled = pd.concat(resampled_parts)

# Shuffle the rows with a fixed seed and renumber the index
df_resampled = df_resampled.sample(frac=1, random_state=123).reset_index(drop=True)

# Check the resulting class distribution
print(df_resampled['label'].value_counts())


In [None]:
# Preview the first rows of the resampled dataset
print(df_resampled.head())

# Save the resampled dataset to resampled_train.csv (index column not written)
df_resampled.to_csv("resampled_train.csv", index=False)


# **2. GAN ile Veri Artırma (Etiket 1)**

In [None]:
import torch
import pandas as pd
import json
from sklearn.utils import resample
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import os

In [None]:
# Keep only the rows whose label is 1
df_class_1 = df.loc[df['label'] == 1]

# Draw 2000 rows with replacement (fixed seed) from the label-1 subset
df_class_1_resampled = resample(df_class_1, n_samples=2000, replace=True, random_state=123)

In [None]:
# Dataset class
class CassavaDataset(Dataset):
    """Map-style dataset that yields ``(image, label)`` pairs from a dataframe.

    Rows are addressed by position. Column 0 is read as the image file name
    (joined onto ``image_folder``) and column 1 as the label.
    """

    def __init__(self, dataframe, image_folder, transform=None):
        """Store the table, the image directory and the optional transform."""
        self.dataframe, self.image_folder, self.transform = dataframe, image_folder, transform

    def __len__(self):
        """Return the number of rows in the wrapped dataframe."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load row ``idx``: open its image as RGB, transform it if requested, return it with the label."""
        file_name = self.dataframe.iloc[idx, 0]
        picture = Image.open(os.path.join(self.image_folder, file_name)).convert('RGB')
        label = self.dataframe.iloc[idx, 1]

        # Without a (truthy) transform the PIL image is returned untouched
        if not self.transform:
            return picture, label
        return self.transform(picture), label

# Transforms: resize to 128x128, convert to a tensor, then normalise every
# channel with mean 0.5 and std 0.5
preprocessing_steps = [
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
]
transform = transforms.Compose(preprocessing_steps)

# Dataset and DataLoader over the resampled label-1 rows
dataset = CassavaDataset(df_class_1_resampled, image_folder, transform)
dataloader = DataLoader(dataset=dataset, batch_size=64, shuffle=True)

In [None]:
import torch.nn as nn
import torch.optim as optim

class Generator(nn.Module):
    """Map latent vectors of size ``latent_dim`` to 3x128x128 images in [-1, 1].

    The latent vector is projected to a 128x16x16 feature map and upsampled by
    three stride-2 transposed convolutions (16 -> 32 -> 64 -> 128). With only
    two upsampling stages the output was 3x64x64, which did not match the
    128x128 real images nor the discriminator's ``Linear(128 * 32 * 32, 1)``
    layer, so passing generated images to the discriminator raised a shape
    error. The final ``Tanh`` keeps the output in [-1, 1].

    Args:
        latent_dim: size of the input noise vector.
    """

    def __init__(self, latent_dim):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            # Project the noise vector and reshape it to a 128-channel 16x16 map
            nn.Linear(latent_dim, 128 * 16 * 16),
            nn.ReLU(True),
            nn.BatchNorm1d(128 * 16 * 16),
            nn.Unflatten(1, (128, 16, 16)),
            # kernel 4 / stride 2 / padding 1 doubles the spatial size: 16x16 -> 32x32
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.ReLU(True),
            nn.BatchNorm2d(64),
            # 32x32 -> 64x64
            nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1),
            nn.ReLU(True),
            nn.BatchNorm2d(32),
            # 64x64 -> 128x128, three output channels
            nn.ConvTranspose2d(32, 3, kernel_size=4, stride=2, padding=1),
            nn.Tanh()
        )

    def forward(self, x):
        """Return a ``(batch, 3, 128, 128)`` tensor for a ``(batch, latent_dim)`` input."""
        return self.model(x)

class Discriminator(nn.Module):
    """Score a batch of 3-channel images with one sigmoid output per image.

    Two stride-2 convolutions halve the spatial size twice; the flattened
    result is fed to ``Linear(128 * 32 * 32, 1)``, so inputs must be 128x128.
    """

    def __init__(self):
        super().__init__()
        layers = [
            nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Flatten(),
            # 128 channels x 32 x 32 positions after the two convolutions
            nn.Linear(128 * 32 * 32, 1),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of values in [0, 1]."""
        scores = self.model(x)
        return scores

latent_dim = 100

# Use the GPU when one is available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build both networks (generator first, then discriminator) and move them to the device
generator = Generator(latent_dim).to(device)
discriminator = Discriminator().to(device)

# Binary cross-entropy on the discriminator's sigmoid output; one Adam optimizer per network
criterion = nn.BCELoss()
optimizer_G = optim.Adam(generator.parameters(), lr=2e-4)
optimizer_D = optim.Adam(discriminator.parameters(), lr=2e-4)


In [None]:
# Alternating GAN training: for every batch the discriminator is updated first,
# then the generator is updated against the freshly updated discriminator.
num_epochs = 50

for epoch in range(num_epochs):
    # The dataset labels are ignored; only the images are used
    for i, (imgs, _) in enumerate(dataloader):
        # Real images, plus target tensors of ones ("real") and zeros ("fake")
        # sized to the current batch
        real_imgs = imgs.to(device)
        real_labels = torch.ones(imgs.size(0), 1).to(device)
        fake_labels = torch.zeros(imgs.size(0), 1).to(device)

        # Random noise for the generator, one latent vector per real image
        z = torch.randn(imgs.size(0), latent_dim).to(device)
        gen_imgs = generator(z)

        # Train the discriminator: real images should score 1, generated ones 0.
        # detach() keeps this backward pass from reaching the generator.
        # NOTE(review): the discriminator's Linear layer expects 128*32*32
        # features, so gen_imgs must have the same spatial size as real_imgs.
        optimizer_D.zero_grad()
        real_loss = criterion(discriminator(real_imgs), real_labels)
        fake_loss = criterion(discriminator(gen_imgs.detach()), fake_labels)
        d_loss = real_loss + fake_loss
        d_loss.backward()
        optimizer_D.step()

        # Train the generator: it is rewarded when the discriminator scores
        # its images as real
        optimizer_G.zero_grad()
        g_loss = criterion(discriminator(gen_imgs), real_labels)
        g_loss.backward()
        optimizer_G.step()

    # The reported losses are those of the last batch of the epoch
    print(f"Epoch {epoch+1}/{num_epochs} | D Loss: {d_loss.item()} | G Loss: {g_loss.item()}")


In [None]:
import matplotlib.pyplot as plt
from torchvision.utils import save_image

# Image generation after training
def generate_images(generator, latent_dim, n_images=64):
    """Sample ``n_images`` images from ``generator`` and return them on the CPU.

    Inference runs in eval mode and under ``torch.no_grad()``, so no autograd
    graph is built and the returned tensor does not require grad. The
    generator's previous train/eval mode is restored afterwards (even if the
    forward pass raises) instead of being forced to train mode. The noise is
    created on the device of the generator's own parameters, so the function
    does not depend on a global ``device`` variable.

    Args:
        generator: module mapping ``(n, latent_dim)`` noise to image data.
        latent_dim: size of each noise vector.
        n_images: number of images to sample.

    Returns:
        A CPU tensor of shape ``(n_images, 3, 128, 128)``.

    Raises:
        RuntimeError: if the generator output cannot be viewed as
            ``(n_images, 3, 128, 128)``.
    """
    was_training = generator.training

    # Follow the generator's device; parameter-free modules fall back to the CPU
    first_param = next(generator.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    generator.eval()
    try:
        with torch.no_grad():
            z = torch.randn(n_images, latent_dim).to(target_device)
            gen_imgs = generator(z).view(n_images, 3, 128, 128).cpu()
    finally:
        # Put the module back into whichever mode the caller had it in
        generator.train(was_training)
    return gen_imgs

# Generate a sample batch and save it as one 8-per-row image grid
gen_imgs = generate_images(generator, latent_dim)
save_image(gen_imgs, "generated_images.png", nrow=8, normalize=True)

# Show the generated images on an 8x8 grid of axes
fig, axes = plt.subplots(8, 8, figsize=(10, 10))
for ax, img in zip(axes.flatten(), gen_imgs):
    # Channels-first -> channels-last for imshow, then rescale with x * 0.5 + 0.5
    channels_last = img.permute(1, 2, 0).detach().numpy()
    ax.imshow(channels_last * 0.5 + 0.5)
    ax.axis('off')
plt.show()


# **Preprocessing**

In [None]:
import os
import pandas as pd
import cv2


# Initialise the image/label lists and the success/skip counters
data = []
labels = []
success_count = 0
skip_count = 0

def process_image(image_id, label):
    """Load one image from ``image_folder`` and resize it to 224x224.

    Returns a ``(image, label, success)`` tuple. ``image`` is ``None`` and
    ``success`` is ``False`` when the file does not exist or ``cv2.imread``
    could not decode it.
    """
    img_path = os.path.join(image_folder, image_id)

    # Missing file: nothing to load
    if not os.path.exists(img_path):
        return (None, label, False)

    img = cv2.imread(img_path)
    # cv2.imread signals an unreadable file by returning None
    if img is None:
        return (None, label, False)

    return (cv2.resize(img, (224, 224)), label, True)

# Process the images in parallel with a thread pool.
# executor.map yields the results in submission order, so `data` and `labels`
# are filled in the same order on every run. Collecting with as_completed
# instead yields in completion order, which varies between runs and makes a
# later seeded train/test split non-reproducible.
with ThreadPoolExecutor() as executor:
    results = executor.map(process_image, df_resampled['image_id'], df_resampled['label'])

    for result, label, success in results:
        if success:
            data.append(result)
            labels.append(label)
            success_count += 1
        else:
            skip_count += 1

# Print the totals once all images have been handled
print(f"Başarıyla eklenen resim sayısı: {success_count}")
print(f"Atlanan resim sayısı: {skip_count}")

In [None]:
# Convert the collected image and label lists to NumPy arrays
data = np.array(data)
labels = np.array(labels)

In [None]:

# Data and labels collection: sequential pass over new_train.csv.
# Note that this starts from empty lists again, so it replaces whatever an
# earlier cell stored in `data` / `labels`.
data = []
labels = []
success_count = 0
skip_count = 0

df = pd.read_csv("new_train.csv")

for image_id, label in zip(df['image_id'], df['label']):
    img_path = os.path.join(image_folder, image_id)

    # Skip rows whose image file is missing
    if not os.path.exists(img_path):
        print(f"Uyarı: {img_path} bulunamadı, bu nedenle atlandı.")
        skip_count += 1
        continue

    img = cv2.imread(img_path)
    # cv2.imread returns None (it does not raise) for a corrupt or unreadable
    # file; count it as skipped instead of letting cv2.resize fail on None
    if img is None:
        print(f"Uyarı: {img_path} okunamadı, bu nedenle atlandı.")
        skip_count += 1
        continue

    resized_img = cv2.resize(img, (224, 224))
    data.append(resized_img)
    labels.append(label)
    success_count += 1

# Convert to NumPy arrays
data = np.array(data)
labels = np.array(labels)

print(f"Başarılı işlenen fotoğraf sayısı: {success_count}")
print(f"Atlanan fotoğraf sayısı: {skip_count}")

In [None]:
# Train-test split: hold out 20% of the samples for testing (fixed seed, not stratified by label)
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

In [None]:
# One output slot per entry of the class-name mapping
num_classes = len(class_names)

# Make sure both label arrays are integer typed
y_train, y_test = y_train.astype(int), y_test.astype(int)

# One-hot encode the training and the testing labels
y_train_one_hot = to_categorical(y_train, num_classes)
y_test_one_hot = to_categorical(y_test, num_classes)

In [None]:
# Display the shapes of the one-hot encoded train and test labels
y_train_one_hot.shape,y_test_one_hot.shape

In [None]:
# Display the integer train and test label arrays
y_train,y_test

# **Model Oluşturma**

In [None]:
def vggnet(input_size=(224,224,3),num_classes=5):
    """Build a VGG-style CNN classifier.

    Five convolutional stages of (2, 2, 3, 3, 3) same-padded 3x3 ReLU
    convolutions with (64, 128, 256, 512, 512) filters, each followed by 2x2
    max pooling; then two 4096-unit ReLU dense layers and a softmax output.

    Args:
        input_size: shape of one input image.
        num_classes: number of units in the softmax output layer.

    Returns:
        The (uncompiled) Keras ``Model``.
    """
    inputs = Input(input_size)

    # (number of conv layers, filters per layer) for each stage, in order
    stage_config = ((2, 64), (2, 128), (3, 256), (3, 512), (3, 512))

    features = inputs
    for n_convs, n_filters in stage_config:
        for _ in range(n_convs):
            features = Conv2D(filters=n_filters, kernel_size=(3,3), padding="same", activation="relu")(features)
        # Each stage ends by halving the spatial resolution
        features = MaxPooling2D((2, 2))(features)

    # Flatten, then the two fully connected layers
    features = Flatten()(features)
    for _ in range(2):
        features = Dense(4096, activation='relu')(features)

    # Output layer
    output = Dense(num_classes, activation='softmax')(features)

    return Model(inputs=inputs, outputs=output)



In [None]:
# Build the from-scratch vggnet model with 5 output classes
vggnet_model = vggnet(num_classes=5)

# Print the model summary
vggnet_model.summary()
# Compile the model: Adam with its first argument set to 0.01, categorical
# cross-entropy loss (targets are one-hot encoded) and accuracy as the metric.
# NOTE(review): the fit/evaluate cells below use `model`, not `vggnet_model` — confirm which is intended.
vggnet_model.compile(optimizer= Adam(0.01),
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])


In [None]:
from keras.applications import VGG16
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten

# Load VGG-16 with ImageNet weights and without its classifier head
vgg16 = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Start a new Sequential model and copy the VGG-16 layers into it one by one
model = Sequential()
for layer in vgg16.layers:
    model.add(layer)

# New classifier head: flatten, two 4096-unit dense layers each followed by
# 50% dropout, and a 5-way softmax output
head_layers = [
    Flatten(),
    Dense(4096, activation='relu'),
    Dropout(0.5),
    Dense(4096, activation='relu'),
    Dropout(0.5),
    Dense(5, activation='softmax'),
]
for head_layer in head_layers:
    model.add(head_layer)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Checkpoint the best model seen so far, judged by validation accuracy.
# The model is compiled with metrics=['accuracy'], so the validation metric is
# logged as 'val_accuracy' (the same key the history plot reads). Monitoring
# 'val_acc' would never find the metric and no checkpoint would be written.
checkpoint = ModelCheckpoint("best_vggnet_model_weights.h5",
                             monitor='val_accuracy',  # save criterion: validation accuracy
                             verbose=1,
                             save_best_only=True,    # keep only the best-performing checkpoint
                             mode='max')
# Callbacks
# Multiply the learning rate by 0.1 after 5 epochs without val_loss improvement (floor 1e-6)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=5, mode='auto', factor=0.1, min_lr=0.000001)
# Stop after 10 epochs without val_loss improvement and restore the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [None]:
# Train `model` for up to 100 epochs with batch size 32; the last 20% of the
# training data is held out for validation and the three callbacks are applied.
# NOTE(review): the checkpoint file name mentions "vggnet" but the model fitted here is `model`.
history=model.fit(X_train, y_train_one_hot, epochs=100, batch_size=32, validation_split=0.2, verbose=1,callbacks=[reduce_lr,checkpoint,early_stopping])

In [None]:
# Model evaluation on the held-out test set.
# Index 1 of the result is reported as accuracy (the model was compiled with metrics=['accuracy']).
evaluation = model.evaluate(X_test, y_test_one_hot)
test_accuracy = evaluation[1]
print("Test accuracy:", test_accuracy)


In [None]:

# Predict class probabilities for the test set
predicted_probabilities = model.predict(X_test)

# One-vs-rest view of class index 1: its predicted probability is the score
# and its one-hot column is the binary ground truth
predicted_positive_probabilities = predicted_probabilities[:, 1]

# False/true positive rates and thresholds, then the area under the curve
fpr, tpr, thresholds = roc_curve(y_test_one_hot[:, 1], predicted_positive_probabilities)
roc_auc = auc(fpr, tpr)

# Draw the ROC curve together with the diagonal reference line
fig, ax = plt.subplots(figsize=(10, 8))
ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic (ROC)')
ax.legend(loc="lower right")
plt.show()


In [None]:
from sklearn.metrics import confusion_matrix

# Turn predicted probabilities and one-hot targets into class indices
predictions = model.predict(X_test)
predicted_labels = predictions.argmax(axis=1)
true_labels = y_test_one_hot.argmax(axis=1)

# Confusion matrix: true labels first, predicted labels second
cm = confusion_matrix(true_labels, predicted_labels)
print("Confusion Matrix:", cm, sep="\n")

In [None]:
# Set the figure size
plt.figure(figsize=(20, 12))

# Extract the training history
train_accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']
train_loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(train_accuracy) + 1)

# One panel per metric: (name, training curve, validation curve)
panels = [
    ('Accuracy', train_accuracy, val_accuracy),
    ('Loss', train_loss, val_loss),
]

for panel_idx, (metric_name, train_values, val_values) in enumerate(panels, start=1):
    plt.subplot(2, 2, panel_idx)
    plt.plot(epochs, train_values, 'b', label=f'Training {metric_name}')
    plt.plot(epochs, val_values, 'r', label=f'Validation {metric_name}')
    plt.title(f'Training and Validation {metric_name}')
    plt.xlabel('Epochs')
    plt.ylabel(metric_name)
    plt.legend()
    plt.xticks(epochs)  # one tick per epoch

plt.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import classification_report

# Class indices from the predicted probabilities and from the one-hot targets
predictions = model.predict(X_test)
predicted_labels = predictions.argmax(axis=1)
true_labels = y_test_one_hot.argmax(axis=1)

class_report = classification_report(true_labels, predicted_labels)

# Print the classification report
print("Classification Report:", class_report, sep="\n")


In [None]:
# Karışıklık matrisini görselleştir
import seaborn as sns
import matplotlib.pyplot as plt

# Annotated heatmap of the confusion matrix, with the x tick labels moved to the top.
# NOTE(review): class_names is the dict loaded from JSON; passing it as tick
# labels presumably shows its keys rather than the disease names — confirm.
plt.figure(figsize=(10, 8))
hm = sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="YlOrRd",
    xticklabels=class_names,
    yticklabels=class_names,
)
hm.tick_params(labeltop=True, labelbottom=False, top=True, bottom=False)
hm.set_xlabel('AI Prediction')
hm.set_ylabel('Actual Label')
plt.show()