In [None]:
import time
from sklearn.linear_model import LogisticRegression
from PIL import Image
import numpy as np
import os
from sklearn.metrics import accuracy_score

# Classifier for this cell: L2-penalised logistic regression fitted with the
# SAGA solver. Keyword arguments are grouped by purpose.
model = LogisticRegression(
    # --- optimiser ---
    solver='saga',
    max_iter=50,
    tol=1e-4,
    warm_start=True,
    # --- regularisation / model form ---
    penalty='l2',
    C=0.1,
    fit_intercept=True,
    class_weight='balanced',
    # --- runtime behaviour ---
    random_state=42,
    n_jobs=-1,
    verbose=1,
)

def initData(datatype, resize_param=30):
    """Load one split of the chest X-ray dataset as flattened grayscale vectors.

    Reads every image found in ``chest_Xray/<datatype>/NORMAL`` and
    ``chest_Xray/<datatype>/PNEUMONIA``, converts it to 8-bit grayscale,
    resizes it to ``resize_param`` x ``resize_param`` pixels and flattens it.

    Args:
        datatype: Name of the split sub-folder (the notebook uses "train",
            "test" and "val").
        resize_param: Side length, in pixels, of the square the images are
            resized to.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays. ``x`` has one row of
        ``resize_param ** 2`` pixel values divided by 255 per image; ``y``
        holds the matching labels (0 = NORMAL, 1 = PNEUMONIA).

    Raises:
        FileNotFoundError: If a category folder is missing (from os.listdir).
    """
    x = []  # Images
    y = []  # Labels

    normalisation = 255.0

    for label, category in enumerate(["NORMAL", "PNEUMONIA"]):
        folder = f"chest_Xray/{datatype}/{category}"
        # os.listdir order is arbitrary; sorting keeps the row order (and so
        # the result of a stochastic solver) identical on every machine.
        for filename in sorted(os.listdir(folder)):
            img_path = os.path.join(folder, filename)
            # Hidden files (e.g. .DS_Store) and sub-directories are not images.
            if filename.startswith(".") or not os.path.isfile(img_path):
                continue
            # The context manager closes the file handle once pixels are read.
            with Image.open(img_path) as raw:
                img = raw.convert("L").resize((resize_param, resize_param))
            x.append(np.array(img).flatten())
            y.append(label)

    return np.array(x) / normalisation, np.array(y)

##### MAIN #####

if __name__ == "__main__":
    # Full run for this cell's image size: load the three dataset splits,
    # fit the module-level `model`, then report accuracy on the test and
    # validation splits. Elapsed times use time.perf_counter(), a monotonic
    # clock, so they cannot be skewed by system clock adjustments.

    #### Training data initialization ####

    print("Initializing training data")
    start_trainingData = time.perf_counter()
    x_train, y_train = initData("train")
    end_trainingData = time.perf_counter() - start_trainingData
    print("Training data initialized in", end_trainingData, "seconds")


    #### Test data initialization ####

    print("Initializing test data")
    start_testData = time.perf_counter()
    x_test, y_test = initData("test")
    end_testData = time.perf_counter() - start_testData
    print("Test data initialized in", end_testData, "seconds")


    #### Validation data initialization ####

    print("Initializing validation data")
    start_valData = time.perf_counter()
    x_val, y_val = initData("val")
    end_valData = time.perf_counter() - start_valData
    print("Validation data initialized in", end_valData, "seconds")


    #### Model training ####

    start_trainingTime = time.perf_counter()
    model.fit(x_train, y_train)
    end_trainingTime = time.perf_counter()

    print(f"Temps d'entraînement : {end_trainingTime - start_trainingTime:.2f} secondes")


    #### Prediction ####

    y_pred = model.predict(x_test)
    print("Précision sur le set de test :", accuracy_score(y_test, y_pred))

    y_eval_pred = model.predict(x_val)
    print("Précision sur le set d'évaluation :", accuracy_score(y_val, y_eval_pred))

    # n_iter_ is the number of solver iterations actually run; a value equal
    # to max_iter means the solver hit the cap instead of reaching `tol`.
    print("Arret de l'entrainement à l'itération :", model.n_iter_)

In [None]:
import time
from sklearn.linear_model import LogisticRegression
from PIL import Image
import numpy as np
import os
from sklearn.metrics import accuracy_score

# Classifier for this cell: L2-penalised logistic regression fitted with the
# SAGA solver. Keyword arguments are grouped by purpose.
model = LogisticRegression(
    # --- optimiser ---
    solver='saga',
    max_iter=50,
    tol=1e-4,
    warm_start=True,
    # --- regularisation / model form ---
    penalty='l2',
    C=0.1,
    fit_intercept=True,
    class_weight='balanced',
    # --- runtime behaviour ---
    random_state=42,
    n_jobs=-1,
    verbose=1,
)

def initData(datatype, resize_param=20):
    """Load one split of the chest X-ray dataset as flattened grayscale vectors.

    Reads every image found in ``chest_Xray/<datatype>/NORMAL`` and
    ``chest_Xray/<datatype>/PNEUMONIA``, converts it to 8-bit grayscale,
    resizes it to ``resize_param`` x ``resize_param`` pixels and flattens it.

    Args:
        datatype: Name of the split sub-folder (the notebook uses "train",
            "test" and "val").
        resize_param: Side length, in pixels, of the square the images are
            resized to.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays. ``x`` has one row of
        ``resize_param ** 2`` pixel values divided by 255 per image; ``y``
        holds the matching labels (0 = NORMAL, 1 = PNEUMONIA).

    Raises:
        FileNotFoundError: If a category folder is missing (from os.listdir).
    """
    x = []  # Images
    y = []  # Labels

    normalisation = 255.0

    for label, category in enumerate(["NORMAL", "PNEUMONIA"]):
        folder = f"chest_Xray/{datatype}/{category}"
        # os.listdir order is arbitrary; sorting keeps the row order (and so
        # the result of a stochastic solver) identical on every machine.
        for filename in sorted(os.listdir(folder)):
            img_path = os.path.join(folder, filename)
            # Hidden files (e.g. .DS_Store) and sub-directories are not images.
            if filename.startswith(".") or not os.path.isfile(img_path):
                continue
            # The context manager closes the file handle once pixels are read.
            with Image.open(img_path) as raw:
                img = raw.convert("L").resize((resize_param, resize_param))
            x.append(np.array(img).flatten())
            y.append(label)

    return np.array(x) / normalisation, np.array(y)

##### MAIN #####

if __name__ == "__main__":
    # Full run for this cell's image size: load the three dataset splits,
    # fit the module-level `model`, then report accuracy on the test and
    # validation splits. Elapsed times use time.perf_counter(), a monotonic
    # clock, so they cannot be skewed by system clock adjustments.

    #### Training data initialization ####

    print("Initializing training data")
    start_trainingData = time.perf_counter()
    x_train, y_train = initData("train")
    end_trainingData = time.perf_counter() - start_trainingData
    print("Training data initialized in", end_trainingData, "seconds")


    #### Test data initialization ####

    print("Initializing test data")
    start_testData = time.perf_counter()
    x_test, y_test = initData("test")
    end_testData = time.perf_counter() - start_testData
    print("Test data initialized in", end_testData, "seconds")


    #### Validation data initialization ####

    print("Initializing validation data")
    start_valData = time.perf_counter()
    x_val, y_val = initData("val")
    end_valData = time.perf_counter() - start_valData
    print("Validation data initialized in", end_valData, "seconds")


    #### Model training ####

    start_trainingTime = time.perf_counter()
    model.fit(x_train, y_train)
    end_trainingTime = time.perf_counter()

    print(f"Temps d'entraînement : {end_trainingTime - start_trainingTime:.2f} secondes")


    #### Prediction ####

    y_pred = model.predict(x_test)
    print("Précision sur le set de test :", accuracy_score(y_test, y_pred))

    y_eval_pred = model.predict(x_val)
    print("Précision sur le set d'évaluation :", accuracy_score(y_val, y_eval_pred))

    # n_iter_ is the number of solver iterations actually run; a value equal
    # to max_iter means the solver hit the cap instead of reaching `tol`.
    print("Arret de l'entrainement à l'itération :", model.n_iter_)

In [None]:
import time
from sklearn.linear_model import LogisticRegression
from PIL import Image
import numpy as np
import os
from sklearn.metrics import accuracy_score

# Classifier for this cell: L2-penalised logistic regression fitted with the
# SAGA solver. Keyword arguments are grouped by purpose.
model = LogisticRegression(
    # --- optimiser ---
    solver='saga',
    max_iter=50,
    tol=1e-4,
    warm_start=True,
    # --- regularisation / model form ---
    penalty='l2',
    C=0.1,
    fit_intercept=True,
    class_weight='balanced',
    # --- runtime behaviour ---
    random_state=42,
    n_jobs=-1,
    verbose=1,
)

def initData(datatype, resize_param=15):
    """Load one split of the chest X-ray dataset as flattened grayscale vectors.

    Reads every image found in ``chest_Xray/<datatype>/NORMAL`` and
    ``chest_Xray/<datatype>/PNEUMONIA``, converts it to 8-bit grayscale,
    resizes it to ``resize_param`` x ``resize_param`` pixels and flattens it.

    Args:
        datatype: Name of the split sub-folder (the notebook uses "train",
            "test" and "val").
        resize_param: Side length, in pixels, of the square the images are
            resized to.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays. ``x`` has one row of
        ``resize_param ** 2`` pixel values divided by 255 per image; ``y``
        holds the matching labels (0 = NORMAL, 1 = PNEUMONIA).

    Raises:
        FileNotFoundError: If a category folder is missing (from os.listdir).
    """
    x = []  # Images
    y = []  # Labels

    normalisation = 255.0

    for label, category in enumerate(["NORMAL", "PNEUMONIA"]):
        folder = f"chest_Xray/{datatype}/{category}"
        # os.listdir order is arbitrary; sorting keeps the row order (and so
        # the result of a stochastic solver) identical on every machine.
        for filename in sorted(os.listdir(folder)):
            img_path = os.path.join(folder, filename)
            # Hidden files (e.g. .DS_Store) and sub-directories are not images.
            if filename.startswith(".") or not os.path.isfile(img_path):
                continue
            # The context manager closes the file handle once pixels are read.
            with Image.open(img_path) as raw:
                img = raw.convert("L").resize((resize_param, resize_param))
            x.append(np.array(img).flatten())
            y.append(label)

    return np.array(x) / normalisation, np.array(y)

##### MAIN #####

if __name__ == "__main__":
    # Full run for this cell's image size: load the three dataset splits,
    # fit the module-level `model`, then report accuracy on the test and
    # validation splits. Elapsed times use time.perf_counter(), a monotonic
    # clock, so they cannot be skewed by system clock adjustments.

    #### Training data initialization ####

    print("Initializing training data")
    start_trainingData = time.perf_counter()
    x_train, y_train = initData("train")
    end_trainingData = time.perf_counter() - start_trainingData
    print("Training data initialized in", end_trainingData, "seconds")


    #### Test data initialization ####

    print("Initializing test data")
    start_testData = time.perf_counter()
    x_test, y_test = initData("test")
    end_testData = time.perf_counter() - start_testData
    print("Test data initialized in", end_testData, "seconds")


    #### Validation data initialization ####

    print("Initializing validation data")
    start_valData = time.perf_counter()
    x_val, y_val = initData("val")
    end_valData = time.perf_counter() - start_valData
    print("Validation data initialized in", end_valData, "seconds")


    #### Model training ####

    start_trainingTime = time.perf_counter()
    model.fit(x_train, y_train)
    end_trainingTime = time.perf_counter()

    print(f"Temps d'entraînement : {end_trainingTime - start_trainingTime:.2f} secondes")


    #### Prediction ####

    y_pred = model.predict(x_test)
    print("Précision sur le set de test :", accuracy_score(y_test, y_pred))

    y_eval_pred = model.predict(x_val)
    print("Précision sur le set d'évaluation :", accuracy_score(y_val, y_eval_pred))

    # n_iter_ is the number of solver iterations actually run; a value equal
    # to max_iter means the solver hit the cap instead of reaching `tol`.
    print("Arret de l'entrainement à l'itération :", model.n_iter_)

In [None]:
import time
from sklearn.linear_model import LogisticRegression
from PIL import Image
import numpy as np
import os
from sklearn.metrics import accuracy_score

# Classifier for this cell: L2-penalised logistic regression fitted with the
# SAGA solver. Keyword arguments are grouped by purpose.
model = LogisticRegression(
    # --- optimiser ---
    solver='saga',
    max_iter=50,
    tol=1e-4,
    warm_start=True,
    # --- regularisation / model form ---
    penalty='l2',
    C=0.1,
    fit_intercept=True,
    class_weight='balanced',
    # --- runtime behaviour ---
    random_state=42,
    n_jobs=-1,
    verbose=1,
)

def initData(datatype, resize_param=18):
    """Load one split of the chest X-ray dataset as flattened grayscale vectors.

    Reads every image found in ``chest_Xray/<datatype>/NORMAL`` and
    ``chest_Xray/<datatype>/PNEUMONIA``, converts it to 8-bit grayscale,
    resizes it to ``resize_param`` x ``resize_param`` pixels and flattens it.

    Args:
        datatype: Name of the split sub-folder (the notebook uses "train",
            "test" and "val").
        resize_param: Side length, in pixels, of the square the images are
            resized to.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays. ``x`` has one row of
        ``resize_param ** 2`` pixel values divided by 255 per image; ``y``
        holds the matching labels (0 = NORMAL, 1 = PNEUMONIA).

    Raises:
        FileNotFoundError: If a category folder is missing (from os.listdir).
    """
    x = []  # Images
    y = []  # Labels

    normalisation = 255.0

    for label, category in enumerate(["NORMAL", "PNEUMONIA"]):
        folder = f"chest_Xray/{datatype}/{category}"
        # os.listdir order is arbitrary; sorting keeps the row order (and so
        # the result of a stochastic solver) identical on every machine.
        for filename in sorted(os.listdir(folder)):
            img_path = os.path.join(folder, filename)
            # Hidden files (e.g. .DS_Store) and sub-directories are not images.
            if filename.startswith(".") or not os.path.isfile(img_path):
                continue
            # The context manager closes the file handle once pixels are read.
            with Image.open(img_path) as raw:
                img = raw.convert("L").resize((resize_param, resize_param))
            x.append(np.array(img).flatten())
            y.append(label)

    return np.array(x) / normalisation, np.array(y)

##### MAIN #####

if __name__ == "__main__":
    # Full run for this cell's image size: load the three dataset splits,
    # fit the module-level `model`, then report accuracy on the test and
    # validation splits. Elapsed times use time.perf_counter(), a monotonic
    # clock, so they cannot be skewed by system clock adjustments.

    #### Training data initialization ####

    print("Initializing training data")
    start_trainingData = time.perf_counter()
    x_train, y_train = initData("train")
    end_trainingData = time.perf_counter() - start_trainingData
    print("Training data initialized in", end_trainingData, "seconds")


    #### Test data initialization ####

    print("Initializing test data")
    start_testData = time.perf_counter()
    x_test, y_test = initData("test")
    end_testData = time.perf_counter() - start_testData
    print("Test data initialized in", end_testData, "seconds")


    #### Validation data initialization ####

    print("Initializing validation data")
    start_valData = time.perf_counter()
    x_val, y_val = initData("val")
    end_valData = time.perf_counter() - start_valData
    print("Validation data initialized in", end_valData, "seconds")


    #### Model training ####

    start_trainingTime = time.perf_counter()
    model.fit(x_train, y_train)
    end_trainingTime = time.perf_counter()

    print(f"Temps d'entraînement : {end_trainingTime - start_trainingTime:.2f} secondes")


    #### Prediction ####

    y_pred = model.predict(x_test)
    print("Précision sur le set de test :", accuracy_score(y_test, y_pred))

    y_eval_pred = model.predict(x_val)
    print("Précision sur le set d'évaluation :", accuracy_score(y_val, y_eval_pred))

    # n_iter_ is the number of solver iterations actually run; a value equal
    # to max_iter means the solver hit the cap instead of reaching `tol`.
    print("Arret de l'entrainement à l'itération :", model.n_iter_)

In [None]:
import time
from sklearn.linear_model import LogisticRegression
from PIL import Image
import numpy as np
import os
from sklearn.metrics import accuracy_score

# Classifier for this cell: L2-penalised logistic regression fitted with the
# SAGA solver. Keyword arguments are grouped by purpose.
model = LogisticRegression(
    # --- optimiser ---
    solver='saga',
    max_iter=50,
    tol=1e-4,
    warm_start=True,
    # --- regularisation / model form ---
    penalty='l2',
    C=0.1,
    fit_intercept=True,
    class_weight='balanced',
    # --- runtime behaviour ---
    random_state=42,
    n_jobs=-1,
    verbose=1,
)

def initData(datatype, resize_param=22):
    """Load one split of the chest X-ray dataset as flattened grayscale vectors.

    Reads every image found in ``chest_Xray/<datatype>/NORMAL`` and
    ``chest_Xray/<datatype>/PNEUMONIA``, converts it to 8-bit grayscale,
    resizes it to ``resize_param`` x ``resize_param`` pixels and flattens it.

    Args:
        datatype: Name of the split sub-folder (the notebook uses "train",
            "test" and "val").
        resize_param: Side length, in pixels, of the square the images are
            resized to.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays. ``x`` has one row of
        ``resize_param ** 2`` pixel values divided by 255 per image; ``y``
        holds the matching labels (0 = NORMAL, 1 = PNEUMONIA).

    Raises:
        FileNotFoundError: If a category folder is missing (from os.listdir).
    """
    x = []  # Images
    y = []  # Labels

    normalisation = 255.0

    for label, category in enumerate(["NORMAL", "PNEUMONIA"]):
        folder = f"chest_Xray/{datatype}/{category}"
        # os.listdir order is arbitrary; sorting keeps the row order (and so
        # the result of a stochastic solver) identical on every machine.
        for filename in sorted(os.listdir(folder)):
            img_path = os.path.join(folder, filename)
            # Hidden files (e.g. .DS_Store) and sub-directories are not images.
            if filename.startswith(".") or not os.path.isfile(img_path):
                continue
            # The context manager closes the file handle once pixels are read.
            with Image.open(img_path) as raw:
                img = raw.convert("L").resize((resize_param, resize_param))
            x.append(np.array(img).flatten())
            y.append(label)

    return np.array(x) / normalisation, np.array(y)

##### MAIN #####

if __name__ == "__main__":
    # Full run for this cell's image size: load the three dataset splits,
    # fit the module-level `model`, then report accuracy on the test and
    # validation splits. Elapsed times use time.perf_counter(), a monotonic
    # clock, so they cannot be skewed by system clock adjustments.

    #### Training data initialization ####

    print("Initializing training data")
    start_trainingData = time.perf_counter()
    x_train, y_train = initData("train")
    end_trainingData = time.perf_counter() - start_trainingData
    print("Training data initialized in", end_trainingData, "seconds")


    #### Test data initialization ####

    print("Initializing test data")
    start_testData = time.perf_counter()
    x_test, y_test = initData("test")
    end_testData = time.perf_counter() - start_testData
    print("Test data initialized in", end_testData, "seconds")


    #### Validation data initialization ####

    print("Initializing validation data")
    start_valData = time.perf_counter()
    x_val, y_val = initData("val")
    end_valData = time.perf_counter() - start_valData
    print("Validation data initialized in", end_valData, "seconds")


    #### Model training ####

    start_trainingTime = time.perf_counter()
    model.fit(x_train, y_train)
    end_trainingTime = time.perf_counter()

    print(f"Temps d'entraînement : {end_trainingTime - start_trainingTime:.2f} secondes")


    #### Prediction ####

    y_pred = model.predict(x_test)
    print("Précision sur le set de test :", accuracy_score(y_test, y_pred))

    y_eval_pred = model.predict(x_val)
    print("Précision sur le set d'évaluation :", accuracy_score(y_val, y_eval_pred))

    # n_iter_ is the number of solver iterations actually run; a value equal
    # to max_iter means the solver hit the cap instead of reaching `tol`.
    print("Arret de l'entrainement à l'itération :", model.n_iter_)

In [None]:
import time
from sklearn.linear_model import LogisticRegression
from PIL import Image
import numpy as np
import os
from sklearn.metrics import accuracy_score

# Classifier for this cell: L2-penalised logistic regression fitted with the
# SAGA solver. Keyword arguments are grouped by purpose.
model = LogisticRegression(
    # --- optimiser ---
    solver='saga',
    max_iter=50,
    tol=1e-4,
    warm_start=True,
    # --- regularisation / model form ---
    penalty='l2',
    C=0.1,
    fit_intercept=True,
    class_weight='balanced',
    # --- runtime behaviour ---
    random_state=42,
    n_jobs=-1,
    verbose=1,
)

def initData(datatype, resize_param=23):
    """Load one split of the chest X-ray dataset as flattened grayscale vectors.

    Reads every image found in ``chest_Xray/<datatype>/NORMAL`` and
    ``chest_Xray/<datatype>/PNEUMONIA``, converts it to 8-bit grayscale,
    resizes it to ``resize_param`` x ``resize_param`` pixels and flattens it.

    Args:
        datatype: Name of the split sub-folder (the notebook uses "train",
            "test" and "val").
        resize_param: Side length, in pixels, of the square the images are
            resized to.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays. ``x`` has one row of
        ``resize_param ** 2`` pixel values divided by 255 per image; ``y``
        holds the matching labels (0 = NORMAL, 1 = PNEUMONIA).

    Raises:
        FileNotFoundError: If a category folder is missing (from os.listdir).
    """
    x = []  # Images
    y = []  # Labels

    normalisation = 255.0

    for label, category in enumerate(["NORMAL", "PNEUMONIA"]):
        folder = f"chest_Xray/{datatype}/{category}"
        # os.listdir order is arbitrary; sorting keeps the row order (and so
        # the result of a stochastic solver) identical on every machine.
        for filename in sorted(os.listdir(folder)):
            img_path = os.path.join(folder, filename)
            # Hidden files (e.g. .DS_Store) and sub-directories are not images.
            if filename.startswith(".") or not os.path.isfile(img_path):
                continue
            # The context manager closes the file handle once pixels are read.
            with Image.open(img_path) as raw:
                img = raw.convert("L").resize((resize_param, resize_param))
            x.append(np.array(img).flatten())
            y.append(label)

    return np.array(x) / normalisation, np.array(y)

##### MAIN #####

if __name__ == "__main__":
    # Full run for this cell's image size: load the three dataset splits,
    # fit the module-level `model`, then report accuracy on the test and
    # validation splits. Elapsed times use time.perf_counter(), a monotonic
    # clock, so they cannot be skewed by system clock adjustments.

    #### Training data initialization ####

    print("Initializing training data")
    start_trainingData = time.perf_counter()
    x_train, y_train = initData("train")
    end_trainingData = time.perf_counter() - start_trainingData
    print("Training data initialized in", end_trainingData, "seconds")


    #### Test data initialization ####

    print("Initializing test data")
    start_testData = time.perf_counter()
    x_test, y_test = initData("test")
    end_testData = time.perf_counter() - start_testData
    print("Test data initialized in", end_testData, "seconds")


    #### Validation data initialization ####

    print("Initializing validation data")
    start_valData = time.perf_counter()
    x_val, y_val = initData("val")
    end_valData = time.perf_counter() - start_valData
    print("Validation data initialized in", end_valData, "seconds")


    #### Model training ####

    start_trainingTime = time.perf_counter()
    model.fit(x_train, y_train)
    end_trainingTime = time.perf_counter()

    print(f"Temps d'entraînement : {end_trainingTime - start_trainingTime:.2f} secondes")


    #### Prediction ####

    y_pred = model.predict(x_test)
    print("Précision sur le set de test :", accuracy_score(y_test, y_pred))

    y_eval_pred = model.predict(x_val)
    print("Précision sur le set d'évaluation :", accuracy_score(y_val, y_eval_pred))

    # n_iter_ is the number of solver iterations actually run; a value equal
    # to max_iter means the solver hit the cap instead of reaching `tol`.
    print("Arret de l'entrainement à l'itération :", model.n_iter_)

In [None]:
import time
from sklearn.linear_model import LogisticRegression
from PIL import Image
import numpy as np
import os
from sklearn.metrics import accuracy_score

# Classifier for this cell: L2-penalised logistic regression fitted with the
# SAGA solver. Keyword arguments are grouped by purpose.
model = LogisticRegression(
    # --- optimiser ---
    solver='saga',
    max_iter=50,
    tol=1e-4,
    warm_start=True,
    # --- regularisation / model form ---
    penalty='l2',
    C=0.1,
    fit_intercept=True,
    class_weight='balanced',
    # --- runtime behaviour ---
    random_state=42,
    n_jobs=-1,
    verbose=1,
)

def initData(datatype, resize_param=24):
    """Load one split of the chest X-ray dataset as flattened grayscale vectors.

    Reads every image found in ``chest_Xray/<datatype>/NORMAL`` and
    ``chest_Xray/<datatype>/PNEUMONIA``, converts it to 8-bit grayscale,
    resizes it to ``resize_param`` x ``resize_param`` pixels and flattens it.

    Args:
        datatype: Name of the split sub-folder (the notebook uses "train",
            "test" and "val").
        resize_param: Side length, in pixels, of the square the images are
            resized to.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays. ``x`` has one row of
        ``resize_param ** 2`` pixel values divided by 255 per image; ``y``
        holds the matching labels (0 = NORMAL, 1 = PNEUMONIA).

    Raises:
        FileNotFoundError: If a category folder is missing (from os.listdir).
    """
    x = []  # Images
    y = []  # Labels

    normalisation = 255.0

    for label, category in enumerate(["NORMAL", "PNEUMONIA"]):
        folder = f"chest_Xray/{datatype}/{category}"
        # os.listdir order is arbitrary; sorting keeps the row order (and so
        # the result of a stochastic solver) identical on every machine.
        for filename in sorted(os.listdir(folder)):
            img_path = os.path.join(folder, filename)
            # Hidden files (e.g. .DS_Store) and sub-directories are not images.
            if filename.startswith(".") or not os.path.isfile(img_path):
                continue
            # The context manager closes the file handle once pixels are read.
            with Image.open(img_path) as raw:
                img = raw.convert("L").resize((resize_param, resize_param))
            x.append(np.array(img).flatten())
            y.append(label)

    return np.array(x) / normalisation, np.array(y)

##### MAIN #####

if __name__ == "__main__":
    # Full run for this cell's image size: load the three dataset splits,
    # fit the module-level `model`, then report accuracy on the test and
    # validation splits. Elapsed times use time.perf_counter(), a monotonic
    # clock, so they cannot be skewed by system clock adjustments.

    #### Training data initialization ####

    print("Initializing training data")
    start_trainingData = time.perf_counter()
    x_train, y_train = initData("train")
    end_trainingData = time.perf_counter() - start_trainingData
    print("Training data initialized in", end_trainingData, "seconds")


    #### Test data initialization ####

    print("Initializing test data")
    start_testData = time.perf_counter()
    x_test, y_test = initData("test")
    end_testData = time.perf_counter() - start_testData
    print("Test data initialized in", end_testData, "seconds")


    #### Validation data initialization ####

    print("Initializing validation data")
    start_valData = time.perf_counter()
    x_val, y_val = initData("val")
    end_valData = time.perf_counter() - start_valData
    print("Validation data initialized in", end_valData, "seconds")


    #### Model training ####

    start_trainingTime = time.perf_counter()
    model.fit(x_train, y_train)
    end_trainingTime = time.perf_counter()

    print(f"Temps d'entraînement : {end_trainingTime - start_trainingTime:.2f} secondes")


    #### Prediction ####

    y_pred = model.predict(x_test)
    print("Précision sur le set de test :", accuracy_score(y_test, y_pred))

    y_eval_pred = model.predict(x_val)
    print("Précision sur le set d'évaluation :", accuracy_score(y_val, y_eval_pred))

    # n_iter_ is the number of solver iterations actually run; a value equal
    # to max_iter means the solver hit the cap instead of reaching `tol`.
    print("Arret de l'entrainement à l'itération :", model.n_iter_)

In [None]:
import time
from sklearn.linear_model import LogisticRegression
from PIL import Image
import numpy as np
import os
from sklearn.metrics import accuracy_score

# Classifier for this cell: L2-penalised logistic regression fitted with the
# SAGA solver. Keyword arguments are grouped by purpose.
model = LogisticRegression(
    # --- optimiser ---
    solver='saga',
    max_iter=50,
    tol=1e-4,
    warm_start=True,
    # --- regularisation / model form ---
    penalty='l2',
    C=0.1,
    fit_intercept=True,
    class_weight='balanced',
    # --- runtime behaviour ---
    random_state=42,
    n_jobs=-1,
    verbose=1,
)

def initData(datatype, resize_param=21):
    """Load one split of the chest X-ray dataset as flattened grayscale vectors.

    Reads every image found in ``chest_Xray/<datatype>/NORMAL`` and
    ``chest_Xray/<datatype>/PNEUMONIA``, converts it to 8-bit grayscale,
    resizes it to ``resize_param`` x ``resize_param`` pixels and flattens it.

    Args:
        datatype: Name of the split sub-folder (the notebook uses "train",
            "test" and "val").
        resize_param: Side length, in pixels, of the square the images are
            resized to.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays. ``x`` has one row of
        ``resize_param ** 2`` pixel values divided by 255 per image; ``y``
        holds the matching labels (0 = NORMAL, 1 = PNEUMONIA).

    Raises:
        FileNotFoundError: If a category folder is missing (from os.listdir).
    """
    x = []  # Images
    y = []  # Labels

    normalisation = 255.0

    for label, category in enumerate(["NORMAL", "PNEUMONIA"]):
        folder = f"chest_Xray/{datatype}/{category}"
        # os.listdir order is arbitrary; sorting keeps the row order (and so
        # the result of a stochastic solver) identical on every machine.
        for filename in sorted(os.listdir(folder)):
            img_path = os.path.join(folder, filename)
            # Hidden files (e.g. .DS_Store) and sub-directories are not images.
            if filename.startswith(".") or not os.path.isfile(img_path):
                continue
            # The context manager closes the file handle once pixels are read.
            with Image.open(img_path) as raw:
                img = raw.convert("L").resize((resize_param, resize_param))
            x.append(np.array(img).flatten())
            y.append(label)

    return np.array(x) / normalisation, np.array(y)

##### MAIN #####

if __name__ == "__main__":
    # Full run for this cell's image size: load the three dataset splits,
    # fit the module-level `model`, then report accuracy on the test and
    # validation splits. Elapsed times use time.perf_counter(), a monotonic
    # clock, so they cannot be skewed by system clock adjustments.

    #### Training data initialization ####

    print("Initializing training data")
    start_trainingData = time.perf_counter()
    x_train, y_train = initData("train")
    end_trainingData = time.perf_counter() - start_trainingData
    print("Training data initialized in", end_trainingData, "seconds")


    #### Test data initialization ####

    print("Initializing test data")
    start_testData = time.perf_counter()
    x_test, y_test = initData("test")
    end_testData = time.perf_counter() - start_testData
    print("Test data initialized in", end_testData, "seconds")


    #### Validation data initialization ####

    print("Initializing validation data")
    start_valData = time.perf_counter()
    x_val, y_val = initData("val")
    end_valData = time.perf_counter() - start_valData
    print("Validation data initialized in", end_valData, "seconds")


    #### Model training ####

    start_trainingTime = time.perf_counter()
    model.fit(x_train, y_train)
    end_trainingTime = time.perf_counter()

    print(f"Temps d'entraînement : {end_trainingTime - start_trainingTime:.2f} secondes")


    #### Prediction ####

    y_pred = model.predict(x_test)
    print("Précision sur le set de test :", accuracy_score(y_test, y_pred))

    y_eval_pred = model.predict(x_val)
    print("Précision sur le set d'évaluation :", accuracy_score(y_val, y_eval_pred))

    # n_iter_ is the number of solver iterations actually run; a value equal
    # to max_iter means the solver hit the cap instead of reaching `tol`.
    print("Arret de l'entrainement à l'itération :", model.n_iter_)

In [None]:
import time
from sklearn.linear_model import LogisticRegression
from PIL import Image
import numpy as np
import os
from sklearn.metrics import accuracy_score

# Classifier for this cell: L2-penalised logistic regression fitted with the
# SAGA solver. Keyword arguments are grouped by purpose.
model = LogisticRegression(
    # --- optimiser ---
    solver='saga',
    max_iter=50,
    tol=1e-4,
    warm_start=True,
    # --- regularisation / model form ---
    penalty='l2',
    C=0.1,
    fit_intercept=True,
    class_weight='balanced',
    # --- runtime behaviour ---
    random_state=42,
    n_jobs=-1,
    verbose=1,
)

def initData(datatype, resize_param=224):
    """Load one split of the chest X-ray dataset as flattened grayscale vectors.

    Reads every image found in ``chest_Xray/<datatype>/NORMAL`` and
    ``chest_Xray/<datatype>/PNEUMONIA``, converts it to 8-bit grayscale,
    resizes it to ``resize_param`` x ``resize_param`` pixels and flattens it.

    Args:
        datatype: Name of the split sub-folder (the notebook uses "train",
            "test" and "val").
        resize_param: Side length, in pixels, of the square every image is
            resized to (one uniform size for all images).

    Returns:
        A tuple ``(x, y)`` of NumPy arrays. ``x`` has one row of
        ``resize_param ** 2`` pixel values divided by 255 per image; ``y``
        holds the matching labels (0 = NORMAL, 1 = PNEUMONIA).

    Raises:
        FileNotFoundError: If a category folder is missing (from os.listdir).
    """
    x = []  # Images
    y = []  # Labels

    normalisation = 255.0

    for label, category in enumerate(["NORMAL", "PNEUMONIA"]):
        folder = f"chest_Xray/{datatype}/{category}"
        # os.listdir order is arbitrary; sorting keeps the row order (and so
        # the result of a stochastic solver) identical on every machine.
        for filename in sorted(os.listdir(folder)):
            img_path = os.path.join(folder, filename)
            # Hidden files (e.g. .DS_Store) and sub-directories are not images.
            if filename.startswith(".") or not os.path.isfile(img_path):
                continue
            # The context manager closes the file handle once pixels are read.
            with Image.open(img_path) as raw:
                img = raw.convert("L").resize((resize_param, resize_param))
            x.append(np.array(img).flatten())
            y.append(label)

    return np.array(x) / normalisation, np.array(y)

##### MAIN #####

if __name__ == "__main__":
    # Full run for this cell's image size: load the three dataset splits,
    # fit the module-level `model`, then report accuracy on the test and
    # validation splits. Elapsed times use time.perf_counter(), a monotonic
    # clock, so they cannot be skewed by system clock adjustments.

    #### Training data initialization ####

    print("Initializing training data")
    start_trainingData = time.perf_counter()
    x_train, y_train = initData("train")
    end_trainingData = time.perf_counter() - start_trainingData
    print("Training data initialized in", end_trainingData, "seconds")


    #### Test data initialization ####

    print("Initializing test data")
    start_testData = time.perf_counter()
    x_test, y_test = initData("test")
    end_testData = time.perf_counter() - start_testData
    print("Test data initialized in", end_testData, "seconds")


    #### Validation data initialization ####

    print("Initializing validation data")
    start_valData = time.perf_counter()
    x_val, y_val = initData("val")
    end_valData = time.perf_counter() - start_valData
    print("Validation data initialized in", end_valData, "seconds")


    #### Model training ####

    start_trainingTime = time.perf_counter()
    model.fit(x_train, y_train)
    end_trainingTime = time.perf_counter()

    print(f"Temps d'entraînement : {end_trainingTime - start_trainingTime:.2f} secondes")


    #### Prediction ####

    y_pred = model.predict(x_test)
    print("Précision sur le set de test :", accuracy_score(y_test, y_pred))

    y_eval_pred = model.predict(x_val)
    print("Précision sur le set d'évaluation :", accuracy_score(y_val, y_eval_pred))

    # n_iter_ is the number of solver iterations actually run; a value equal
    # to max_iter means the solver hit the cap instead of reaching `tol`.
    print("Arret de l'entrainement à l'itération :", model.n_iter_)

In [None]:
import time
from sklearn.linear_model import LogisticRegression
from PIL import Image
import numpy as np
import os
from sklearn.metrics import accuracy_score

# Classifier for this cell: L2-penalised logistic regression fitted with the
# SAGA solver. Keyword arguments are grouped by purpose.
model = LogisticRegression(
    # --- optimiser ---
    solver='saga',
    max_iter=50,
    tol=1e-4,
    warm_start=True,
    # --- regularisation / model form ---
    penalty='l2',
    C=0.1,
    fit_intercept=True,
    class_weight='balanced',
    # --- runtime behaviour ---
    random_state=42,
    n_jobs=-1,
    verbose=1,
)

def initData(datatype, resize_param=1):
    """Load one split of the chest X-ray dataset as flattened grayscale vectors.

    Reads every image found in ``chest_Xray/<datatype>/NORMAL`` and
    ``chest_Xray/<datatype>/PNEUMONIA``, converts it to 8-bit grayscale,
    resizes it to ``resize_param`` x ``resize_param`` pixels and flattens it.

    Args:
        datatype: Name of the split sub-folder (the notebook uses "train",
            "test" and "val").
        resize_param: Side length, in pixels, of the square the images are
            resized to. The default of 1 reduces each image to one pixel.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays. ``x`` has one row of
        ``resize_param ** 2`` pixel values divided by 255 per image; ``y``
        holds the matching labels (0 = NORMAL, 1 = PNEUMONIA).

    Raises:
        FileNotFoundError: If a category folder is missing (from os.listdir).
    """
    x = []  # Images
    y = []  # Labels

    normalisation = 255.0

    for label, category in enumerate(["NORMAL", "PNEUMONIA"]):
        folder = f"chest_Xray/{datatype}/{category}"
        # os.listdir order is arbitrary; sorting keeps the row order (and so
        # the result of a stochastic solver) identical on every machine.
        for filename in sorted(os.listdir(folder)):
            img_path = os.path.join(folder, filename)
            # Hidden files (e.g. .DS_Store) and sub-directories are not images.
            if filename.startswith(".") or not os.path.isfile(img_path):
                continue
            # The context manager closes the file handle once pixels are read.
            with Image.open(img_path) as raw:
                img = raw.convert("L").resize((resize_param, resize_param))
            x.append(np.array(img).flatten())
            y.append(label)

    return np.array(x) / normalisation, np.array(y)

##### MAIN #####

if __name__ == "__main__":
    # Full run for this cell's image size: load the three dataset splits,
    # fit the module-level `model`, then report accuracy on the test and
    # validation splits. Elapsed times use time.perf_counter(), a monotonic
    # clock, so they cannot be skewed by system clock adjustments.

    #### Training data initialization ####

    print("Initializing training data")
    start_trainingData = time.perf_counter()
    x_train, y_train = initData("train")
    end_trainingData = time.perf_counter() - start_trainingData
    print("Training data initialized in", end_trainingData, "seconds")


    #### Test data initialization ####

    print("Initializing test data")
    start_testData = time.perf_counter()
    x_test, y_test = initData("test")
    end_testData = time.perf_counter() - start_testData
    print("Test data initialized in", end_testData, "seconds")


    #### Validation data initialization ####

    print("Initializing validation data")
    start_valData = time.perf_counter()
    x_val, y_val = initData("val")
    end_valData = time.perf_counter() - start_valData
    print("Validation data initialized in", end_valData, "seconds")


    #### Model training ####

    start_trainingTime = time.perf_counter()
    model.fit(x_train, y_train)
    end_trainingTime = time.perf_counter()

    print(f"Temps d'entraînement : {end_trainingTime - start_trainingTime:.2f} secondes")


    #### Prediction ####

    y_pred = model.predict(x_test)
    print("Précision sur le set de test :", accuracy_score(y_test, y_pred))

    y_eval_pred = model.predict(x_val)
    print("Précision sur le set d'évaluation :", accuracy_score(y_val, y_eval_pred))

    # n_iter_ is the number of solver iterations actually run; a value equal
    # to max_iter means the solver hit the cap instead of reaching `tol`.
    print("Arret de l'entrainement à l'itération :", model.n_iter_)

On note un pic de précision sur le set de test pour resize = 22/23 (images redimensionnées en 22×22 ou 23×23 pixels).