In [None]:
# Step 1: pair every image file path with its class label (the name of the folder it sits in)
import os
import pandas as pd
data_dir = '/kaggle/input/weather-dataset/dataset'
paths = []
classLabels = []
# Every sub-directory of data_dir is treated as one class.
# os.listdir() returns entries in arbitrary order, so the listing is sorted:
# the seeded train/validation/test split performed later is only reproducible
# when the samples are always enumerated in the same order.
# Non-directory entries (e.g. a stray README) are skipped, because calling
# os.listdir() on them would raise NotADirectoryError.
folders = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)
for folder in folders:
    foldpath = os.path.join(data_dir, folder)
    # Sorted for the same reproducibility reason as the class folders.
    filelist = sorted(os.listdir(foldpath))
    for file in filelist:
        fpath = os.path.join(foldpath, file)
        # Only regular files can be loaded as images; ignore nested folders.
        if not os.path.isfile(fpath):
            continue
        # Record the image location ...
        paths.append(fpath)
        # ... and its class label, which is the name of the containing folder.
        classLabels.append(folder)

# One row per image: its location on disk and its class label.
df = pd.DataFrame({'filePaths': paths, 'label': classLabels})

In [None]:
# Preview the first five rows of the path/label table
df.head(n=5)

In [None]:
# How many images each class contains
df.loc[:, 'label'].value_counts()

In [None]:
# Extract features from the images
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
import numpy as np
# Features are extracted with TensorFlow/Keras using VGG16, a convolutional
# neural network with weights pretrained on ImageNet.
# include_top=False leaves out the fully-connected classifier head, so the
# network returns the output of its last convolutional block, not class scores.

# Step 1.1: load the pretrained VGG16 model
model = VGG16(include_top=False, weights='imagenet')

def extract_features(img_path):
    """Return the VGG16 feature vector of a single image.

    Relies on the module-level ``model`` (VGG16 loaded without its
    classifier head).

    Args:
        img_path: Path of the image file to load.

    Returns:
        1-D NumPy array holding the flattened network output for the image.
    """
    # Load the image resized to 224x224 pixels.
    img = image.load_img(img_path, target_size=(224, 224))
    # Convert the image to an array and add a leading batch axis of size 1.
    img_array = np.expand_dims(image.img_to_array(img), axis=0)
    # Apply the input preprocessing that VGG16 expects.
    img_array = preprocess_input(img_array)
    # Call the model directly instead of model.predict(): predict() is meant
    # for large batched inputs, and invoking it once per image adds per-call
    # overhead and prints a progress bar for every single file.
    features = np.asarray(model(img_array, training=False))
    # Flatten the feature maps into one vector per image.
    return features.flatten()

# Run the extractor over every image path, collecting one feature vector per image
img_features = [extract_features(p) for p in paths]
print(len(img_features))


In [None]:
from sklearn.model_selection import train_test_split
# Step 2
# Split the dataset into 80 % train, 10 % validation and 10 % test.
# stratify=... keeps the class proportions the same in every subset; a purely
# random split can leave a class under-represented in the small 10 % subsets.
# NOTE: stratification needs at least two samples of every class in each call.
# First hold out 20 % of the samples ...
X_train, X_temp, y_train, y_temp = train_test_split(
    img_features, classLabels, test_size=0.2, random_state=42, stratify=classLabels
)
# ... then halve the held-out part into the validation and test sets.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)


In [None]:
#step 3
# import the classifiers  GaussianNB , DecisionTreeClassifier , SVC from sklearn
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
# Gaussian Naive Bayes: fit on the training split (fit() returns the estimator itself)
naive_Bayes = GaussianNB().fit(X_train, y_train)
# Predict labels for the test features first, then for the validation features
nb_test, nb_val = (naive_Bayes.predict(split) for split in (X_test, X_val))

# Train a decision tree on the training split.
# random_state is fixed because the tree permutes the candidate features at
# every split; without a seed, re-running the cell can produce a different
# tree and therefore different scores.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
# Predict with the trained decision tree for X_test and X_val
dt_test = dt.predict(X_test)
dt_val = dt.predict(X_val)

# Train a support vector classifier on the training split.
# probability=True makes predict_proba() available; the shuffling used for
# those probability estimates is random, so random_state is fixed to keep
# them reproducible.
# NOTE(review): no cell visible here calls predict_proba(); if nothing later in
# the notebook needs it, dropping probability=True would speed up fit().
svm = SVC(probability=True, random_state=42)
svm.fit(X_train, y_train)
# Predict with the trained SVM for X_test and X_val
svm_test = svm.predict(X_test)
svm_val = svm.predict(X_val)


In [None]:
# step 4
# import the different measures to evaluate the model
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
def printReport(y_true, y_pred, model_name):
    """
    Print evaluation metrics for one model's predictions.

    Prints the accuracy, the support-weighted precision, recall and F1-score,
    and the confusion matrix. No ROC curve or AUC is computed here.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.
        model_name: Text shown in the report header.

    Returns:
        None; the report is written to standard output.
    """
    accuracy = accuracy_score(y_true, y_pred)
    # zero_division=0 scores a class with an undefined metric (for example a
    # class that is never predicted) as 0. That is the value the default
    # setting already uses, but without emitting UndefinedMetricWarning.
    precision = precision_score(y_true, y_pred, average="weighted", zero_division=0)
    recall = recall_score(y_true, y_pred, average="weighted", zero_division=0)
    f1 = f1_score(y_true, y_pred, average="weighted", zero_division=0)
    confusion_mat = confusion_matrix(y_true, y_pred)

    print("Metrics for ",model_name,":\n")
    print("Accuracy:",accuracy)
    print("Precision:",precision)
    print("Recall:",recall)
    print("F1-Score:",f1)
    print("Confusion Matrix:")
    print(confusion_mat)

# Report every model on the validation split first, then on the test split
separator = "---------------------------------------------------------------------------\n"
reports = [
    (y_val, nb_val, 'Gaaussian Naive Bayes on validation data set'),
    (y_val, dt_val, 'Decision Tree on validation data set'),
    (y_val, svm_val, 'SVM on validation data set'),
    (y_test, nb_test, 'Gaaussian Naive Bayes on test data set'),
    (y_test, dt_test, 'Decision Tree on test data set'),
    (y_test, svm_test, 'SVM on test data set'),
]
for position, (truth, predicted, title) in enumerate(reports):
    # The separator line goes between consecutive reports only
    if position > 0:
        print(separator)
    printReport(truth, predicted, title)
