In [None]:
import zipfile
with zipfile.ZipFile("/content/mask_dataset_zip.zip", 'r') as zip_ref:
  zip_ref.extractall("")

In [None]:
import os
import cv2
import numpy as np
import pandas as pd


# def extract_features(img):
#     return  list(cv2.mean(im))[:3]

# def extract_features(img):
#     mean_rgb = list(cv2.mean(img))[:3]
#     std_rgb = list(np.std(img, axis=(0, 1)))[::-1]
#     gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
#     mean_gray = np.mean(gray)
#     std_gray = np.std(gray)
#     return mean_rgb + std_rgb + [mean_gray, std_gray]


def extract_features(img, bins=8):
    # Calculate color histograms
    hist_r = cv2.calcHist([img], [0], None, [bins], [0, 256]).flatten()
    hist_g = cv2.calcHist([img], [1], None, [bins], [0, 256]).flatten()
    hist_b = cv2.calcHist([img], [2], None, [bins], [0, 256]).flatten()

    # Concatenate and normalize
    hist = np.concatenate([hist_r, hist_g, hist_b])
    hist = hist / np.sum(hist)  # Normalize to sum = 1
    return hist.tolist()



data=[]
basepath='./mask_dataset/'
for folder in os.scandir(basepath):
    if folder.is_dir():
        #print(folder.name)
        for file in os.scandir(folder.path):
            im=cv2.imread(file.path)
            im=cv2.cvtColor(im,cv2.COLOR_BGR2RGB)
            features=extract_features(im)
            features.append(folder.name)
            data.append(features)
            #print(features)

In [None]:
print(data)

[[0.028209133073687553, 0.014814814552664757, 0.019083742052316666, 0.025955675169825554, 0.055049829185009, 0.07012494653463364, 0.10596460103988647, 0.014130596071481705, 0.0010188904125243425, 0.00396400410681963, 0.010850810445845127, 0.041313402354717255, 0.04128365218639374, 0.09878773987293243, 0.11115573346614838, 0.02495909482240677, 0.005562992766499519, 0.01794585771858692, 0.029332144185900688, 0.029339579865336418, 0.038814518600702286, 0.06340175867080688, 0.07688532024621964, 0.07205116748809814, 'Improper'], [0.0, 1.9391118257772177e-05, 0.0004459957417566329, 0.005526469089090824, 0.025547798722982407, 0.10015027970075607, 0.2011149823665619, 0.0005284079816192389, 0.0, 1.4543339602823835e-05, 0.0009307737345807254, 0.004842931870371103, 0.010602094233036041, 0.04077952355146408, 0.23905371129512787, 0.03710975497961044, 0.0, 4.36300178989768e-05, 0.0015367461601272225, 0.00524044968187809, 0.009322280064225197, 0.028650378808379173, 0.12315784394741058, 0.165381997823

In [None]:
# df = pd.DataFrame(data, columns=['R','G','B','class'])
# df.to_csv('curdata.csv',index=False)

# df = pd.DataFrame(data, columns=[
#     'mean_R', 'mean_G', 'mean_B',
#     'std_R', 'std_G', 'std_B',
#     'mean_gray', 'std_gray',
#     'class'
# ])
# df.to_csv('curdata.csv', index=False)

bins = 8
columns = [f'R_bin{i}' for i in range(bins)] + \
          [f'G_bin{i}' for i in range(bins)] + \
          [f'B_bin{i}' for i in range(bins)] + ['class']

df = pd.DataFrame(data, columns=columns)
df.to_csv('curdata.csv', index=False)


In [None]:
df = pd.read_csv('curdata.csv')
print(df)

      R_bin0    R_bin1    R_bin2    R_bin3    R_bin4    R_bin5    R_bin6  \
0   0.028209  0.014815  0.019084  0.025956  0.055050  0.070125  0.105965   
1   0.000000  0.000019  0.000446  0.005526  0.025548  0.100150  0.201115   
2   0.005041  0.014705  0.029328  0.052908  0.040144  0.075720  0.083093   
3   0.003748  0.007175  0.012191  0.020763  0.039484  0.073688  0.137269   
4   0.002497  0.010007  0.018429  0.026399  0.044911  0.096927  0.117112   
..       ...       ...       ...       ...       ...       ...       ...   
61  0.041207  0.011948  0.011749  0.017243  0.032888  0.069492  0.128388   
62  0.039321  0.025816  0.041468  0.043038  0.053038  0.068519  0.041667   
63  0.024317  0.022506  0.030880  0.030885  0.035625  0.047593  0.050961   
64  0.018704  0.023930  0.055581  0.075746  0.062788  0.058107  0.023215   
65  0.004282  0.010725  0.010733  0.009020  0.011304  0.013827  0.026435   

      R_bin7    G_bin0    G_bin1  ...    G_bin7    B_bin0    B_bin1    B_bin2  \
0   0.

In [None]:
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

X=df.iloc[:,:-1].values
y=df.iloc[:,-1].values
print(X)
print(y)

le = preprocessing.LabelEncoder()
y=le.fit(y).transform(y)

print(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)


[[2.82091331e-02 1.48148146e-02 1.90837421e-02 ... 6.34017587e-02
  7.68853202e-02 7.20511675e-02]
 [0.00000000e+00 1.93911183e-05 4.45995742e-04 ... 2.86503788e-02
  1.23157844e-01 1.65381998e-01]
 [5.04115224e-03 1.47050759e-02 2.93278471e-02 ... 4.12551425e-02
  6.52743503e-02 4.77914959e-02]
 ...
 [2.43171304e-02 2.25057863e-02 3.08796298e-02 ... 3.04456018e-02
  1.14236111e-02 1.00937501e-01]
 [1.87037028e-02 2.39300411e-02 5.55812754e-02 ... 2.24022642e-02
  1.65329222e-02 4.98971203e-03]
 [4.28240746e-03 1.07253082e-02 1.07330251e-02 ... 2.16049375e-03
  2.68518529e-03 1.93773150e-01]]
['Improper' 'Improper' 'Improper' 'Improper' 'Improper' 'Improper'
 'Improper' 'Improper' 'Improper' 'Improper' 'Improper' 'Improper'
 'Improper' 'Improper' 'Improper' 'Improper' 'Improper' 'Improper'
 'Improper' 'Improper' 'Improper' 'Improper' 'Improper' 'Improper'
 'Improper' 'Improper' 'Yes' 'Yes' 'Yes' 'Yes' 'Yes' 'Yes' 'Yes' 'Yes'
 'Yes' 'Yes' 'Yes' 'Yes' 'Yes' 'Yes' 'Yes' 'NO' 'NO' 'NO' 'NO

In [None]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X_train=sc.fit_transform(X_train)
X_test=sc.transform(X_test)
print(X_train)
print(X_test)

[[-0.70568673 -0.5576033  -0.49018091 ...  0.15432064  1.41929131
   0.40864386]
 [-0.43554723  0.01848238 -0.54971788 ... -0.60686402 -0.23755931
   1.51279921]
 [ 2.07214786  1.54492836  0.24629528 ... -0.99600038 -0.80488665
  -0.65160062]
 ...
 [ 0.66943014  0.03423202 -0.90164523 ... -0.73460465 -0.56673297
   1.67594569]
 [-0.61514826 -0.04012207 -0.0946024  ...  0.0156777  -0.17334298
  -0.63160768]
 [ 0.25969592 -0.6995817  -1.02839314 ... -1.17618556 -1.06565517
  -0.83041957]]
[[ 1.48182995e+00 -3.61527621e-01 -4.43391783e-01 -1.36409678e-01
   7.38333817e-01  3.69656246e-01 -4.69286482e-01 -5.09917806e-01
   1.78303131e+00 -2.84479830e-01  2.72890329e-01  9.74281915e-01
   1.83148751e-01 -4.07526946e-01 -6.97234589e-01 -5.62025182e-01
   7.38637370e-01  5.96713420e-01  3.82046589e-01  3.56247472e-01
  -2.58711638e-01 -2.96771104e-01 -4.64194604e-01 -5.40039334e-01]
 [ 3.01756801e-01 -1.02462861e-01  3.78132058e-01  3.68694024e-01
   6.33420553e-01  7.22971283e-01 -5.92774704

In [None]:
from sklearn.svm import SVC
classifier = model = SVC(kernel='linear')
classifier.fit(X_train, y_train)
svmy_pred = classifier.predict(X_test)


from sklearn.metrics import classification_report, confusion_matrix,accuracy_score
print(accuracy_score(y_test, svmy_pred))


0.7272727272727273


In [None]:
from sklearn.neural_network import MLPClassifier
classifier = MLPClassifier(random_state=42, hidden_layer_sizes=(100,50),max_iter=1000, solver='adam')
classifier.fit(X_train, y_train)
mlpy_pred = classifier.predict(X_test)


from sklearn.metrics import classification_report, confusion_matrix,accuracy_score
print(accuracy_score(y_test, mlpy_pred))


0.7272727272727273


In [None]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors=3)
classifier.fit(X_train, y_train)
knny_pred = classifier.predict(X_test)


from sklearn.metrics import classification_report, confusion_matrix,accuracy_score
print(accuracy_score(y_test, knny_pred))


0.6818181818181818


In [None]:
from sklearn.linear_model import LogisticRegression
classifier =LogisticRegression(max_iter=1000)
classifier.fit(X_train, y_train)
ly_pred = classifier.predict(X_test)


from sklearn.metrics import classification_report, confusion_matrix,accuracy_score
print(accuracy_score(y_test, ly_pred))


0.8181818181818182


In [None]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
rfy_pred = classifier.predict(X_test)


from sklearn.metrics import classification_report, confusion_matrix,accuracy_score
print(accuracy_score(y_test, rfy_pred))

0.8181818181818182
