In [48]:
import os
import pandas as pd
import numpy as np
from skimage.io import imread
from skimage.transform import resize
import cv2 as cv
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split,GridSearchCV,cross_val_score
import warnings
import shutil
from tqdm import tqdm
warnings.filterwarnings('ignore')
%load_ext autotime

The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
time: 0 ns (started: 2023-03-04 15:31:56 +05:30)


# IMG  DIR

In [19]:
len(os.listdir('Img/'))

16425

time: 47 ms (started: 2023-03-04 15:12:47 +05:30)


In [20]:
files=os.listdir('raw_Img/')
files[:10]

['img001-001.png',
 'img001-002.png',
 'img001-003.png',
 'img001-004.png',
 'img001-005.png',
 'img001-006.png',
 'img001-007.png',
 'img001-008.png',
 'img001-009.png',
 'img001-010.png']

time: 94 ms (started: 2023-03-04 15:12:47 +05:30)


In [21]:
data_set=pd.read_csv('kannada.csv')
data_set.head()

Unnamed: 0,img,class
0,Img/img001-001.png,1
1,Img/img001-002.png,1
2,Img/img001-003.png,1
3,Img/img001-004.png,1
4,Img/img001-005.png,1


time: 31 ms (started: 2023-03-04 15:12:47 +05:30)


# Resize Test

In [22]:
img=cv.imread(f'raw_img/{files[4]}')
print(files[4])
cv.imshow('sample',img)
cv.waitKey(0)

img001-005.png


-1

time: 9.86 s (started: 2023-03-04 15:12:47 +05:30)


In [23]:
tranform_img=resize(img,(150,150,3))
cv.imshow('tranformed',tranform_img)
cv.waitKey(0)

-1

time: 1.36 s (started: 2023-03-04 15:12:57 +05:30)


# Image Tranformation

In [24]:
try:
    os.makedirs('img')
    for i in tqdm(range(len(files))):
        img=cv.imread(f'raw_img_test/{files[i]}')
        tranform_img=resize(img,(150,150,3))
        path=f'img/{files[i]}'
        cv.imwrite(path, tranform_img*255)
except:
    pass

time: 0 ns (started: 2023-03-04 15:12:59 +05:30)


# Feature Extraction

In [49]:
feature=[]
target=[]
for i in tqdm(range(len(os.listdir('Img/')[:400]))):
    img_path=(data_set['img'].iloc[i])

    img=imread(img_path) #tranform image
    flatten_img=img.flatten()

    feature.append(flatten_img)
    target.append(data_set['class'].iloc[i])


feature=np.array(feature)
target=np.array(target)

100%|███████████████████████████████████████████████████████████████████████████████| 400/400 [00:00<00:00, 487.27it/s]

time: 906 ms (started: 2023-03-04 15:32:02 +05:30)





# Splitting with 30% for testing

In [50]:
X_train,X_test,y_train,y_test=train_test_split(feature,target,test_size=.2)

time: 31 ms (started: 2023-03-04 15:32:05 +05:30)


In [51]:
X_train.shape,X_test.shape

((320, 67500), (80, 67500))

time: 0 ns (started: 2023-03-04 15:32:07 +05:30)


In [52]:
model1=RandomForestClassifier()
model1.fit(X_train,y_train)
model1.score(feature,target)*100,model1.score(X_test,y_test)*100

(87.0, 35.0)

time: 3.02 s (started: 2023-03-04 15:32:10 +05:30)


In [53]:
model2=LogisticRegression()
model2.fit(X_train,y_train)
model2.score(feature,target)*100,model2.score(X_test,y_test)*100

(87.0, 35.0)

time: 25.9 s (started: 2023-03-04 15:32:13 +05:30)


In [58]:
model3=SVC()
model3.fit(X_train,y_train)
model3.score(feature,target)*100,model3.score(X_test,y_test)*100

(85.5, 37.5)

time: 1min 6s (started: 2023-03-04 15:50:34 +05:30)


# HyperParameter Tuning

<!-- # # svc=SVC()

# # params={
# #     'C':[1,2,3,4,5],
# #     'kernel':['poly','rbf','sigmoid'],
# #     'gamma':['auto','scale']
# # }
# # GSsvm=GridSearchCV(svc,param_grid=params,n_jobs=1)
# # GSsvm.fit(X_train,y_train)
# # print(GSsvm.best_params_)
# # print(GSsvm.best_score_) -->

In [None]:
lr=LogisticRegression()

params={
    'C':[1,2,3,4,5],
    'penalty':['l1', 'l2', 'elasticnet', None],
    "solver":["lbfgs", "liblinear"],
    'max_iter':[1000]
}

GSlr=GridSearchCV(lr,param_grid=params)
GSlr.fit(X_train,y_train)
print(GSlr.best_params_)
print(GSlr.best_score_)

In [None]:
svc=SVC()

params={
    'C':[1,2,3,4,5],
    'kernel':['poly','rbf','sigmoid'],
    'gamma':['auto','scale']
}
GSsvm=GridSearchCV(svc,param_grid=params,n_jobs=1)
GSsvm.fit(X_train,y_train)
print(GSsvm.best_params_)
print(GSsvm.best_score_)

In [None]:
rf=RandomForestClassifier()

params={
    'min_samples_split':[2,4,6],
    "min_samples_leaf":[1,2,3,4],
    'max_features':['auto','log2',None]
}

GSrf=GridSearchCV(rf,param_grid=params)
GSrf.fit(X_train,y_train)
print(GSrf.best_params_)
print(GSrf.best_score_)

In [None]:
results={
    'Algorithm':['LogisticRegression','SVC','RandomForestClassifier'],
    'HyperParameters':[GSlr.best_params_,GSsvm.best_params_,GSrf.best_params_],
    'Best_Score':[GSlr.best_score_,GSsvm.best_score_,GSrf.best_score_],
    'Train_Accuracy':[GSlr.score(X_train,y_train),GSsvm.score(X_train,y_train),GSrf.score(X_train,y_train)],
    'Test_Accuracy':[GSlr.score(X_test,y_test),GSsvm.score(X_test,y_test),GSrf.score(X_test,y_test)]
    
}

resultsdf=pd.DataFrame(results)
resultsdf.sort_values(by=['Best_Score'],ascending=False,inplace=True)
resultsdf

# CrossValScore

In [32]:
np.average(cross_val_score(model1,X_train,y_train,cv=3)),np.average(cross_val_score(model2,X_train,y_train,cv=3))

(0.26523297491039427, 0.31541218637992835)

time: 1min 15s (started: 2023-03-04 15:19:20 +05:30)


# Predicting

In [31]:
def tranform_image(img_path):
    feature=[]
    img=imread(f'img/{img_path}.png')
    tranform_img=resize(img,(150,150,3))
    flatten_img=tranform_img.flatten()
    feature.append(flatten_img)
    return np.array(feature)

time: 0 ns (started: 2023-03-04 15:19:08 +05:30)


In [64]:
img_name=f'../test_img/{input()}'
print(model1.predict(tranform_image(img_name)))
print(model2.predict(tranform_image(img_name)))
print(model3.predict(tranform_image(img_name)))

img010-019
[8]
[10]
[8]
time: 2.25 s (started: 2023-03-04 15:55:05 +05:30)


# Dumping Model for future use

In [65]:
import pickle

pickle.dump(model2,open('KCR.pkl','wb'))

time: 15 ms (started: 2023-03-04 15:55:24 +05:30)
