In [38]:
import requests
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import urllib.request
import cv2
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,confusion_matrix
import pickle

In [2]:
# Animal classes to collect; each name is both the API search term and the class label.
lst = ['cat', 'dog', 'elephant', 'parrot', 'cow', 'lion', 'giraffe', 'panda', 'penguin', 'cheetah']

# Root folder that receives one sub-folder of downloaded images per animal.
IMAGE_ROOT = 'C:\\Users\\CHARISHMA\\Pictures\\ML Images\\animals'

# (width, height) handed to cv2.resize; every image is flattened to width * height features.
# NOTE(review): the table displayed after this cell has pixel columns 0..99 (100 features),
# which does not match 5 * 4 = 20 -- confirm which size produced the saved animals.csv.
IMAGE_SIZE = (5, 4)

# Seconds to wait for the image-search API before giving up on a request.
REQUEST_TIMEOUT = 30

# Read the API key from the environment so it never lands in the notebook or in version control.
api_key = os.environ.get('RAPIDAPI_KEY')
if not api_key:
    raise RuntimeError('Set the RAPIDAPI_KEY environment variable before running this cell.')

# The headers do not depend on the animal, so build them once outside the loop.
headers = {
    "X-RapidAPI-Key": api_key,
    "X-RapidAPI-Host": "free-images-api.p.rapidapi.com"
}

n = 0           # running image counter across all animals; used in the file names
arr = []        # one flattened grayscale pixel vector per successfully loaded image
label = []      # animal name for each entry of `arr` (same order)
img_dict = {}   # file name -> resized grayscale array

for animal in lst:
    # Ask the API for image URLs of this animal and fail loudly on an HTTP error
    # instead of trying to parse an error payload as results.
    url = f"https://free-images-api.p.rapidapi.com/images/{animal}"
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    df = pd.json_normalize(response.json(), record_path=['results'])

    # Save into an explicit folder rather than os.chdir(), so the kernel's working
    # directory (and every later relative path) is left untouched.
    folder_path = os.path.join(IMAGE_ROOT, animal)
    os.makedirs(folder_path, exist_ok=True)

    for j in df['image']:
        image_name = f'{animal}{n}.jpg'
        image_path = os.path.join(folder_path, image_name)
        n += 1

        # A single dead link should not abort the whole crawl.
        try:
            urllib.request.urlretrieve(j, image_path)
        except OSError as exc:
            print(f'Skipping {j}: download failed ({exc})')
            continue

        # cv2.imread returns None (it does not raise) when the file is not a decodable image.
        image = cv2.imread(image_path)
        if image is None:
            print(f'Skipping {image_name}: downloaded file is not a readable image')
            continue

        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        resized_image = cv2.resize(gray_image, IMAGE_SIZE)

        arr.append(resized_image.flatten())
        # Keep the 2-D grayscale array addressable by its file name.
        img_dict[image_name] = resized_image
        label.append(animal)

# One row per image: the flattened pixel values followed by the animal name in 'label'.
data = pd.DataFrame(arr)
data['label'] = label


In [3]:
# Show the assembled table: one row per image, pixel columns followed by the 'label' column.
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,label
0,119,145,149,141,147,80,153,120,133,121,...,59,92,47,57,38,78,52,49,19,cat
1,111,120,125,128,132,130,128,127,122,117,...,104,67,106,105,109,110,110,100,106,cat
2,136,141,138,143,142,145,140,131,115,103,...,23,21,26,25,17,10,14,32,13,cat
3,239,241,238,239,239,239,238,239,237,234,...,222,225,226,226,229,229,229,229,225,cat
4,66,31,36,134,106,139,114,49,58,61,...,57,58,107,88,93,123,155,142,40,cat
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,128,131,124,58,11,106,14,115,2,4,...,0,18,26,80,132,106,57,91,52,cheetah
196,156,164,156,147,154,149,149,153,152,140,...,97,143,69,179,63,83,112,132,92,cheetah
197,94,117,136,137,209,229,219,202,187,210,...,161,172,156,179,169,187,191,165,165,cheetah
198,194,183,195,195,177,179,174,201,180,172,...,103,103,88,68,87,116,98,94,115,cheetah


In [4]:
# Persist the feature table (without the row index) so later cells can reload it from disk.
data.to_csv(
    path_or_buf='C:\\Users\\CHARISHMA\\Pictures\\ML Images\\animals\\animals.csv',
    index=False,
)

In [2]:
# Reload the saved feature table; from here on `df` is the full dataset (pixels + 'label').
df = pd.read_csv(
    filepath_or_buffer=r"C:\Users\CHARISHMA\Pictures\ML Images\animals\animals.csv",
)

## Splitting the Data into train and test

In [32]:
# Separate the target column from the pixel features.
y = df['label']
X = df.drop(columns=['label'])

In [33]:
# Turn the animal names into integer class ids; the fitted encoder is kept so the
# ids can be mapped back to names later.
label_encoder = LabelEncoder()
label_encoder.fit(y)
y = label_encoder.transform(y)

In [44]:
# Class names in encoder order: position i is the name that was encoded as integer i.
encoded_classes = label_encoder.classes_

# Print the name -> id mapping. The loop variable is deliberately not called `label`,
# so the `label` list built while collecting the images is not overwritten by a string.
for index, class_name in enumerate(encoded_classes):
    print(f"{class_name} is encoded as {index}")

cat is encoded as 0
cheetah is encoded as 1
cow is encoded as 2
dog is encoded as 3
elephant is encoded as 4
giraffe is encoded as 5
lion is encoded as 6
panda is encoded as 7
parrot is encoded as 8
penguin is encoded as 9


In [34]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=23,
)

In [45]:
# Pipelines
pipelines = {
    'random_forest': Pipeline([
        ('classifier', RandomForestClassifier())
    ]),
    'svm': Pipeline([
        ('classifier', SVC())
    ]),
    'xgboost': Pipeline([
        ('classifier', XGBClassifier())
    ])
}

# Define parameter grid for each algorithm
param_grids = {
    'random_forest': {
        'classifier__n_estimators': [50, 100, 200],
        'classifier__max_depth': [None, 5, 10, 20]
    },
    'svm': {
        'classifier__C': [0.1, 1, 10, 100],
        'classifier__gamma': [0.001, 0.01, 0.1, 1],
        'classifier__kernel': ['rbf', 'linear']
    },
    'xgboost': {
        'classifier__learning_rate': [0.01, 0.1, 0.3],
        'classifier__n_estimators': [100, 200, 300],
        'classifier__max_depth': [3, 5, 7]
    }
}

# Perform GridSearchCV for each algorithm
best_models = {}

for algo in pipelines.keys():
    print("*"*10, algo, "*"*10)
    grid_search = GridSearchCV(estimator=pipelines[algo], 
                               param_grid=param_grids[algo], 
                               cv=5, 
                               scoring='accuracy', 
                               return_train_score=True,
                               verbose=1
                              )
    
    %time grid_search.fit(X_train, y_train)
    
    best_models[algo] = grid_search.best_estimator_
    
    # Save best model using pickle
    with open(algo + '_best_model.pkl', 'wb') as model_file:
        pickle.dump(best_models[algo], model_file)
    
    print('Score on Test Data: ', grid_search.score(X_test, y_test))

********** random_forest **********
Fitting 5 folds for each of 12 candidates, totalling 60 fits
CPU times: total: 17 s
Wall time: 19.7 s
Score on Test Data:  0.175
********** svm **********
Fitting 5 folds for each of 32 candidates, totalling 160 fits
CPU times: total: 1.8 s
Wall time: 1.83 s
Score on Test Data:  0.175
********** xgboost **********
Fitting 5 folds for each of 27 candidates, totalling 135 fits
CPU times: total: 19min 15s
Wall time: 3min 16s
Score on Test Data:  0.1


In [46]:
def load_pickled_model(path):
    """Return the object stored in the pickle file at ``path``.

    Only use this on files written by this notebook: unpickling executes
    arbitrary code, so a pickle from an untrusted source must never be loaded.
    """
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


# Reload the best pipeline of each algorithm from the files written by the grid-search cell.
random_forest_model = load_pickled_model('random_forest_best_model.pkl')
svm_model = load_pickled_model('svm_best_model.pkl')
xgboost_model = load_pickled_model('xgboost_best_model.pkl')

