<a href="https://colab.research.google.com/github/Sabrina-Hendricks/DS4002-Group13-Project3/blob/main/Scripts/Project3MI3Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Get photos from Github

In [33]:
import pandas as pd

# Define the base URL for your GitHub repository
base_url = 'https://raw.githubusercontent.com/Sabrina-Hendricks/DS4002-Group13-Project3/main/Data'

# List of breeds (directories)
breeds = ['Beagle', 'Boxer', 'Bulldog', 'Chihuahua', 'Chow', 'CockerSpaniel',
          'Doberman', 'GermanShepherd', 'Golden', 'GreatDane', 'Husky', 'Lab',
          'Pomeranian', 'Pug', 'Rottweiler', 'SaintBernard', 'Shih-tzu', 'StandardPoodle', 'StandardSchnauzer', 'Whippet']  # Add other breeds as needed

# Initialize lists to store the image URLs and labels
file_urls = []
labels = []

# Construct URLs for each breed
for breed in breeds:
    for i in range(1, 100):
        file_name = f"{breed}_{i}.jpg"  # Assumes file names follow this convention, e.g., 'Beagle_1.jpg'
        file_url = f"{base_url}/{breed}/{file_name}"

        file_urls.append(file_url)
        labels.append(breed)

# Create a DataFrame with the URLs and labels
df = pd.DataFrame({
    'file_url': file_urls,
    'label': labels
})

# Display the DataFrame
df.head()

Unnamed: 0,file_url,label
0,https://raw.githubusercontent.com/Sabrina-Hend...,Beagle
1,https://raw.githubusercontent.com/Sabrina-Hend...,Beagle
2,https://raw.githubusercontent.com/Sabrina-Hend...,Beagle
3,https://raw.githubusercontent.com/Sabrina-Hend...,Beagle
4,https://raw.githubusercontent.com/Sabrina-Hend...,Beagle


In [34]:
import numpy as np
import requests
from PIL import Image
from io import BytesIO

def load_image_from_url(url, target_size=(128, 128)):
    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()
        img = Image.open(BytesIO(response.content)).convert('RGB')
        img = img.resize(target_size)
        img_array = np.array(img) / 255.0  # Normalize to [0, 1]
        return img_array
    except requests.exceptions.RequestException as e:
        print(f"Request failed for URL {url}: {e}")
        return None
    except (Image.UnidentifiedImageError, IOError) as e:
        print(f"Could not open image from URL {url}: {e}")
        return None


Prep data generator to get images from urls

In [35]:
!pip install -U tensorflow



In [36]:
import numpy as np
from tensorflow.keras.utils import Sequence
from tensorflow.keras.utils import to_categorical


class URLImageDataGenerator(Sequence):
    def __init__(self, dataframe, batch_size=32, target_size=(128, 128), shuffle=True):
        self.dataframe = dataframe
        self.batch_size = batch_size
        self.target_size = target_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.dataframe) / self.batch_size))

    def __getitem__(self, index):
        batch_data = self.dataframe.iloc[index * self.batch_size:(index + 1) * self.batch_size]

        X, y = [], []
        for url, label_idx in zip(batch_data['file_url'], batch_data['label_idx']):
            img = load_image_from_url(url, self.target_size)
            if img is not None:
                X.append(img)
                y.append(label_idx)

        X = np.array(X)
        y = to_categorical(y, num_classes=len(breed_to_index))

        return X, y

    def on_epoch_end(self):
        if self.shuffle:
            self.dataframe = self.dataframe.sample(frac=1).reset_index(drop=True)




Prep data

In [37]:
# Example DataFrame df containing 'file_url' and 'label' columns
breed_to_index = {breed: idx for idx, breed in enumerate(df['label'].unique())}
df['label_idx'] = df['label'].map(breed_to_index)

# Split the data into training and validation sets
train_df = df.sample(frac=0.8, random_state=42)
val_df = df.drop(train_df.index)


In [38]:
batch_size = 32
target_size = (128, 128)

# Initialize the training and validation generators
train_generator = URLImageDataGenerator(
    dataframe=train_df,
    batch_size=batch_size,
    target_size=target_size,
    shuffle=True
)

val_generator = URLImageDataGenerator(
    dataframe=val_df,
    batch_size=batch_size,
    target_size=target_size,
    shuffle=False
)


Define model

In [39]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Load MobileNetV2 without the top layer
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(128, 128, 3))

# Freeze the base model layers
for layer in base_model.layers:
    layer.trainable = False

# Add classification layers
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(len(breed_to_index), activation='softmax')(x)

# Build and compile the model
model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])


Train model

In [40]:
# Train the model
epochs = 10  # Adjust as needed

history = model.fit(
    train_generator,
    validation_data=val_generator,
    steps_per_epoch=len(train_generator),
    validation_steps=len(val_generator),
    epochs=epochs
)


  self._warn_if_super_not_called()


Epoch 1/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m218s[0m 4s/step - accuracy: 0.3216 - loss: 2.3902 - val_accuracy: 0.7266 - val_loss: 0.9273
Epoch 2/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 3/10


  self.gen.throw(typ, value, traceback)


[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 4s/step - accuracy: 0.8300 - loss: 0.6593 - val_accuracy: 0.7995 - val_loss: 0.7061
Epoch 4/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 5/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 4s/step - accuracy: 0.9016 - loss: 0.4014 - val_accuracy: 0.8151 - val_loss: 0.6250
Epoch 6/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 921us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 7/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 4s/step - accuracy: 0.9417 - loss: 0.2887 - val_accuracy: 0.8151 - val_loss: 0.5941
Epoch 8/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 888us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 9/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m198s[0m 4s/step - accuracy: 0.9687 - loss: 0.2126 - val_accuracy: 0.8073 - va

Make predictions

In [50]:
def predict_breed(url):
    img_array = np.expand_dims(load_image_from_url(url), axis=0)  # Add batch dimension
    prediction = model.predict(img_array)
    predicted_index = np.argmax(prediction)
    predicted_breed = [breed for breed, idx in breed_to_index.items() if idx == predicted_index][0]
    return predicted_breed

# Iterate over the validation set and predict breeds
val_df['predicted_breed'] = val_df['file_url'].apply(predict_breed)

# Display the predictions
print(val_df[['file_url', 'label', 'predicted_breed']])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39

Accuracy Percentage

In [1]:
# Calculate the accuracy
correct_predictions = (val_df['label'] == val_df['predicted_breed']).sum()  # Count correct predictions
total_predictions = len(val_df)  # Total number of predictions
accuracy = (correct_predictions / total_predictions) * 100  # Calculate accuracy percentage

# Print the accuracy percentage
print(f"Accuracy: {accuracy:.2f}%")

NameError: name 'val_df' is not defined