<a href="https://colab.research.google.com/github/HimanshuPathak2725/googleColab/blob/main/FishClip.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# from datasets import load_dataset
# df = load_dataset("Otolith/FishCLIP")

In [None]:
# df

In [None]:
# # Convert the dataset's "train" split into a pandas DataFrame (tabular columns)
# df = df["train"].to_pandas()
# df.head()

In [None]:
# Mount Google Drive so files under /content/drive are reachable from this notebook.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
import pandas as pd

In [None]:
# Load the FishCLIP metadata table stored on Google Drive and preview the first rows.
csv_path = "/content/drive/MyDrive/Colab Notebooks/downloaded_fish_images/fishclip.csv"
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
import requests

# Task
Download images from the URLs in the "photo_url" column of the dataframe and save them locally. Add a new column to the dataframe with the local file paths of the downloaded images.

## Create a directory

### Subtask:
Create a local directory to store the downloaded images.


**Reasoning**:
Create a directory to store the downloaded images using the `os` module.



In [None]:
import os

# Google Drive folder that stores the downloaded fish images.
image_dir = "/content/drive/MyDrive/Colab Notebooks/downloaded_fish_images"
# exist_ok=True makes this cell safe to re-run when the folder already exists.
os.makedirs(image_dir, exist_ok=True)

## Define a function to download images

### Subtask:
Create a function that takes a URL and a file path as input, downloads the image from the URL, and saves it to the specified file path.


**Reasoning**:
Define a function to download an image from a URL and save it to a specified file path, including error handling.



In [None]:
def download_image(url, file_path, timeout=30):
    """
    Downloads an image from a URL and saves it to a file path.

    The payload is streamed into a temporary ``<file_path>.part`` file and only
    renamed to ``file_path`` once the transfer has completed, so a failed or
    interrupted download never leaves a truncated image at ``file_path``.

    Args:
        url: The URL of the image.
        file_path: The path to save the image.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the call indefinitely.

    Returns:
        The file_path if successful, None otherwise (request failure, bad HTTP
        status, or an error while writing the file).
    """
    tmp_path = f"{file_path}.part"
    try:
        # The context manager releases the streamed connection even on errors.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
            with open(tmp_path, 'wb') as out_file:
                for chunk in response.iter_content(chunk_size=8192):
                    out_file.write(chunk)
        # Publish the file under its final name only after a complete download.
        os.replace(tmp_path, file_path)
        return file_path
    except (requests.exceptions.RequestException, OSError) as e:
        print(f"Failed to download image from {url}: {e}")
        return None
    finally:
        # Remove a leftover partial file; after a successful rename nothing is left to delete.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

## Apply the function to the dataframe

### Subtask:
Apply the image downloading function to each row of the DataFrame to download the images and store the local file paths in a new column.


In [None]:
# Same Drive folder that image_dir was assigned in the directory-creation cell.
image_dir = "/content/drive/MyDrive/Colab Notebooks/downloaded_fish_images"

# Add a new column 'local_image_path' with initial None values
# df['local_image_path'] = None

In [None]:
# Display the DataFrame to inspect its current contents.
df

In [None]:
# Save the dataset in CSV format
# df.to_csv("/content/drive/MyDrive/Colab Notebooks/downloaded_fish_images/fishclip.csv", index=False)

In [None]:
# Iterate through the DataFrame and download images if they don't exist locally
# for index, row in df.iterrows():
#     file_path = os.path.join(image_dir, f"{row['photo_id']}.jpg")

#     if not os.path.exists(file_path):
#         downloaded_path = download_image(row['photo_url'], file_path)

#         if downloaded_path:
#             df.at[index, 'local_image_path'] = downloaded_path
#     else:

#         df.at[index, 'local_image_path'] = file_path

In [None]:
# Save the dataset in CSV format
# df.to_csv("/content/drive/MyDrive/Colab Notebooks/downloaded_fish_images/fishclip.csv", index=False)

In [None]:
# Preview the first rows; the cell's last expression is rendered automatically.
df.head()

In [None]:
# Count the downloaded images in the Drive folder. The folder also contains
# fishclip.csv, so only entries ending in .jpg (the extension used for the
# downloaded files) are counted instead of every directory entry.
downloaded_dir = "/content/drive/MyDrive/Colab Notebooks/downloaded_fish_images"
sum(entry.lower().endswith(".jpg") for entry in os.listdir(downloaded_dir))

In [None]:
# Count the missing values in each column.
df.isnull().sum()

In [None]:
# Print a summary of the DataFrame (columns, non-null counts, dtypes).
df.info()

In [None]:
# List the distinct values present in every column of the DataFrame.
for col_name, col_values in df.items():
    distinct = col_values.unique()
    # Header line, the values, then a blank separator line.
    print(f"Unique values in column '{col_name}':", distinct, "", sep="\n")

In [None]:
# %pip (unlike !pip) installs into the environment of the running kernel.
%pip install tensorflow tensorflow-datasets

In [None]:
# Fine-tune a pretrained EfficientNetB0 model on the given data for image classification using TensorFlow
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## Prepare Data Generators

### Subtask:
Create data generators for training and validation using `ImageDataGenerator`.

In [None]:
# Drop every row that contains a null value.
df = df.dropna()

In [None]:
# Display the DataFrame after the rows with null values were dropped.
df

In [None]:
# Define image dimensions and batch size
IMG_HEIGHT = 224
IMG_WIDTH = 224
BATCH_SIZE = 128
SPLIT_SEED = 42  # fixed seed so the train/validation split is reproducible

# Create ImageDataGenerators for training and validation.
# No `rescale` is applied: the Keras EfficientNet models contain their own
# input rescaling and expect raw pixel values in the [0, 255] range, so
# dividing by 255 here would feed the pretrained backbone inputs on the
# wrong scale.
train_datagen = ImageDataGenerator(validation_split=0.2)
valid_datagen = ImageDataGenerator(validation_split=0.2)

# `validation_split` slices the DataFrame by row position, so the rows are
# shuffled once up front. This keeps a CSV that happens to be ordered (e.g. by
# species) from producing a skewed validation subset. Both generators must
# receive the same shuffled frame so the two subsets stay disjoint.
shuffled_df = df.sample(frac=1.0, random_state=SPLIT_SEED)

# Create data generators from the DataFrame
# Ensure your DataFrame has 'local_image_path' and 'Species' columns
train_generator = train_datagen.flow_from_dataframe(
    dataframe=shuffled_df,
    x_col='local_image_path',
    y_col='Species',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

validation_generator = valid_datagen.flow_from_dataframe(
    dataframe=shuffled_df,
    x_col='local_image_path',
    y_col='Species',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation'
)

# Get the number of classes
num_classes = len(train_generator.class_indices)
print(f"Number of classes: {num_classes}")

## Load and Modify the Pre-trained Model

### Subtask:
Load the pre-trained EfficientNetB0 model and add a new output layer for classification.

In [None]:
# ImageNet-pretrained EfficientNetB0 backbone without its original classification head.
base_model = EfficientNetB0(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
)

# Freeze the whole convolutional base so only the new head is trained.
base_model.trainable = False

# New classification head: global average pooling followed by a softmax layer
# with one output unit per class.
x = GlobalAveragePooling2D()(base_model.output)
predictions = Dense(num_classes, activation='softmax')(x)

# Assemble backbone and head into the model that will be trained.
model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy metric.
optimizer = Adam(learning_rate=0.001)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the model
EPOCHS = 10  # You can adjust the number of epochs

# steps_per_epoch / validation_steps are intentionally not passed. Computing
# them as `samples // BATCH_SIZE` drops the trailing partial batch and yields 0
# steps when a subset has fewer than BATCH_SIZE samples. Left unset, Keras runs
# each epoch (and each validation pass) over the whole generator, which also
# matches how model.evaluate consumes the validation generator.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=EPOCHS,
)

## Evaluate the Model

### Subtask:
Evaluate the performance of the trained model on the validation set.

In [None]:
# Score the trained model on the validation generator and report both metrics.
eval_results = model.evaluate(validation_generator)
loss, accuracy = eval_results

for metric_label, metric_value in (("Loss", loss), ("Accuracy", accuracy)):
    print(f"Validation {metric_label}: {metric_value}")