In [25]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'satellite-image-classification:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F1544742%2F2546969%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240715%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240715T093410Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D649595840249388602fb8d14d5afd5ab4de4657059e27d7823782ebdb63b2825511391125d940d19eb92c53d80fa3462cf45b2786cf4c6c6b386aec3428953d0dd47e50ea7d1009d4b2f6b77fb2ad67eb5a290b82bc231cbd4c257ce176a93b3bedc9bb91816c94a34caae464bb824b07a7f3a4af7fee8563a09c507c4e564129023927ad5d4587422d7ed78d564e8612dc01923b2c8fc965ad57a28107ae965783ca0210b1a5a79e1f71ebd94d85ebc41cb61d0974a68041131655f7466ee9e410e4f846b3bf25f6da8045370b8ca415cd8054628a607a16ecb3a4b9dff363742032da80eac1cd82f4a5bc855966eb1341d2411391635b9826060686be4fbf3'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading satellite-image-classification, 22836290 bytes compressed
Downloaded and uncompressed: satellite-image-classification
Data source import complete.


In [26]:
import pandas as pd
import numpy as np
import os


In [27]:
img_types= os.listdir('/kaggle/input/satellite-image-classification/data')
print(img_types)
print('images classes: ', len(img_types))

['water', 'cloudy', 'green_area', 'desert']
images classes:  4


In [28]:
path=('/kaggle/input/satellite-image-classification/data')

In [29]:
images=[]
for item in img_types:
    all_imgs= os.listdir('/kaggle/input/satellite-image-classification/data'+ '/'+ item)

    for img in all_imgs:
        images.append((item, str('/kaggle/input/satellite-image-classification/data' + '/' +item)+ '/' + img))

images[0]

('water',
 '/kaggle/input/satellite-image-classification/data/water/SeaLake_668.jpg')

In [30]:
df= pd.DataFrame(data= images, columns=['label', 'image_path'])
df.head()

Unnamed: 0,label,image_path
0,water,/kaggle/input/satellite-image-classification/d...
1,water,/kaggle/input/satellite-image-classification/d...
2,water,/kaggle/input/satellite-image-classification/d...
3,water,/kaggle/input/satellite-image-classification/d...
4,water,/kaggle/input/satellite-image-classification/d...


In [31]:
from sklearn.utils import shuffle
df = shuffle(df)
df

Unnamed: 0,label,image_path
1413,water,/kaggle/input/satellite-image-classification/d...
3520,green_area,/kaggle/input/satellite-image-classification/d...
4762,desert,/kaggle/input/satellite-image-classification/d...
836,water,/kaggle/input/satellite-image-classification/d...
1808,cloudy,/kaggle/input/satellite-image-classification/d...
...,...,...
3754,green_area,/kaggle/input/satellite-image-classification/d...
591,water,/kaggle/input/satellite-image-classification/d...
2696,cloudy,/kaggle/input/satellite-image-classification/d...
1212,water,/kaggle/input/satellite-image-classification/d...


In [32]:
from sklearn.preprocessing import LabelEncoder

#label_encoder = LabelEncoder()
#df['label']= label_encoder.fit_transform(df['label'])
#df['label'].unique()

In [33]:
df

Unnamed: 0,label,image_path
1413,water,/kaggle/input/satellite-image-classification/d...
3520,green_area,/kaggle/input/satellite-image-classification/d...
4762,desert,/kaggle/input/satellite-image-classification/d...
836,water,/kaggle/input/satellite-image-classification/d...
1808,cloudy,/kaggle/input/satellite-image-classification/d...
...,...,...
3754,green_area,/kaggle/input/satellite-image-classification/d...
591,water,/kaggle/input/satellite-image-classification/d...
2696,cloudy,/kaggle/input/satellite-image-classification/d...
1212,water,/kaggle/input/satellite-image-classification/d...


In [34]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms

In [35]:
from sklearn.model_selection import train_test_split

train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)
train_data, val_data = train_test_split(train_data, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2


In [36]:
transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize to the size expected by ResNet
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

In [37]:
import torch
from torch.utils.data import Dataset
from PIL import Image
from sklearn.preprocessing import LabelEncoder

class CustomDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        # Convert the Subset to a DataFrame
        self.data = dataframe.reset_index(drop=True)
        self.transform = transform
        self.label_encoder = LabelEncoder()
        self.data['label'] = self.label_encoder.fit_transform(self.data['label'])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path = self.data.iloc[idx]['image_path']
        image = Image.open(img_path).convert('RGB')
        label = torch.tensor(self.data.iloc[idx]['label'], dtype=torch.long)

        if self.transform:
            image = self.transform(image)

        return image, label

In [38]:
train_dataset = CustomDataset(train_data, transform=transform)
vaild_dataset = CustomDataset(val_data, transform=transform)
test_dataset = CustomDataset(test_data, transform=transform)

In [39]:
train_dataset.transform

Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)

In [40]:
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True,num_workers=4)
val_loader = DataLoader(dataset=vaild_dataset, batch_size=32, shuffle=False,num_workers=4)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False,num_workers=4)



In [41]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x79dffdad3c70>

In [42]:
train_loader.dataset

<__main__.CustomDataset at 0x79dffc12c2b0>

In [43]:
for images, labels in train_loader:
      # Your training loop here
      print(f'Batch of images: {images.shape}')
      print(f'Batch of labels: {labels}')
      break  # For demonstration, break after first batch

Batch of images: torch.Size([32, 3, 224, 224])
Batch of labels: tensor([3, 3, 2, 1, 1, 0, 3, 3, 2, 0, 3, 3, 3, 0, 3, 3, 1, 3, 2, 2, 0, 2, 0, 1,
        0, 0, 0, 3, 0, 2, 3, 2])


In [44]:
import torch
import torch.nn as nn
import torchvision.models as models

# Load the pre-trained ResNet model
resnet = models.resnet50(pretrained=True)

# Modify the classifier head
num_classes = 4
num_features = resnet.fc.in_features
resnet.fc = nn.Linear(num_features, num_classes)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet.parameters(), lr=0.001)



In [45]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [46]:
for param in resnet.fc.parameters():
    param.requires_grad = True

In [47]:
def train_model(model, criterion, optimizer, num_epochs=3):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)

        # Print average loss per epoch
        epoch_loss = running_loss / len(train_loader.dataset)
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}')

        # Evaluate on validation set
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss /= len(val_loader.dataset)
        val_accuracy = correct / total
        print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2%}')

In [48]:
train_model(resnet, criterion, optimizer)

Epoch [1/3], Loss: 0.3104
Validation Loss: 0.1575, Validation Accuracy: 95.03%
Epoch [2/3], Loss: 0.1490
Validation Loss: 0.1934, Validation Accuracy: 96.18%
Epoch [3/3], Loss: 0.0964
Validation Loss: 5.0267, Validation Accuracy: 80.20%


In [49]:
def evaluate_model(model, data_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in data_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f'Accuracy: {accuracy:.2%}')

In [50]:
evaluate_model(resnet, test_loader)

Accuracy: 80.66%


In [51]:
torch.save(resnet.state_dict(), 'resnet_model.pth')

In [52]:
model = models.resnet(weights='IMAGENET1K_V1')
torch.save(model.state_dict(), 'model_weights.pth')

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:10<00:00, 51.5MB/s]
