In [2]:
#!pip install -q huggingface_hub

In [3]:
from huggingface_hub import hf_hub_download

zip_path = hf_hub_download(
    repo_id="rupesh002/Patholes_Dataset",
    repo_type="dataset",
    filename="pothole_dataset.zip"
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


pothole_dataset.zip:   0%|          | 0.00/306M [00:00<?, ?B/s]

In [4]:
import zipfile
import os


os.makedirs("pothole_data", exist_ok=True)


with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall("pothole_data")

In [5]:
import pandas as pd
train_csv = "pothole_data/pothole_dataset/train_ids_labels.csv"
test_csv = "pothole_data/pothole_dataset/test_ids_only.csv"

train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)

print(" Columns in training CSV:", list(train_df.columns), "\n")
print(train_df.head(), "\n")

 Columns in training CSV: ['Image_ID', 'Label'] 

          Image_ID  Label
0  cVOfkSdqnWXUerr      0
1  EhnvIDPXFFjUhkR      0
2  tYKqoStvHsryFhS      1
3  eSpjlsZIwOMLmUS      1
4  uzxhIXjNENLyHwZ      1 



In [6]:
# Fix test image paths
image_root = "pothole_data/pothole_dataset/all_data"

def find_image_path(img_id):
    for ext in [".jpg", ".jpeg", ".png", ".JPG", ".JPEG", ".PNG"]:
        path = os.path.join(image_root, img_id + ext)
        if os.path.exists(path):
            return path
    return None

test_df["Image_ID"] = test_df["Image_ID"].apply(find_image_path)
test_df = test_df.dropna().reset_index(drop=True)

print(f"✅ Found {len(test_df)} test images")
print(test_df.head())

✅ Found 1650 test images
                                            Image_ID
0  pothole_data/pothole_dataset/all_data/nRhHxqLh...
1  pothole_data/pothole_dataset/all_data/gbSntVov...
2  pothole_data/pothole_dataset/all_data/nqOobGmv...
3  pothole_data/pothole_dataset/all_data/oIkPTooL...
4  pothole_data/pothole_dataset/all_data/eSKxsTTJ...


In [7]:
image_root = "pothole_data/pothole_dataset/all_data"
print(f"Found images in: {image_root}")

Found images in: pothole_data/pothole_dataset/all_data


In [8]:
if "Image_ID" in train_df.columns:
    img_col = "Image_ID"
elif "image" in train_df.columns:
    img_col = "image"
elif "filename" in train_df.columns:
    img_col = "filename"
else:
    raise KeyError("Could nt find image column in train CSV. Check train_df.columns().")


def make_path(x):
    path = os.path.join(image_root, f"{x}.JPG")
    if not os.path.exists(path):
        path = os.path.join(image_root, f"{x}.jpg")
    return path

train_df[img_col] = train_df[img_col].apply(make_path)


train_df = train_df[train_df[img_col].apply(os.path.exists)].reset_index(drop=True)
print(f"All usable images: {len(train_df)}")
print(train_df.head())


All usable images: 4026
                                            Image_ID  Label
0  pothole_data/pothole_dataset/all_data/cVOfkSdq...      0
1  pothole_data/pothole_dataset/all_data/EhnvIDPX...      0
2  pothole_data/pothole_dataset/all_data/tYKqoStv...      1
3  pothole_data/pothole_dataset/all_data/eSpjlsZI...      1
4  pothole_data/pothole_dataset/all_data/uzxhIXjN...      1


In [9]:
print(train_df.head())
print(test_df.head())

                                            Image_ID  Label
0  pothole_data/pothole_dataset/all_data/cVOfkSdq...      0
1  pothole_data/pothole_dataset/all_data/EhnvIDPX...      0
2  pothole_data/pothole_dataset/all_data/tYKqoStv...      1
3  pothole_data/pothole_dataset/all_data/eSpjlsZI...      1
4  pothole_data/pothole_dataset/all_data/uzxhIXjN...      1
                                            Image_ID
0  pothole_data/pothole_dataset/all_data/nRhHxqLh...
1  pothole_data/pothole_dataset/all_data/gbSntVov...
2  pothole_data/pothole_dataset/all_data/nqOobGmv...
3  pothole_data/pothole_dataset/all_data/oIkPTooL...
4  pothole_data/pothole_dataset/all_data/eSKxsTTJ...


In [10]:
from PIL import Image
import torch
from torch.utils.data import Dataset

class potholes(Dataset):
  def __init__ (self, dataframe, transform):
    self.df = dataframe
    self.transform = transform
  def __len__(self):
    return len(self.df)
  def __getitem__(self, index):
    image_path = self.df.iloc[index]["Image_ID"]
    label = self.df.iloc[index]["Label"]

    image = Image.open(image_path).convert("RGB")

    if self.transform:
      image = self.transform(image)

    return image, torch.tensor(label, dtype=torch.long)

In [11]:
from torchvision import transforms
from torch.utils.data import DataLoader

transform = transforms.Compose(
    [
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    ]
)

train_dataset = potholes(train_df,transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [12]:
import torch.nn as nn
import torchvision.models as models
model = models.resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = True

model.fc = nn.Linear(model.fc.in_features,2 )



Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 197MB/s]


In [13]:
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

for epoch in range(15):  # increase epochs later
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss:.4f}")


Epoch 1, Loss: 34.5215
Epoch 2, Loss: 16.8792
Epoch 3, Loss: 13.8673
Epoch 4, Loss: 8.7849
Epoch 5, Loss: 8.5060
Epoch 6, Loss: 7.0612
Epoch 7, Loss: 5.3233
Epoch 8, Loss: 4.5851
Epoch 9, Loss: 4.8884
Epoch 10, Loss: 4.5228
Epoch 11, Loss: 3.6127
Epoch 12, Loss: 2.8293
Epoch 13, Loss: 3.7737
Epoch 14, Loss: 2.5612
Epoch 15, Loss: 1.8422


In [14]:
class PotholeTestDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.df = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_path = self.df.iloc[idx]["Image_ID"]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, self.df.iloc[idx]["Image_ID"]

test_dataset = PotholeTestDataset(test_df, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32)

model.eval()
predictions = []

with torch.no_grad():
    for images, ids in test_loader:
        images = images.to(device)
        outputs = model(images)
        probs = torch.softmax(outputs, dim=1)[:, 1]
        predictions.extend(zip(ids, probs.cpu().numpy()))



In [15]:
import pandas as pd

import pandas as pd


submission_df = pd.DataFrame(predictions, columns=["Image_Path", "Pothole_Probability"])

submission_df["Image_ID"] = submission_df["Image_Path"].apply(lambda x: x.split("/")[-1].split(".")[0])

submission_df = submission_df[["Image_ID", "Pothole_Probability"]]

submission_df["Pothole_Probability"] = submission_df["Pothole_Probability"].apply(lambda x: 1 if x > 0.5 else 0)

# save csv
submission_df.to_csv("final_submission.csv", index=False)

print(" Final submission file created: final_submission.csv")
submission_df.head()


from google.colab import files
files.download("final_submission.csv")

 Final submission file created: final_submission.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [16]:
import joblib
joblib.dump(model, "model.pkl")
files.download('model.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [17]:
!jupyter nbconvert --ClearMetadataPreprocessor.enabled=True --to notebook --output cleaned_notebook.ipynb Untitled0.ipynb


This application is used to convert notebook files (*.ipynb)
        to various other formats.


Options
The options below are convenience aliases to configurable class-options,
as listed in the "Equivalent to" description-line of the aliases.
To see all configurable class-options for some <cmd>, use:
    <cmd> --help-all

--debug
    set log level to logging.DEBUG (maximize logging output)
    Equivalent to: [--Application.log_level=10]
--show-config
    Show the application's configuration (human-readable format)
    Equivalent to: [--Application.show_config=True]
--show-config-json
    Show the application's configuration (json format)
    Equivalent to: [--Application.show_config_json=True]
--generate-config
    generate default config file
    Equivalent to: [--JupyterApp.generate_config=True]
-y
    Answer yes to any questions instead of prompting.
    Equivalent to: [--JupyterApp.answer_yes=True]
--execute
    Execute the notebook prior to export.
    Equivalent to: [--ExecutePr