In [None]:

import os
import random
import shutil
import time
import copy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Dataset, random_split
from torchvision import transforms, datasets
from PIL import Image
from transformers import ViTModel, ViTImageProcessor
from tqdm import tqdm

In [None]:
class GeoDataset(Dataset):
    """Image dataset pairing each picture with a region label and coordinates.

    The annotation table is read from a CSV; each row is expected to provide the
    columns ``filename``, ``latitude``, ``longitude`` and ``Region_ID``.
    """

    def __init__(self, csv_file, image_folder, transform=None):
        """Load the annotation table and remember where the images live.

        Args:
            csv_file: Anything ``pd.read_csv`` accepts (path or buffer).
            image_folder: Directory the ``filename`` column is relative to.
            transform: Optional callable invoked as
                ``transform(images=img, return_tensors="pt")`` whose result
                exposes ``"pixel_values"``. When falsy, torchvision's
                ``ToTensor`` is applied instead.
        """
        self.data = pd.read_csv(csv_file)
        self.image_folder, self.transform = image_folder, transform

    def __len__(self):
        """Number of rows in the annotation table."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return ``(image_tensor, region_id, coords)`` for row ``idx``.

        ``region_id`` is a long scalar tensor holding ``Region_ID - 1`` and
        ``coords`` is a float32 tensor ``[latitude, longitude]``.
        """
        record = self.data.iloc[idx]
        rgb_image = Image.open(
            os.path.join(self.image_folder, record['filename'])
        ).convert("RGB")

        if not self.transform:
            image_tensor = transforms.ToTensor()(rgb_image)
        else:
            encoded = self.transform(images=rgb_image, return_tensors="pt")
            # Drop the leading batch axis the processor adds for a single image.
            image_tensor = encoded["pixel_values"].squeeze(0)

        coords = torch.tensor(
            [float(record['latitude']), float(record['longitude'])],
            dtype=torch.float32,
        )

        # Shift the CSV's Region_ID down by one to obtain the class index.
        region_id = torch.tensor(int(record['Region_ID']) - 1, dtype=torch.long)

        return image_tensor, region_id, coords

In [None]:

def batch_resize(input_dir, output_dir, size=(224, 224), exts={'.jpg', '.jpeg', '.png', '.bmp', '.tiff'}):
    """
    Resize all images in input_dir to the given size and save them in output_dir.

    Only regular files directly inside ``input_dir`` are considered (there is no
    recursion). Entries that are not regular files, such as a sub-directory whose
    name happens to end in an image extension, are skipped instead of being
    handed to ``Image.open``. Each resized image is written to ``output_dir``
    under its original file name.

    Args:
        input_dir (str): Path to folder containing original images.
        output_dir (str): Path to folder where resized images will be saved.
            Created (including parents) when it does not exist yet.
        size (tuple): Desired output size, e.g. (224, 224).
        exts (set): Image file extensions to process (compared lower-cased,
            including the leading dot).

    Returns:
        None
    """
    os.makedirs(output_dir, exist_ok=True)
    for fname in os.listdir(input_dir):
        _, ext = os.path.splitext(fname)
        # `exts` is only read here, never mutated, so the shared default is safe.
        if ext.lower() not in exts:
            continue

        in_path = os.path.join(input_dir, fname)
        if not os.path.isfile(in_path):
            # A directory (or other non-file entry) with an image-like name.
            continue
        out_path = os.path.join(output_dir, fname)

        with Image.open(in_path) as img:
            # Use high-quality downsampling filter
            resized = img.resize(size, resample=Image.LANCZOS)
            resized.save(out_path)

# Resize the images in the train and validation folders to 224x224 to fit the ViT model.
# Each entry is (folder with the original images, folder receiving the resized copies).
_RESIZE_JOBS = (
    (
        "/data3/pratyush.jena/misc/MMT/SMAI_Project/Phase_2_data/Phase_2_data/images_train",
        "/data3/pratyush.jena/misc/MMT/SMAI_Project/Phase_2_data/Phase_2_data/images_train_new",
    ),
    (
        "/data3/pratyush.jena/misc/MMT/SMAI_Project/Phase_2_data/Phase_2_data/images_val",
        "/data3/pratyush.jena/misc/MMT/SMAI_Project/Phase_2_data/Phase_2_data/images_val_new",
    ),
)

for _src_dir, _dst_dir in _RESIZE_JOBS:
    batch_resize(input_dir=_src_dir, output_dir=_dst_dir, size=(224, 224))

In [None]:
# Rows whose coordinates fall outside these inclusive ranges are discarded.
LONGITUDE_BOUNDS = (140000, 150000)
LATITUDE_BOUNDS = (200000, 230000)


def filter_by_coordinate_bounds(in_csv, out_csv):
    """Drop rows with out-of-range coordinates from a label CSV and save the rest.

    Args:
        in_csv: Path of the CSV to read; must contain ``longitude`` and
            ``latitude`` columns.
        out_csv: Path the filtered table is written to (without the index).

    Returns:
        The filtered DataFrame that was written to ``out_csv``.
    """
    labels_raw = pd.read_csv(in_csv)
    lon_min, lon_max = LONGITUDE_BOUNDS
    lat_min, lat_max = LATITUDE_BOUNDS

    out_of_bounds = (
        (labels_raw['longitude'] < lon_min) |
        (labels_raw['longitude'] > lon_max) |
        (labels_raw['latitude'] < lat_min) |
        (labels_raw['latitude'] > lat_max)
    )
    # NOTE(review): comparisons against NaN evaluate to False, so rows with a
    # missing coordinate are kept by this filter - confirm that is intended.
    labels_kept = labels_raw.loc[~out_of_bounds]
    labels_kept.to_csv(out_csv, index=False)
    return labels_kept


train_labels_clean = filter_by_coordinate_bounds(
    '/data3/pratyush.jena/misc/MMT/SMAI_Project/Phase_2_data/Phase_2_data/area_predictions_train.csv',
    '/data3/pratyush.jena/misc/MMT/SMAI_Project/area_predictions_train_need.csv',
)

val_labels_clean = filter_by_coordinate_bounds(
    '/data3/pratyush.jena/misc/MMT/SMAI_Project/Phase_2_data/Phase_2_data/area_predictions_val.csv',
    '/data3/pratyush.jena/misc/MMT/SMAI_Project/area_predictions_val_cleaned.csv',
)

In [None]:
# Filtered label CSVs (train, then val) written by the coordinate-filtering cell.
train_csv_path, val_csv_path = (
    '/data3/pratyush.jena/misc/MMT/SMAI_Project/area_predictions_train_need.csv',
    '/data3/pratyush.jena/misc/MMT/SMAI_Project/area_predictions_val_cleaned.csv',
)
# Folders that receive the 224x224 copies produced by the resize cell.
train_images_folder, val_images_folder = (
    '/data3/pratyush.jena/misc/MMT/SMAI_Project/Phase_2_data/Phase_2_data/images_train_new',
    '/data3/pratyush.jena/misc/MMT/SMAI_Project/Phase_2_data/Phase_2_data/images_val_new',
)

In [None]:
# Pick the compute device first; everything below is placed on it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained ViT backbone together with the image processor published for it.
model_name = 'google/vit-base-patch16-224-in21k'
processor = ViTImageProcessor.from_pretrained(model_name)
vit = ViTModel.from_pretrained(model_name)
vit.to(device)

# Seed Python's and PyTorch's global RNGs with the same value.
SEED = 11
random.seed(SEED)
torch.manual_seed(SEED)


In [None]:
# Image datasets for both splits; the ViT processor is used as the transform.
train_dataset = GeoDataset(train_csv_path, train_images_folder, processor)
val_dataset = GeoDataset(val_csv_path, val_images_folder, processor)

# Same batch size for both loaders; only the training loader shuffles.
_image_loader_args = {"batch_size": 16}
train_loader = DataLoader(train_dataset, shuffle=True, **_image_loader_args)
val_loader = DataLoader(val_dataset, shuffle=False, **_image_loader_args)

print(f"Train set size: {len(train_dataset)}")
print(f"Val set size: {len(val_dataset)}")

In [None]:
def extract_features(loader, model):
    """Run ``model`` over ``loader`` and collect the first-token hidden states.

    Args:
        loader: Iterable yielding ``(inputs, regions, coords)`` batches.
        model: Module called as ``model(pixel_values=...)`` whose output exposes
            ``last_hidden_state``. It is switched to eval mode and evaluated
            without gradient tracking.

    Returns:
        Tuple ``(features, regions, coords)`` where each element is the
        concatenation of the per-batch tensors; ``features`` lives on the CPU.
    """
    model.eval()
    # One bucket per returned tensor: features, regions, coords.
    buckets = ([], [], [])

    with torch.no_grad():
        for pixel_batch, region_batch, coord_batch in tqdm(loader, desc="Extracting features", unit="batch"):
            hidden_states = model(pixel_values=pixel_batch.to(device)).last_hidden_state
            # Position 0 along the sequence axis is taken as the image embedding.
            cls_embedding = hidden_states[:, 0, :].cpu()
            for bucket, chunk in zip(buckets, (cls_embedding, region_batch, coord_batch)):
                bucket.append(chunk)

    return tuple(torch.cat(bucket) for bucket in buckets)

In [None]:
# Compute the ViT features once per split (train first, then val) so the
# regression head can be trained on cached tensors instead of images.
train_features, train_regions, train_coords = extract_features(loader=train_loader, model=vit)
val_features, val_regions, val_coords = extract_features(loader=val_loader, model=vit)

# NOTE(review): geoNet uses BatchNorm1d; if the training set size modulo 16 is 1,
# the final batch holds a single sample - confirm this cannot happen or consider
# drop_last=True for the training loader.
train_inFeatures_dataset = TensorDataset(train_features, train_regions, train_coords)
train_loader_new = DataLoader(train_inFeatures_dataset, batch_size=16, shuffle=True)

val_inFeatures_dataset = TensorDataset(val_features, val_regions, val_coords)
val_loader_new = DataLoader(val_inFeatures_dataset, batch_size=16, shuffle=False)

In [None]:

class geoNet(nn.Module):
    """Fully connected regressor from a 768-d feature plus a one-hot region id to 2 outputs.

    The network is four ``Linear -> BatchNorm1d -> ReLU`` stages
    (512, 256, 128, 64 units) followed by a linear layer with two outputs.
    """

    def __init__(self, num_regions=15):
        """Create the layers.

        Args:
            num_regions: Number of region classes; sets the width of the
                one-hot vector appended to the input feature.
        """
        super().__init__()
        self.num_regions = num_regions

        # Attribute names and creation order are deliberately unchanged so that
        # previously saved state dicts keep loading.
        self.fc1 = nn.Linear(768 + num_regions, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.fc3 = nn.Linear(256, 128)
        self.bn3 = nn.BatchNorm1d(128)
        self.fc4 = nn.Linear(128, 64)
        self.bn4 = nn.BatchNorm1d(64)
        self.regressor = nn.Linear(64, 2)

    def forward(self, x, region_ids):
        """Predict two values per sample.

        Args:
            x: Feature batch; concatenated with the one-hot region along dim 1.
            region_ids: Integer class indices in ``[0, num_regions)``.

        Returns:
            Tensor with two columns per input row.
        """
        hidden = torch.cat(
            (x, F.one_hot(region_ids, num_classes=self.num_regions).float()),
            dim=1,
        )

        stages = (
            (self.fc1, self.bn1),
            (self.fc2, self.bn2),
            (self.fc3, self.bn3),
            (self.fc4, self.bn4),
        )
        for linear, norm in stages:
            hidden = F.relu(norm(linear(hidden)))

        return self.regressor(hidden)

In [None]:
def calc_mae(net, dataloader):
    """Return the summed absolute error of ``net`` divided by the number of samples.

    The absolute errors of all output columns are added up across the whole
    loader and divided by ``len(dataloader.dataset)``, so the value is a
    per-sample total rather than a per-element mean.

    Args:
        net: Model called as ``net(features, regions)``; put into eval mode.
        dataloader: Yields ``(features, regions, coords)`` batches and exposes
            ``.dataset``.
    """
    net.eval()
    abs_error_sum = 0.0
    with torch.no_grad():
        for batch in dataloader:
            feats, region_ids, targets = (tensor.to(device) for tensor in batch)
            batch_error = F.l1_loss(net(feats, region_ids), targets, reduction='sum')
            abs_error_sum += batch_error.item()
    sample_count = len(dataloader.dataset)
    return abs_error_sum / sample_count

def calc_mse(net, dataloader, flag=False, csv_path='coords_preds.csv'):
    """Return the mean squared error of ``net`` over every predicted element.

    Args:
        net: Model called as ``net(features, regions)``; put into eval mode.
        dataloader: Yields ``(features, regions, coords)`` batches.
        flag: When true, the predictions are also written to ``csv_path`` with
            the columns ``id`` (running row number), ``Latitude`` (prediction
            column 0) and ``Longitude`` (prediction column 1).
        csv_path: Destination of the optional prediction dump.

    Returns:
        Mean of the squared differences between predictions and targets.
    """
    net.eval()
    pred_rows, target_rows = [], []
    with torch.no_grad():
        for batch in dataloader:
            feats, region_ids, targets = (tensor.to(device) for tensor in batch)
            batch_preds = net(feats, region_ids)
            # Accumulate row by row; the rows are stacked into 2-D arrays below.
            pred_rows += list(batch_preds.cpu().numpy())
            target_rows += list(targets.cpu().numpy())

    predictions = np.array(pred_rows)
    ground_truth = np.array(target_rows)

    if flag:
        pd.DataFrame({
            'id': np.arange(predictions.shape[0]),
            'Latitude': predictions[:, 0],
            'Longitude': predictions[:, 1]
        }).to_csv(csv_path, index=False)
        print(f"Predictions saved to {csv_path}")

    residuals = predictions - ground_truth
    return np.mean(np.square(residuals))

In [None]:
def _run_train_epoch(model, train_loader, criterion, optimizer):
    """Run one optimisation pass over ``train_loader`` and return the mean loss per sample."""
    model.train()
    running_loss = 0.0

    for features, regions, coords in train_loader:
        features, regions, coords = features.to(device), regions.to(device), coords.to(device)

        optimizer.zero_grad()
        loss = criterion(model(features, regions), coords)
        loss.backward()
        optimizer.step()
        # Weight each batch loss by its size so a smaller last batch counts less.
        running_loss += loss.item() * features.size(0)

    return running_loss / len(train_loader.dataset)


def _plot_training_curves(train_losses, val_maes):
    """Show the per-epoch training loss and validation MAE side by side."""
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Training Loss')
    plt.title('Training Loss Curve')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(val_maes, label='Validation MAE')
    plt.title('Validation MAE Curve')
    plt.xlabel('Epoch')
    plt.ylabel('MAE')
    plt.legend()

    plt.tight_layout()
    plt.show()


def train_net(model, train_loader, val_loader, num_epochs, learning_rate, patience=10,
              checkpoint_path='best_model_coord.pt'):
    """Train ``model`` with L1 loss and SGD, keeping the weights with the best validation MAE.

    After training (or early stopping) the best weights are loaded back into
    ``model``, saved to ``checkpoint_path``, evaluated once more with
    ``calc_mae`` / ``calc_mse`` (the latter also writes its default prediction
    CSV), and the loss curves are plotted.

    Args:
        model: Network called as ``model(features, regions)``; modified in place.
        train_loader: Yields ``(features, regions, coords)`` training batches.
        val_loader: Loader handed to ``calc_mae`` / ``calc_mse`` for validation.
        num_epochs: Maximum number of epochs.
        learning_rate: Initial SGD learning rate.
        patience: Number of consecutive epochs without a better validation MAE
            after which training stops early.
        checkpoint_path: File the best state dict is saved to.

    Returns:
        None
    """
    criterion = nn.L1Loss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0001)
    # The learning rate is cut by 10x when the validation MAE stalls for 5 epochs.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.1)

    train_losses = []
    val_maes = []
    best_mae = float('inf')
    # Snapshot the incoming weights so there is always a valid state to restore,
    # even when no epoch runs or the validation MAE never becomes finite
    # (NaN never compares as smaller than the current best).
    best_model_wts = copy.deepcopy(model.state_dict())
    epochs_no_improve = 0

    for epoch in range(num_epochs):
        epoch_loss = _run_train_epoch(model, train_loader, criterion, optimizer)
        train_losses.append(epoch_loss)

        val_mae = calc_mae(model, val_loader)
        val_maes.append(val_mae)

        scheduler.step(val_mae)

        if val_mae < best_mae:
            best_mae = val_mae
            best_model_wts = copy.deepcopy(model.state_dict())
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'Train Loss: {epoch_loss:.4f} | Val MAE: {val_mae:.4f}')

        if epochs_no_improve >= patience:
            print(f'\nEarly stopping after {patience} epochs without improvement')
            break

    model.load_state_dict(best_model_wts)
    torch.save(model.state_dict(), checkpoint_path)

    val_mae = calc_mae(model, val_loader)
    val_mse = calc_mse(model, val_loader, flag=True)
    print(f'\nFinal Evaluation:')
    print(f'Val MAE: {val_mae:.4f} | Val MSE: {val_mse:.4f}')

    _plot_training_curves(train_losses, val_maes)

In [None]:
# Fresh regression head, moved to the GPU when one is available.
model = geoNet()
_cuda_ready = torch.cuda.is_available()
if _cuda_ready:
    model.cuda()
print(
    'CUDA is available! Training on GPU ...'
    if _cuda_ready
    else 'CUDA is not available. Training on CPU ...'
)


In [None]:
# Train the regression head on the cached ViT features.
_train_settings = {"num_epochs": 100, "learning_rate": 1e-3, "patience": 10}
train_net(model, train_loader_new, val_loader_new, **_train_settings)

In [None]:
# Rebuild the network, restore the saved checkpoint and re-score the validation set.
_cuda_ready = torch.cuda.is_available()
model = geoNet()
if _cuda_ready:
    model.cuda()
print(
    'CUDA is available! Training on GPU ...'
    if _cuda_ready
    else 'CUDA is not available. Training on CPU ...'
)

_saved_state = torch.load('best_model_coord.pt', map_location=device)
model.load_state_dict(_saved_state)
model.eval()

calc_mse(model, val_loader_new)


In [None]:
# Test label CSV (created using test images and image numbers) and the test image folder.
test_csv, test_img_dir = (
    "/data3/pratyush.jena/misc/MMT/SMAI_Project/area_predictions_test_with_filenames.csv",
    "/data3/pratyush.jena/misc/MMT/SMAI_Project/Phase_2_data/Phase_2_data/images_test",
)

In [None]:
# Inference copy of the regression head, restored from the saved checkpoint.
checkpoint = torch.load('best_model_coord.pt', map_location=device)
net = geoNet()
net.load_state_dict(checkpoint)
net.to(device)
net.eval()

In [None]:
# Test table with an extra zero-based region column (Region_ID minus one).
df_test = pd.read_csv(test_csv)
df_test = df_test.assign(Region_ID0=df_test['Region_ID'] - 1)

In [None]:
class TestGeoDataset(Dataset):
    """Dataset over the test table yielding ``(pixel_values, region_id, id)``.

    Each row of ``df`` must provide ``filename``, ``Region_ID0`` and ``id``.
    """

    def __init__(self, df, img_dir, processor):
        """Keep the table, the image directory and the image processor.

        Args:
            df: Table indexed positionally via ``.iloc``.
            img_dir: Directory the ``filename`` column is relative to.
            processor: Callable invoked as
                ``processor(images=img, return_tensors='pt')`` whose result
                exposes ``'pixel_values'``.
        """
        self.df, self.img_dir, self.proc = df, img_dir, processor

    def __len__(self):
        """Number of rows in the table."""
        return len(self.df)

    def __getitem__(self, i):
        """Return the processed image, the region index tensor and the row's id."""
        record = self.df.iloc[i]
        image_path = os.path.join(self.img_dir, record['filename'])
        rgb_image = Image.open(image_path).convert('RGB')
        encoded = self.proc(images=rgb_image, return_tensors='pt')
        # Drop the leading batch axis the processor adds for a single image.
        pixel_values = encoded['pixel_values'].squeeze(0)
        region = torch.tensor(record['Region_ID0'], dtype=torch.long)
        return pixel_values, region, record['id']

In [None]:
# Reload the pretrained ViT backbone onto the compute device.
vit = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
vit = vit.to(device)

# Unshuffled loader so predictions stay in the order of the test table.
test_ds = TestGeoDataset(df=df_test, img_dir=test_img_dir, processor=processor)
test_loader = DataLoader(dataset=test_ds, batch_size=16, shuffle=False)

vit.eval()

In [None]:
# Feature extraction: first-token ViT embedding, region index and id for every test image.
_feature_chunks, _region_chunks, ids = [], [], []
with torch.no_grad():
    for pixel_batch, region_batch, id_batch in test_loader:
        hidden_states = vit(pixel_values=pixel_batch.to(device)).last_hidden_state
        _feature_chunks.append(hidden_states[:, 0, :].cpu())
        _region_chunks.append(region_batch)
        ids += id_batch.numpy().tolist()

features = torch.cat(_feature_chunks)
region_ids = torch.cat(_region_chunks)


In [None]:
# Predict all test rows in a single forward pass.
with torch.no_grad():
    _pred_tensor = net(features.to(device), region_ids.to(device))
coords_pred = _pred_tensor.cpu().numpy()

# Save the predictions to co_test.csv (column 0 -> Latitude, column 1 -> Longitude).
_latitudes, _longitudes = coords_pred[:, 0], coords_pred[:, 1]
df_out = pd.DataFrame({'id': ids, 'Latitude': _latitudes, 'Longitude': _longitudes})
df_out.to_csv('co_test.csv', index=False)
print(f"Saved {len(df_out)} rows to co_test.csv")

In [None]:
# Amount subtracted from every value in the 'id' column before the final save.
# NOTE(review): the earlier comment said "add 369" while the code subtracted it;
# the subtraction is kept as the actual behaviour - confirm the intended direction.
ID_OFFSET = 369

# Build the shifted table from the in-memory predictions (df_out) instead of
# re-reading co_test.csv, so re-running this cell cannot shift the ids twice.
df = df_out.assign(id=df_out['id'] - ID_OFFSET)

# Save the updated DataFrame back to CSV
df.to_csv('co_test.csv', index=False)