In [None]:
!pip install torch torchvision pillow tqdm --quiet

import os, zipfile, glob, random, shutil
from tqdm import tqdm
from PIL import Image, ImageDraw, ImageEnhance

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from google.colab import files

# Ask the user for the dataset archive; files.upload() returns a dict keyed by
# the uploaded file names.
print("üëâ Upload the Kaggle ZIP: expenses-receipt-ocr.zip")
uploaded = files.upload()  # select expenses-receipt-ocr.zip here

# A cancelled dialog yields an empty dict; stop with a readable message
# instead of an IndexError from indexing an empty list.
if not uploaded:
    raise SystemExit("No file was uploaded. Re-run this cell and select the ZIP.")

zip_name = next(iter(uploaded))
print("Uploaded:", zip_name)

# Refuse anything that is not a ZIP before trying to extract it.
if not zipfile.is_zipfile(zip_name):
    raise SystemExit(f"{zip_name} is not a valid ZIP archive.")

# Unzip into current directory
with zipfile.ZipFile(zip_name, "r") as z:
    z.extractall()

print("üìÇ After unzip, we have:")
print(os.listdir())


üëâ Upload the Kaggle ZIP: expenses-receipt-ocr.zip


In [None]:
# Output layout: one sub-directory per class under DATA_ROOT.
DATA_ROOT = "receipt_fraud"
GEN_DIR = os.path.join(DATA_ROOT, "genuine")
FAKE_DIR = os.path.join(DATA_ROOT, "fake")
os.makedirs(GEN_DIR, exist_ok=True)
os.makedirs(FAKE_DIR, exist_ok=True)

# Directory names that must never contribute source images. DATA_ROOT is
# included so that re-running this cell does not pick up the genuine/fake
# images generated by a previous run.
EXCLUDED_DIRS = {DATA_ROOT, "sample_data", ".config"}
IMAGE_EXTS = {".jpg", ".jpeg", ".png"}


def _is_source_image(path):
    """Return True if `path` has an image extension and no excluded parent dir.

    glob returns paths relative to the working directory (no leading
    separator), so the check is done on path components rather than on a
    "/name/" substring, which would miss top-level directories.
    """
    if os.path.splitext(path)[1].lower() not in IMAGE_EXTS:
        return False
    parent_dirs = os.path.normpath(path).split(os.sep)[:-1]
    return not EXCLUDED_DIRS.intersection(parent_dirs)


# A set removes duplicates; sorting makes the img_XXXXX numbering stable
# across runs.
image_paths = sorted({
    p for p in glob.glob("**/*", recursive=True)
    if os.path.isfile(p) and _is_source_image(p)
})

print("Found images:", len(image_paths))
if len(image_paths) == 0:
    raise SystemExit("‚ùå No images found. Check that the Kaggle ZIP contains jpg/png files.")

def make_fake(src, dst, n_patches=2, patch_size=(120, 40)):
    """Write a synthetically tampered copy of the image at `src` to `dst`.

    The copy gets a random brightness and contrast shift and then
    `n_patches` solid white rectangles at random positions.

    Args:
        src: Path of the image to read.
        dst: Path to write the result to; the format follows its extension.
        n_patches: Number of white rectangles to draw.
        patch_size: (width, height) of each rectangle in pixels.

    Returns:
        None. The result is written to `dst`.
    """
    patch_w, patch_h = patch_size

    # convert() returns a new image, so the source file can be closed at once.
    with Image.open(src) as original:
        img = original.convert("RGB")
    w, h = img.size

    img = ImageEnhance.Brightness(img).enhance(random.uniform(0.8, 1.3))
    img = ImageEnhance.Contrast(img).enhance(random.uniform(0.8, 1.4))

    draw = ImageDraw.Draw(img)
    for _ in range(n_patches):
        # Choose the top-left corner so the patch fits when the image is large
        # enough; max(0, ...) keeps the range valid for smaller images.
        x1 = random.randint(0, max(0, w - patch_w))
        y1 = random.randint(0, max(0, h - patch_h))
        draw.rectangle([x1, y1, x1 + patch_w, y1 + patch_h], fill=(255, 255, 255))
    img.save(dst)

# Seed the RNG so the generated fakes and the shuffle order are reproducible.
random.seed(42)

# Each entry is (image_path, label): 0 = genuine, 1 = fake.
genuine, fake = [], []
for i, src in enumerate(tqdm(image_paths, desc="Preparing dataset")):
    name = f"img_{i:05d}.jpg"
    genuine_path = os.path.join(GEN_DIR, name)
    fake_path = os.path.join(FAKE_DIR, name)
    try:
        # Re-encode the genuine image through PIL instead of copying bytes.
        # The file content then matches its .jpg name, and both classes are
        # written by the same encoder, so encoding differences cannot give
        # the label away.
        with Image.open(src) as original:
            original.convert("RGB").save(genuine_path)
        make_fake(src, fake_path)
    except OSError as err:
        # PIL reports unreadable or corrupt images as OSError subclasses;
        # skip them rather than aborting the whole run.
        print(f"Skipping unreadable image {src}: {err}")
        continue
    genuine.append((genuine_path, 0))
    fake.append((fake_path, 1))

dataset = genuine + fake
random.shuffle(dataset)
print("Dataset ready ‚úîÔ∏è | Total samples =", len(dataset))


Found images: 371


Preparing dataset: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 371/371 [00:32<00:00, 11.56it/s]

Dataset ready ‚úîÔ∏è | Total samples = 742





In [None]:
# Preprocessing applied to every image before it reaches the network:
# stretch to 256x256, keep the central 224x224 crop, convert to a tensor and
# normalise each channel with the mean/std values below (the commonly used
# ImageNet statistics, in line with the ImageNet-pretrained weights loaded for
# the model in this notebook).
tf = transforms.Compose(
    [
        transforms.Resize(size=(256, 256)),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

class BillDS(Dataset):
    """Dataset over a list of (image_path, label) pairs.

    Args:
        items: Sequence of (path, label) tuples.
        transform: Optional callable applied to the RGB PIL image. When
            omitted, the notebook-level `tf` pipeline is used, which is the
            original behaviour.
    """

    def __init__(self, items, transform=None):
        self.items = items
        self.transform = transform

    def __len__(self):
        return len(self.items)

    def __getitem__(self, i):
        """Return (transformed_image, label) for the i-th item."""
        p, lbl = self.items[i]
        # Resolve the fallback at call time so `tf` only has to exist once
        # items are actually read.
        transform = self.transform if self.transform is not None else tf
        # convert() returns a new image, so the file can be closed right away.
        with Image.open(p) as im:
            rgb = im.convert("RGB")
        return transform(rgb), lbl

# Hold out a share of the receipts for validation. The split is made on the
# file name, which the genuine and fake version of one receipt share, so both
# versions of a receipt always land on the same side of the split.
VAL_FRACTION = 0.2
source_names = sorted({os.path.basename(p) for p, _ in dataset})
random.Random(42).shuffle(source_names)
val_names = set(source_names[:int(len(source_names) * VAL_FRACTION)])
train_items = [item for item in dataset if os.path.basename(item[0]) not in val_names]
val_items = [item for item in dataset if os.path.basename(item[0]) in val_names]

dl = DataLoader(BillDS(train_items), batch_size=16, shuffle=True)
val_dl = DataLoader(BillDS(val_items), batch_size=16, shuffle=False)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# ImageNet-pretrained ResNet-18 with its final layer replaced by a 2-class head.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, 2)
model = model.to(DEVICE)

opt = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_fn = nn.CrossEntropyLoss()


def evaluate(loader):
    """Return the model's accuracy on `loader`, or NaN if it yields no samples."""
    model.eval()
    hits = seen = 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(DEVICE), y.to(DEVICE)
            hits += (model(x).argmax(1) == y).sum().item()
            seen += y.size(0)
    return hits / seen if seen else float("nan")


EPOCHS = 3
for epoch in range(EPOCHS):
    # evaluate() leaves the model in eval mode, so switch back every epoch.
    model.train()
    correct = total = 0
    for x, y in dl:
        x, y = x.to(DEVICE), y.to(DEVICE)
        opt.zero_grad()
        out = model(x)
        loss = loss_fn(out, y)
        loss.backward()
        opt.step()
        total += y.size(0)
        correct += (out.argmax(1) == y).sum().item()
    # Training accuracy is accumulated while the weights are still changing;
    # the validation figure is the one that reflects unseen receipts.
    train_acc = correct / total if total else float("nan")
    val_acc = evaluate(val_dl)
    print(f"Epoch {epoch+1} ‚Üí Accuracy: {train_acc:.4f} | Val accuracy: {val_acc:.4f}")

torch.save(model.state_dict(), "fakebill_resnet18.pth")
print("‚úîÔ∏è Model saved as fakebill_resnet18.pth")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 44.7M/44.7M [00:00<00:00, 182MB/s]


Epoch 1 ‚Üí Accuracy: 0.4609
Epoch 2 ‚Üí Accuracy: 0.6051
Epoch 3 ‚Üí Accuracy: 0.6927
‚úîÔ∏è Model saved as fakebill_resnet18.pth


In [None]:
!apt-get -y install tesseract-ocr > /dev/null
!pip install pytesseract --quiet

import pytesseract
from PIL import Image
from google.colab import files
import re
from datetime import datetime

print("üëâ Upload any bill image (real or fake) to test the system")
up = files.upload()
# A cancelled dialog yields an empty dict; stop with a readable message
# instead of an IndexError from indexing an empty list.
if not up:
    raise SystemExit("No file was uploaded. Re-run this cell and select a bill image.")
test_img = next(iter(up))

# ---- 1) Image forgery (your ResNet18 model) ----
def predict_image(path):
    """Classify the image at `path` with the trained model.

    Returns a (label, probability) tuple: the label is "Fake Bill" when the
    highest-scoring class index is 1 and "Genuine Bill" otherwise, and the
    probability is the softmax score of that class.
    """
    # Same preprocessing as training, plus a leading batch dimension.
    batch = tf(Image.open(path).convert("RGB")).unsqueeze(0).to(DEVICE)
    model.eval()
    with torch.no_grad():
        probs = torch.softmax(model(batch), dim=1)[0]
        winner = probs.argmax().item()
    if winner == 1:
        verdict = "Fake Bill"
    else:
        verdict = "Genuine Bill"
    return (verdict, probs[winner].item())

# Run the forgery classifier on the uploaded file and report its verdict
# together with the probability of the predicted class.
label, conf = predict_image(test_img)
print(
    "\nüìå Image Forgery Check:",
    label,
    f"({conf*100:.2f}% confidence)",
)

# ---- 2) OCR with Tesseract ----
# OCR reads the uploaded file as-is, without the resize/crop used for the model.
img = Image.open(test_img)
raw_text = pytesseract.image_to_string(img)
print(
    "\nüßæ OCR Extracted Text:\n",
    raw_text,
)

# ---- 3) Pricing & timing anomaly logic ----
# Reference bill amount per known restaurant. Keys are lowercase substrings
# that are searched for in the lowercased OCR text.
KNOWN_RESTAURANTS = {
    "starbucks": 150,
    "kfc": 220,
    "mcdonald": 120,
    "domino": 350,
    "barbeque": 1400,
    "taj": 2500
}

# A bill is treated as normal when its hour (24h clock) lies in
# [OPEN_HOUR, CLOSE_HOUR).
OPEN_HOUR = 5
CLOSE_HOUR = 23

# A number with optional thousands separators and up to two decimals.
_AMOUNT_PATTERN = r'\d[\d,]*(?:\.\d{1,2})?'
_AMOUNT_RE = re.compile(_AMOUNT_PATTERN)
# Amount preceded by a currency marker. The rupee sign is written as an escape
# so the pattern does not depend on how this file is encoded.
_CURRENCY_AMOUNT_RE = re.compile(r'(?:\u20b9|\brs\.?|\binr)\s?(' + _AMOUNT_PATTERN + r')')
_TOTAL_LINE_RE = re.compile(r'\btotal\b')
_SUBTOTAL_LINE_RE = re.compile(r'sub[\s-]*total')
# H:MM or HH:MM, optional seconds, optional am/pm marker.
_TIME_RE = re.compile(r'\b(\d{1,2}):(\d{2})(?::\d{2})?(?:\s*([ap])\.?m\b)?', re.IGNORECASE)


def _to_amount(token):
    """Convert a matched amount such as '1,250.00' to a float."""
    return float(token.replace(",", ""))


def extract_total(text):
    """Return the bill total found in OCR `text`, or None.

    Lines mentioning "total" (but not a subtotal) are preferred; the last
    number on each such line is a candidate and the largest one wins. If no
    such line has a number, the largest currency-prefixed amount is used.
    Unlabelled numbers are never used, because the first number in a receipt
    is usually a phone number, store id or similar.
    """
    lowered = text.lower()
    labelled = []
    for line in lowered.splitlines():
        if _TOTAL_LINE_RE.search(line) and not _SUBTOTAL_LINE_RE.search(line):
            numbers = _AMOUNT_RE.findall(line)
            if numbers:
                labelled.append(_to_amount(numbers[-1]))
    if labelled:
        return max(labelled)
    prefixed = [_to_amount(tok) for tok in _CURRENCY_AMOUNT_RE.findall(lowered)]
    return max(prefixed) if prefixed else None


def extract_hour(text):
    """Return the hour (0-23) of the first time stamp in `text`, or None.

    Both 24-hour times and 12-hour times with an am/pm marker are understood.
    A time stamp with out-of-range fields yields None.
    """
    match = _TIME_RE.search(text)
    if match is None:
        return None
    hour, minute = int(match.group(1)), int(match.group(2))
    meridiem = (match.group(3) or "").lower()
    if meridiem:
        if not 1 <= hour <= 12:
            return None
        # 12 am -> 0, 12 pm -> 12, otherwise add 12 hours for pm.
        hour = hour % 12 + (12 if meridiem == "p" else 0)
    if hour > 23 or minute > 59:
        return None
    return hour


def find_issues(restaurant, total, time_hour):
    """Return a list of human-readable anomaly messages (empty if none).

    Args:
        restaurant: Key of KNOWN_RESTAURANTS, or None if none was recognised.
        total: Bill amount, or None if none was found.
        time_hour: Hour of the bill (0-23), or None if none was found.
    """
    found = []

    # price check
    if restaurant is not None and total is not None:
        expected = KNOWN_RESTAURANTS[restaurant]
        if total > expected * 4:
            found.append("Unusually high amount for this restaurant")
        elif total < expected * 0.4:
            found.append("Unusually low amount for this restaurant")

    # time check
    if time_hour is not None:
        if time_hour < OPEN_HOUR or time_hour >= CLOSE_HOUR:
            found.append("Bill issued outside normal restaurant hours")

    return found


restaurant = next((r for r in KNOWN_RESTAURANTS if r in raw_text.lower()), None)
total = extract_total(raw_text)
time_hour = extract_hour(raw_text)
issues = find_issues(restaurant, total, time_hour)

# ---- FINAL DECISION ----
if issues:
    print("\n‚ö† PRICE & TIMING ANALYSIS ‚Üí RISK")
    for issue in issues:
        print("   ‚ùó", issue)
else:
    print("\n‚úî PRICE & TIMING ANALYSIS ‚Üí PASS")


üëâ Upload any bill image (real or fake) to test the system


Saving 0.jpg to 0 (1).jpg

üìå Image Forgery Check: Genuine Bill (93.64% confidence)

üßæ OCR Extracted Text:
  

WAL*MART

ALWAYS LOW PRICES.

oe

   

SUPERCENTER
OPEN 24 HOURS
MANAGER . TBA
( 515 ) 986 - 1783
ST# 5748 OP# 00000158 TE# 14 TRH 03178
BANANAS 00000000401 1KF
0.41 Ib @ 1 Ib /0.49 0.20 N
FRAP 001200010451 F 5.48 N
DISCOUNT GIVEN 0.57
5 SUBIQIA. 8.11
5.
y CASH TEND 1100
CHANGE DUE ‚Äú5 ‚Äú@g

  


‚úî PRICE & TIMING ANALYSIS ‚Üí PASS
