In [None]:
%pip install easyocr

Collecting easyocr
  Downloading easyocr-1.7.2-py3-none-any.whl.metadata (10 kB)
Collecting python-bidi (from easyocr)
  Downloading python_bidi-0.6.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Collecting pyclipper (from easyocr)
  Downloading pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Collecting ninja (from easyocr)
  Downloading ninja-1.11.1.4-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->easyocr)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->easyocr)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->easyocr)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (

In [None]:
import os
import sys
from collections import Counter

import cv2
import numpy as np
import pandas as pd
import torch
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from torchvision import transforms

from Tex2loc import Tex2loc
from GeoGessr import GeoGuessCountryClassifier
from tekstoinator import extract_text_from_video

def load_classifier_model(model_path, device, num_classes):
    """Build the country classifier and load trained weights for inference.

    Args:
        model_path: Path to a checkpoint file containing the model's
            ``state_dict``.
        device: Torch device used as ``map_location`` when reading the
            checkpoint and as the device the model is moved to.
        num_classes: Number of output classes passed to the
            ``GeoGuessCountryClassifier`` constructor.

    Returns:
        The ``GeoGuessCountryClassifier`` instance, on ``device`` and switched
        to evaluation mode.
    """
    # Only a state_dict is needed here, so restrict unpickling to tensors and
    # plain containers instead of allowing arbitrary pickled objects.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)

    model = GeoGuessCountryClassifier(num_classes=num_classes)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()
    return model

def preprocess_frame(frame):
    """Turn a single image array into a normalized tensor for the classifier.

    The frame is converted to a PIL image, resized to 224x224, converted to a
    tensor and normalized per channel.

    Args:
        frame: Image accepted by ``transforms.ToPILImage`` (e.g. an H x W x C
            array). NOTE(review): the three-value mean/std below imply a
            3-channel input; channel order is not checked here.

    Returns:
        The transformed tensor (no batch dimension is added).
    """
    # Per-channel statistics; these match the widely used ImageNet values.
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    steps = [
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ]
    pipeline = transforms.Compose(steps)
    return pipeline(frame)

def predict_country_on_video(video_path, model, device, label_encoder, frame_interval=1):
    """Classify frames of a video and return the majority-vote country label.

    Args:
        video_path: Path to a video file opened with ``cv2.VideoCapture``.
        model: Classifier called as ``model(batch)``; its output is reduced
            with ``argmax`` over dimension 1 to obtain a class index.
        device: Torch device the per-frame input tensor is moved to.
        label_encoder: Object whose ``classes_`` sequence maps a class index
            to its label.
        frame_interval: Classify every ``frame_interval``-th frame, starting
            with the first one. The default of 1 classifies every frame.

    Returns:
        The entry of ``label_encoder.classes_`` predicted for the largest
        number of classified frames.

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1, the video cannot
            be opened, or no frame could be read from it.
    """
    if frame_interval < 1:
        raise ValueError(f"frame_interval must be >= 1, got {frame_interval!r}")

    cap = cv2.VideoCapture(video_path)
    predictions = []
    try:
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {video_path!r}")

        frame_index = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            is_sampled = frame_index % frame_interval == 0
            frame_index += 1
            if not is_sampled:
                continue

            # OpenCV decodes frames as BGR, while ToPILImage and the RGB-ordered
            # normalization constants in preprocess_frame expect RGB.
            # NOTE(review): assumes the classifier was trained on RGB images -
            # confirm against the training pipeline.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            img_tensor = preprocess_frame(rgb_frame).unsqueeze(0).to(device)
            with torch.no_grad():
                outputs = model(img_tensor)
            predictions.append(outputs.argmax(dim=1).item())
    finally:
        # Release the capture even if preprocessing or inference raises.
        cap.release()

    if not predictions:
        raise ValueError(f"No frames could be read from video: {video_path!r}")

    most_common_idx = Counter(predictions).most_common(1)[0][0]
    return label_encoder.classes_[most_common_idx]


In [None]:
video_path = "input/video.mp4"

# Stop the cell with an exception rather than print + sys.exit(): inside a
# notebook kernel a raised error carries the message and a traceback.
if not os.path.isfile(video_path):
    raise FileNotFoundError(f"Video file '{video_path}' nie istnieje!")

# Prefer the GPU when one is available; later cells reuse `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Używany device: {device}")

# Step 1: OCR on the video, called with frame_interval=25; the returned text
# pieces are joined into a single space-separated string.
print("1. Wykonywanie OCR na video...")
extracted_text_list = extract_text_from_video(video_path, frame_interval=25)
extracted_text = " ".join(extracted_text_list)
# Preview only the first 500 characters to keep the cell output short.
print(f"Znaleziony tekst (fragment): {extracted_text[:500]}...")



Używany device: cuda
1. Wykonywanie OCR na video...
Znaleziony tekst (fragment): 3DBEN VL.CHEMICZNA ABEN PORT LOTNICRY_ OBNODNICA PYSZNEPL CZASW [CHOLM UL;CHEMICZNA ICZY BURGERA MYIPLE BURGE BURGERI IATAN CHEŁM PORT LOTNICZY RAHEN RABEN CHOŁM ZAMOIC CZAS ZAMOTE ONALD MOJDON RABE CZASAA HABEN CHOLM RABON CZASWA WYINLE URGEK BURGER CZAS] NALDS RABER [RABEN 98 POKA ZAMOSC...


In [None]:
# Show the complete OCR text followed by the banner for step 2 (one print
# call, newline-separated, so the emitted output is unchanged).
print(extracted_text, "2. Analiza lokalizacji z tekstu...", sep="\n")

# Create the text-to-location helper, passing the device type string of the
# torch device selected earlier in the notebook.
tex2loc = Tex2loc(device=device.type)

3DBEN VL.CHEMICZNA ABEN PORT LOTNICRY_ OBNODNICA PYSZNEPL CZASW [CHOLM UL;CHEMICZNA ICZY BURGERA MYIPLE BURGE BURGERI IATAN CHEŁM PORT LOTNICZY RAHEN RABEN CHOŁM ZAMOIC CZAS ZAMOTE ONALD MOJDON RABE CZASAA HABEN CHOLM RABON CZASWA WYINLE URGEK BURGER CZAS] NALDS RABER [RABEN 98 POKA ZAMOSC
2. Analiza lokalizacji z tekstu...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


In [None]:

# Ask the Tex2loc helper for location information derived from the OCR text.
# The summary cell later indexes the result with 'city', 'country' and
# 'continent'.
# NOTE(review): the recorded output of this cell ends with
# "AttributeError: 'DynamicCache' object has no attribute 'get_max_length'",
# so `location_info` was never assigned in the saved run. This looks like a
# transformers version mismatch with the model code used inside Tex2loc -
# TODO confirm and pin a compatible version.
location_info = tex2loc.get_location_info(extracted_text)
print(f"Lokalizacja z tekstu: {location_info}")



The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.


AttributeError: 'DynamicCache' object has no attribute 'get_max_length'

In [None]:
print("3. Ładowanie klasyfikatora obrazów...")

# Rebuild the label encoding from the dataset index. The CSV has no header
# row, so column names are supplied explicitly.
df = pd.read_csv("dataset/country_dataset.csv", header=None, names=['country', 'lat', 'lon', 'local_path'])

# Keep only countries that occur at least twice.
# NOTE(review): presumably this mirrors the filtering used when the model was
# trained, so class indices line up with `best_model.pt` - verify against the
# training script.
counts = df['country'].value_counts()
valid_countries = counts[counts >= 2].index
df = df[df['country'].isin(valid_countries)]

countries = sorted(df['country'].unique())

# The encoder's classes_ give the index -> country mapping used for decoding
# predictions, and their count sets the classifier's number of outputs.
le = LabelEncoder()
le.fit(countries)

model_path = "best_model.pt"
model = load_classifier_model(model_path, device, num_classes=len(le.classes_))
print("Model załadowany.")

# Step 4: classify the video frames and take the majority vote.
print("4. Predykcje na klatkach video...")
most_common_country = predict_country_on_video(video_path, model, device, le)
print(f"Najczęściej przewidywany kraj na video: {most_common_country}")

# Final report combining the OCR text, the text-based location and the image
# classifier's vote. Requires `location_info` from the earlier Tex2loc cell.
print("\n--- Podsumowanie ---")
print(f"Tekst OCR: {extracted_text[:500]}...")
print(f"Lokalizacja tekstowa: Miasto: {location_info['city']}, Kraj: {location_info['country']}, Kontynent: {location_info['continent']}")
print(f"Predykcja modelu na klatkach: {most_common_country}")