### Scrape Images from URL

#### 1) Gaza-Israel Dataset (Date of Scraping: 27.05.2025)


In [42]:
import os 
import pandas as pd 
from tqdm import tqdm 
import requests 
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import time
from PIL import Image 
from io import BytesIO
import numpy as np

In [13]:
os.getcwd()

'/Users/fabilochner/Documents/Kopenhagen/MSc Social Data Science/Courses/4th semester/Master Thesis/Practical Implementation/gaza_ukraine_datasets/scrape_images'

In [16]:
## import Gaza-Israel dataset (after claim reformulation) (Checked URL's validity: 07.05.2025)

df_gaza_israel = pd.read_csv("../gaza_israel/Combined_dataset/gaza_israel_dataset_combined_010724_300425_after_claim_reformulation.csv",sep=';', header=0, index_col = 0)
print(len(df_gaza_israel))
print(df_gaza_israel.dtypes)
df_gaza_israel.head()

100
id                            int64
Website                      object
Article_URL                  object
Headline                     object
Claim_Date                   object
Review_Date                  object
Query/Keyword                object
Label_Website                object
Image_URL                    object
Original_Claim_Website       object
Original_Claim_Only          object
Context/Label_Explanation    object
Text_Only_Claim                bool
Normal_Image                   bool
AI_Generated_Image             bool
Altered_Image                  bool
Data_Collection_Type         object
Label                        object
Claim                        object
dtype: object


Unnamed: 0,id,Website,Article_URL,Headline,Claim_Date,Review_Date,Query/Keyword,Label_Website,Image_URL,Original_Claim_Website,Original_Claim_Only,Context/Label_Explanation,Text_Only_Claim,Normal_Image,AI_Generated_Image,Altered_Image,Data_Collection_Type,Label,Claim
0,0,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.42JE3T2,False claims about US general being killed in ...,2025-04-06 00:00:00,2025-04-17 00:00:00,Israeli-Palestinian conflict,False,https://pbs.twimg.com/media/Gn4wjgBXoAALax1?fo...,"""BREAKING: Reports confirm the death of Genera...","""BREAKING: Reports confirm the death of Genera...",A reverse image search reveals the photograph ...,False,True,False,False,Manual,False,"This image shows General John Pagri, Commander..."
1,1,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.39432WD,"Pictures show Gaza in ruins, not Myanmar quake...",2025-03-29 00:00:00,2025-04-17 00:00:00,Israeli-Palestinian conflict,False,https://web.archive.org/web/20250402070342im_/...,"""A 7.9-magnitude earthquake in Myanmar, devast...","""A 7.9-magnitude earthquake in Myanmar, devast...","However, a reverse image search for the first ...",False,True,False,False,Manual,False,This image shows the ruins after a 7.9-magnitu...
2,2,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.37W26WD,Image of 'Gazan mother with son's bones' is ma...,2025-03-24 00:00:00,2025-03-27 00:00:00,Israeli-Palestinian conflict,False,https://scontent-hou1-1.xx.fbcdn.net/v/t39.308...,"""A mother holds the bones of her son in Gaza. ...","""A mother holds the bones of her son in Gaza. ...",A keyword search found a wider-angle version o...,False,False,True,False,Manual,False,This is an authentic image of a mother holding...
3,3,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36XY2FV,Old image misrepresented as 'Israeli settlers'...,2025-02-16 00:00:00,2025-02-19 00:00:00,Israeli-Palestinian conflict,False,https://pbs.twimg.com/media/Gj5tbXiaQAAoG1z?fo...,"""Israeli media reports there are Jews who have...","""Israeli media reports there are Jews who have...",A reverse image search via Google traced the v...,False,True,False,False,Manual,False,This image shows Israelis who have stopped fig...
4,4,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36RF6BZ,"Picture shows Israel's 2018 Gaza attack, not a...",2024-12-22 00:00:00,2024-12-26 00:00:00,Israeli-Palestinian conflict,False,https://media.gettyimages.com/id/1060718622/de...,"""Americans bombarded Yemen in reprisal after Y...","""Americans bombarded Yemen in reprisal after Y...",But a reverse image search on Google found the...,False,True,False,False,Manual,False,This image shows the U.S. bombarding Yemen on ...


In [17]:
# check if "Label" is in big letters
print(df_gaza_israel["Label"].value_counts() )

## Rename "FALSE" into "False" and "TRUE" into "True" 

df_gaza_israel["Label"] = df_gaza_israel["Label"].replace({'FALSE': 'False', 'TRUE': 'True'})

## Check label letters again

print(df_gaza_israel["Label"].value_counts()) ##worked




Label
FALSE         59
Misleading    31
TRUE           6
NEI            4
Name: count, dtype: int64
Label
False         59
Misleading    31
True           6
NEI            4
Name: count, dtype: int64


In [9]:
# How many claims with images? Amount of images and image_URLs should be the same

## Check 1: Image_URL column 

image_url_non_nan_count = df_gaza_israel["Image_URL"].notna().sum()
print(f"Amount of Claims with Images: {image_url_non_nan_count}")


image_url_nan_count = df_gaza_israel["Image_URL"].isna().sum()
print(f"Amount of Claims without Images: {image_url_nan_count}")


## Check 2: Text_Only_Claim column

print("\n", df_gaza_israel["Text_Only_Claim"].value_counts())

## Check 3: Image columns 

print("\n",df_gaza_israel["Normal_Image"].value_counts())
print("\n",df_gaza_israel["AI_Generated_Image"].value_counts())
print("\n",df_gaza_israel["Altered_Image"].value_counts())





Amount of Claims with Images: 65
Amount of Claims without Images: 35

 Text_Only_Claim
False    65
True     35
Name: count, dtype: int64

 Normal_Image
False    51
True     49
Name: count, dtype: int64

 AI_Generated_Image
False    91
True      9
Name: count, dtype: int64

 Altered_Image
False    93
True      7
Name: count, dtype: int64


Maybe just scrape the image in one picture format (e.g., jpg):

- Would be easier to store the images (would not require one folder for each image, but all images would be stored in one folder)
- VERITE also just uses one format (jpg)
- MOCHEG uses different formats (jpg, jpeg, png), but also only one format per image 

In [55]:
def save_image_jpg_format(url, base_folder, topic, image_id):

    """ 
    
    This function downloads an image from a URL and saves it in one format (jpg) - the same that was used in the VERITE benchmark and worked in DEFAME Github repo.
    

    Args:

        url: The image URL to download
        base_folder: The base folder where all images are stored ("images")
        topic: The topic folder ("gaza_israel" or "ukraine_russia")
        image_id: The ID of the image from the respective dataset


    Returns:
        
        dict: Paths to the saved images or None if failed
    
    """

    # Generate the folder path with new structure 
    topic_folder = os.path.join(base_folder, topic) #gaza or ukraine
    

    # Create folders
    for folder in [base_folder, topic_folder]:
        if not os.path.exists(folder):
            os.makedirs(folder)

    # Create filename with image_id 
    filename_base = f"{image_id}"
    

    # Initialize the file paths dictionary 
    file_paths = {
    "jpg": os.path.join(topic_folder, f"{filename_base}.jpg"),
    }

    # Check if all files already exist (to avoid re-downloading)
    if all(os.path.exists(path) for path in file_paths.values()):
        print(f"Image {filename_base} already exists in all formats. Skipping.")
        return file_paths 
    
    
    # Create a session/using the requests retry function to deal with connection errors

    ## Create a session
    session = requests.Session()

    ## Define a retry strategy
    retry_strategy = Retry(
        total = 5, 
        backoff_factor = 1, 
        status_forcelist=[429, 500, 502, 503, 504], 
        method_whitelist= ["HEAD", "GET", "OPTIONS"]
    )

    ## Mount the retry strategy to the session
    adapter = HTTPAdapter(max_retries=retry_strategy)
    session.mount("http://", adapter)
    session.mount("https://", adapter)



    # Download the image
    
    try:

        response = session.get(url, verify=False, headers={"User-Agent":"Mozilla/5.0"}, timeout=80) #added headers to handle error messages 

        if response.status_code == 200:
            # Open the image
            img = Image.open(BytesIO(response.content))

            width, height = img.size 

            # Resize large images
            if 2400 > width > 1200 or 2400 > height > 1200:
                img = img.resize((width//2, height//2))

            if width > 2400 or height > 2400:
                img = img.resize((width//4, height//4))

            # Convert to RGB if needed 
            if not img.mode == "RBB":
                img = img.convert("RGB")

            # Save each image in the jpg format
            img.save(file_paths['jpg'], format = "JPEG")


            print(f"Successfully saved {filename_base} in jpg format.")
            return file_paths
        
        else:
            print(f"Failed to download image: {url}, status code: {response.status_code}")
            return None 
        
    except Exception as e:
        print(f"Error processing {url}: {str(e)}")
        return None 


    

    
def scrape_images(df, base_folder, topic):

    """ 
    Scrape the images from the image URL provided in a dataframe and update the dataframe with image paths

    Args:
        df: Pandas DataFrame with image URLs
        base_folder: Base folder to save the images to 
        topic: Topic folder ("gaza_israel" or "ukraine_russia")

    Returns:
        Updated DataFrame with image path columns
    
    """

    # Create a copy of df to avoid modifying the original df 
    df_updated = df.copy()

    # Add columns to df with the image path 
    df_updated["Image_Path"] = None


    # Count how many rows have image URLs
    total_with_urls = df_updated["Image_URL"].notna().sum()
    print(f"Found {total_with_urls} claims with iamge URLs out of {len(df_updated)} claims")

    # Create a list of rows with image URLs
    rows_with_urls = df_updated[df_updated["Image_URL"].notna()].index

    # Scrape images 
    for idx in tqdm(rows_with_urls, desc=f"Scraping {topic} images"):
        row = df_updated.loc[idx]
        image_id = row["id"] #track image id
        url = row["Image_URL"] # use column with image url

        


        if pd.notna(url) and url:
            # Download and save the images
            file_paths = save_image_jpg_format(url, base_folder, topic, image_id)
            # Add a time sleep here to avoid connection errors
            time.sleep(5)

            if file_paths:
                # Update dataframe with file path
                df_updated.at[idx, "Image_Path"] = file_paths["jpg"]
                

    return df_updated 
    


- Source for parts of the scraping code: https://github.com/stevejpapad/image-text-verification/blob/master/prepare_datasets.py

In [15]:
df_gaza_israel.head()

Unnamed: 0,id,Website,Article_URL,Headline,Claim_Date,Review_Date,Query/Keyword,Label_Website,Image_URL,Original_Claim_Website,Original_Claim_Only,Context/Label_Explanation,Text_Only_Claim,Normal_Image,AI_Generated_Image,Altered_Image,Data_Collection_Type,Label,Claim
0,0,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.42JE3T2,False claims about US general being killed in ...,2025-04-06 0:00:00,2025-04-17 0:00:00,Israeli-Palestinian conflict,False,https://pbs.twimg.com/media/Gn4wjgBXoAALax1?fo...,"""BREAKING: Reports confirm the death of Genera...","""BREAKING: Reports confirm the death of Genera...",A reverse image search reveals the photograph ...,False,True,False,False,Manual,False,"This image shows General John Pagri, Commander..."
1,1,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.39432WD,"Pictures show Gaza in ruins, not Myanmar quake...",2025-03-29 0:00:00,2025-04-17 0:00:00,Israeli-Palestinian conflict,False,https://web.archive.org/web/20250402070342im_/...,"""A 7.9-magnitude earthquake in Myanmar, devast...","""A 7.9-magnitude earthquake in Myanmar, devast...","However, a reverse image search for the first ...",False,True,False,False,Manual,False,This image shows the ruins after a 7.9-magnitu...
2,2,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.37W26WD,Image of 'Gazan mother with son's bones' is ma...,2025-03-24 0:00:00,2025-03-27 0:00:00,Israeli-Palestinian conflict,False,https://web.archive.org/web/20250527092039/htt...,"""A mother holds the bones of her son in Gaza. ...","""A mother holds the bones of her son in Gaza. ...",A keyword search found a wider-angle version o...,False,False,True,False,Manual,False,This is an authentic image of a mother holding...
3,3,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36XY2FV,Old image misrepresented as 'Israeli settlers'...,2025-02-16 0:00:00,2025-02-19 0:00:00,Israeli-Palestinian conflict,False,https://pbs.twimg.com/media/Gj5tbXiaQAAoG1z?fo...,"""Israeli media reports there are Jews who have...","""Israeli media reports there are Jews who have...",A reverse image search via Google traced the v...,False,True,False,False,Manual,False,This image shows Israelis who have stopped fig...
4,4,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36RF6BZ,"Picture shows Israel's 2018 Gaza attack, not a...",2024-12-22 0:00:00,2024-12-26 0:00:00,Israeli-Palestinian conflict,False,https://media.gettyimages.com/id/1060718622/de...,"""Americans bombarded Yemen in reprisal after Y...","""Americans bombarded Yemen in reprisal after Y...",But a reverse image search on Google found the...,False,True,False,False,Manual,False,This image shows the U.S. bombarding Yemen on ...


In [16]:
df_gaza_israel_with_image_paths = scrape_images(df=df_gaza_israel, base_folder="images", topic = "gaza_israel")

Found 65 claims with iamge URLs out of 100 claims


Scraping gaza_israel images:   2%|▏         | 1/65 [00:00<00:54,  1.18it/s]

Successfully saved 0 in jpg format.


Scraping gaza_israel images:   3%|▎         | 2/65 [00:02<01:28,  1.40s/it]

Successfully saved 1 in jpg format.


Scraping gaza_israel images:   5%|▍         | 3/65 [00:03<01:21,  1.31s/it]

Successfully saved 2 in jpg format.


Scraping gaza_israel images:   6%|▌         | 4/65 [00:04<01:02,  1.03s/it]

Successfully saved 3 in jpg format.


Scraping gaza_israel images:   8%|▊         | 5/65 [00:05<00:56,  1.06it/s]

Successfully saved 4 in jpg format.


Scraping gaza_israel images:   9%|▉         | 6/65 [00:05<00:48,  1.21it/s]

Successfully saved 5 in jpg format.


Scraping gaza_israel images:  11%|█         | 7/65 [00:14<03:17,  3.41s/it]

Successfully saved 6 in jpg format.


Scraping gaza_israel images:  12%|█▏        | 8/65 [00:15<02:33,  2.69s/it]

Successfully saved 8 in jpg format.


Scraping gaza_israel images:  14%|█▍        | 9/65 [00:16<02:02,  2.19s/it]

Successfully saved 9 in jpg format.


Scraping gaza_israel images:  15%|█▌        | 10/65 [00:17<01:35,  1.73s/it]

Successfully saved 10 in jpg format.


Scraping gaza_israel images:  17%|█▋        | 11/65 [00:19<01:38,  1.83s/it]

Successfully saved 13 in jpg format.


Scraping gaza_israel images:  18%|█▊        | 12/65 [00:20<01:17,  1.47s/it]

Successfully saved 14 in jpg format.


Scraping gaza_israel images:  20%|██        | 13/65 [00:20<01:03,  1.22s/it]

Successfully saved 15 in jpg format.


Scraping gaza_israel images:  22%|██▏       | 14/65 [00:21<00:54,  1.06s/it]

Successfully saved 16 in jpg format.


Scraping gaza_israel images:  23%|██▎       | 15/65 [00:22<00:48,  1.04it/s]

Successfully saved 18 in jpg format.


Scraping gaza_israel images:  25%|██▍       | 16/65 [00:22<00:43,  1.13it/s]

Successfully saved 19 in jpg format.


Scraping gaza_israel images:  26%|██▌       | 17/65 [00:24<00:50,  1.06s/it]

Successfully saved 22 in jpg format.


Scraping gaza_israel images:  28%|██▊       | 18/65 [00:25<00:43,  1.08it/s]

Successfully saved 24 in jpg format.


Scraping gaza_israel images:  29%|██▉       | 19/65 [00:25<00:40,  1.13it/s]

Successfully saved 26 in jpg format.


Scraping gaza_israel images:  31%|███       | 20/65 [00:26<00:37,  1.19it/s]

Successfully saved 27 in jpg format.


Scraping gaza_israel images:  32%|███▏      | 21/65 [00:27<00:40,  1.09it/s]

Successfully saved 30 in jpg format.


Scraping gaza_israel images:  34%|███▍      | 22/65 [00:28<00:43,  1.01s/it]

Successfully saved 31 in jpg format.


Scraping gaza_israel images:  35%|███▌      | 23/65 [00:29<00:39,  1.07it/s]

Successfully saved 34 in jpg format.


Scraping gaza_israel images:  37%|███▋      | 24/65 [00:30<00:35,  1.15it/s]

Successfully saved 35 in jpg format.


Scraping gaza_israel images:  38%|███▊      | 25/65 [00:31<00:33,  1.21it/s]

Successfully saved 37 in jpg format.


Scraping gaza_israel images:  40%|████      | 26/65 [00:31<00:29,  1.34it/s]

Successfully saved 38 in jpg format.


Scraping gaza_israel images:  42%|████▏     | 27/65 [00:33<00:43,  1.15s/it]

Successfully saved 39 in jpg format.


Scraping gaza_israel images:  43%|████▎     | 28/65 [00:34<00:37,  1.00s/it]

Successfully saved 44 in jpg format.


Scraping gaza_israel images:  45%|████▍     | 29/65 [00:35<00:32,  1.11it/s]

Successfully saved 45 in jpg format.


Scraping gaza_israel images:  46%|████▌     | 30/65 [00:35<00:27,  1.26it/s]

Successfully saved 47 in jpg format.


Scraping gaza_israel images:  48%|████▊     | 31/65 [00:36<00:26,  1.28it/s]

Successfully saved 56 in jpg format.


Scraping gaza_israel images:  49%|████▉     | 32/65 [00:37<00:24,  1.35it/s]

Successfully saved 57 in jpg format.


Scraping gaza_israel images:  51%|█████     | 33/65 [00:37<00:22,  1.41it/s]

Successfully saved 58 in jpg format.


Scraping gaza_israel images:  52%|█████▏    | 34/65 [00:38<00:21,  1.45it/s]

Successfully saved 59 in jpg format.


Scraping gaza_israel images:  54%|█████▍    | 35/65 [00:39<00:21,  1.42it/s]

Successfully saved 60 in jpg format.


Scraping gaza_israel images:  55%|█████▌    | 36/65 [00:39<00:18,  1.54it/s]

Successfully saved 61 in jpg format.


Scraping gaza_israel images:  57%|█████▋    | 37/65 [00:40<00:18,  1.52it/s]

Successfully saved 62 in jpg format.


Scraping gaza_israel images:  58%|█████▊    | 38/65 [00:41<00:18,  1.44it/s]

Successfully saved 63 in jpg format.


Scraping gaza_israel images:  60%|██████    | 39/65 [00:41<00:17,  1.45it/s]

Successfully saved 65 in jpg format.


Scraping gaza_israel images:  62%|██████▏   | 40/65 [00:42<00:16,  1.51it/s]

Successfully saved 66 in jpg format.


Scraping gaza_israel images:  63%|██████▎   | 41/65 [00:42<00:14,  1.62it/s]

Successfully saved 68 in jpg format.


Scraping gaza_israel images:  65%|██████▍   | 42/65 [00:43<00:13,  1.70it/s]

Successfully saved 69 in jpg format.


Scraping gaza_israel images:  66%|██████▌   | 43/65 [00:45<00:24,  1.13s/it]

Successfully saved 71 in jpg format.


Scraping gaza_israel images:  68%|██████▊   | 44/65 [00:46<00:20,  1.03it/s]

Successfully saved 72 in jpg format.


Scraping gaza_israel images:  69%|██████▉   | 45/65 [00:46<00:17,  1.17it/s]

Successfully saved 73 in jpg format.


Scraping gaza_israel images:  71%|███████   | 46/65 [00:47<00:14,  1.29it/s]

Successfully saved 75 in jpg format.


Scraping gaza_israel images:  72%|███████▏  | 47/65 [00:51<00:32,  1.80s/it]

Successfully saved 76 in jpg format.


Scraping gaza_israel images:  74%|███████▍  | 48/65 [00:52<00:24,  1.44s/it]

Successfully saved 79 in jpg format.


Scraping gaza_israel images:  75%|███████▌  | 49/65 [00:52<00:18,  1.17s/it]

Successfully saved 80 in jpg format.


Scraping gaza_israel images:  77%|███████▋  | 50/65 [00:53<00:15,  1.03s/it]

Successfully saved 81 in jpg format.


Scraping gaza_israel images:  78%|███████▊  | 51/65 [00:54<00:12,  1.11it/s]

Successfully saved 82 in jpg format.


Scraping gaza_israel images:  80%|████████  | 52/65 [00:54<00:10,  1.23it/s]

Successfully saved 83 in jpg format.


Scraping gaza_israel images:  82%|████████▏ | 53/65 [00:55<00:08,  1.34it/s]

Successfully saved 84 in jpg format.


Scraping gaza_israel images:  83%|████████▎ | 54/65 [00:55<00:07,  1.38it/s]

Successfully saved 85 in jpg format.


Scraping gaza_israel images:  85%|████████▍ | 55/65 [00:57<00:08,  1.19it/s]

Successfully saved 86 in jpg format.


Scraping gaza_israel images:  86%|████████▌ | 56/65 [00:57<00:06,  1.35it/s]

Successfully saved 87 in jpg format.


Scraping gaza_israel images:  88%|████████▊ | 57/65 [00:58<00:05,  1.35it/s]

Successfully saved 88 in jpg format.


Scraping gaza_israel images:  89%|████████▉ | 58/65 [00:58<00:04,  1.45it/s]

Successfully saved 91 in jpg format.


Scraping gaza_israel images:  91%|█████████ | 59/65 [00:59<00:03,  1.50it/s]

Successfully saved 92 in jpg format.


Scraping gaza_israel images:  92%|█████████▏| 60/65 [01:00<00:03,  1.38it/s]

Successfully saved 93 in jpg format.


Scraping gaza_israel images:  94%|█████████▍| 61/65 [01:01<00:02,  1.43it/s]

Successfully saved 94 in jpg format.


Scraping gaza_israel images:  95%|█████████▌| 62/65 [01:01<00:02,  1.36it/s]

Successfully saved 95 in jpg format.


Scraping gaza_israel images:  97%|█████████▋| 63/65 [01:02<00:01,  1.51it/s]

Successfully saved 96 in jpg format.


Scraping gaza_israel images:  98%|█████████▊| 64/65 [01:03<00:00,  1.48it/s]

Successfully saved 98 in jpg format.


Scraping gaza_israel images: 100%|██████████| 65/65 [01:04<00:00,  1.01it/s]

Successfully saved 99 in jpg format.





In [19]:
#check new df with image path

df_gaza_israel_with_image_paths.head()


## check that all images were successfully scraped (there are 65 images in the df)

print(df_gaza_israel_with_image_paths["Image_Path"].isna().sum()) ## should be 35


# Quick verification
print(f"Images with paths: {df_gaza_israel_with_image_paths['Image_Path'].notna().sum()}")
print(f"Images with URLs: {df_gaza_israel_with_image_paths['Image_URL'].notna().sum()}")

# Check if any paths are missing
missing_images = df_gaza_israel_with_image_paths[
    (df_gaza_israel_with_image_paths['Image_URL'].notna()) & 
    (df_gaza_israel_with_image_paths['Image_Path'].isna())
]
print(f"Failed scrapes: {len(missing_images)}")

35
Images with paths: 65
Images with URLs: 65
Failed scrapes: 0


In [41]:
### Reorder the columns before saving the final gaza_israel df

df_gaza_israel_with_image_paths.head()



# reorder columns

new_column_order = [
    'id', 
    'Website', 
    'Article_URL', 
    'Headline', 
    'Claim_Date', 
    'Review_Date', 
    'Query/Keyword',
    'Original_Claim_Website',  
    'Original_Claim_Only',
    'Claim',
    'Image_URL',
    'Image_Path',
    'Label_Website',
    'Label',
    'Context/Label_Explanation', 
    'Text_Only_Claim', 
    'Normal_Image', 
    'AI_Generated_Image', 
    'Altered_Image',
    'Data_Collection_Type'
]

df_gaza_israel_final = df_gaza_israel_with_image_paths[new_column_order]


df_gaza_israel_final.head()

Unnamed: 0,id,Website,Article_URL,Headline,Claim_Date,Review_Date,Query/Keyword,Original_Claim_Website,Original_Claim_Only,Claim,Image_URL,Image_Path,Label_Website,Label,Context/Label_Explanation,Text_Only_Claim,Normal_Image,AI_Generated_Image,Altered_Image,Data_Collection_Type
0,0,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.42JE3T2,False claims about US general being killed in ...,2025-04-06 0:00:00,2025-04-17 0:00:00,Israeli-Palestinian conflict,"""BREAKING: Reports confirm the death of Genera...","""BREAKING: Reports confirm the death of Genera...","This image shows General John Pagri, Commander...",https://pbs.twimg.com/media/Gn4wjgBXoAALax1?fo...,images/gaza_israel/0.jpg,False,False,A reverse image search reveals the photograph ...,False,True,False,False,Manual
1,1,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.39432WD,"Pictures show Gaza in ruins, not Myanmar quake...",2025-03-29 0:00:00,2025-04-17 0:00:00,Israeli-Palestinian conflict,"""A 7.9-magnitude earthquake in Myanmar, devast...","""A 7.9-magnitude earthquake in Myanmar, devast...",This image shows the ruins after a 7.9-magnitu...,https://web.archive.org/web/20250402070342im_/...,images/gaza_israel/1.jpg,False,False,"However, a reverse image search for the first ...",False,True,False,False,Manual
2,2,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.37W26WD,Image of 'Gazan mother with son's bones' is ma...,2025-03-24 0:00:00,2025-03-27 0:00:00,Israeli-Palestinian conflict,"""A mother holds the bones of her son in Gaza. ...","""A mother holds the bones of her son in Gaza. ...",This is an authentic image of a mother holding...,https://web.archive.org/web/20250527092039/htt...,images/gaza_israel/2.jpg,False,False,A keyword search found a wider-angle version o...,False,False,True,False,Manual
3,3,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36XY2FV,Old image misrepresented as 'Israeli settlers'...,2025-02-16 0:00:00,2025-02-19 0:00:00,Israeli-Palestinian conflict,"""Israeli media reports there are Jews who have...","""Israeli media reports there are Jews who have...",This image shows Israelis who have stopped fig...,https://pbs.twimg.com/media/Gj5tbXiaQAAoG1z?fo...,images/gaza_israel/3.jpg,False,False,A reverse image search via Google traced the v...,False,True,False,False,Manual
4,4,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36RF6BZ,"Picture shows Israel's 2018 Gaza attack, not a...",2024-12-22 0:00:00,2024-12-26 0:00:00,Israeli-Palestinian conflict,"""Americans bombarded Yemen in reprisal after Y...","""Americans bombarded Yemen in reprisal after Y...",This image shows the U.S. bombarding Yemen on ...,https://media.gettyimages.com/id/1060718622/de...,images/gaza_israel/4.jpg,False,False,But a reverse image search on Google found the...,False,True,False,False,Manual


In [42]:
df_gaza_israel_final

Unnamed: 0,id,Website,Article_URL,Headline,Claim_Date,Review_Date,Query/Keyword,Original_Claim_Website,Original_Claim_Only,Claim,Image_URL,Image_Path,Label_Website,Label,Context/Label_Explanation,Text_Only_Claim,Normal_Image,AI_Generated_Image,Altered_Image,Data_Collection_Type
0,0,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.42JE3T2,False claims about US general being killed in ...,2025-04-06 0:00:00,2025-04-17 0:00:00,Israeli-Palestinian conflict,"""BREAKING: Reports confirm the death of Genera...","""BREAKING: Reports confirm the death of Genera...","This image shows General John Pagri, Commander...",https://pbs.twimg.com/media/Gn4wjgBXoAALax1?fo...,images/gaza_israel/0.jpg,FALSE,False,A reverse image search reveals the photograph ...,False,True,False,False,Manual
1,1,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.39432WD,"Pictures show Gaza in ruins, not Myanmar quake...",2025-03-29 0:00:00,2025-04-17 0:00:00,Israeli-Palestinian conflict,"""A 7.9-magnitude earthquake in Myanmar, devast...","""A 7.9-magnitude earthquake in Myanmar, devast...",This image shows the ruins after a 7.9-magnitu...,https://web.archive.org/web/20250402070342im_/...,images/gaza_israel/1.jpg,FALSE,False,"However, a reverse image search for the first ...",False,True,False,False,Manual
2,2,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.37W26WD,Image of 'Gazan mother with son's bones' is ma...,2025-03-24 0:00:00,2025-03-27 0:00:00,Israeli-Palestinian conflict,"""A mother holds the bones of her son in Gaza. ...","""A mother holds the bones of her son in Gaza. ...",This is an authentic image of a mother holding...,https://web.archive.org/web/20250527092039/htt...,images/gaza_israel/2.jpg,FALSE,False,A keyword search found a wider-angle version o...,False,False,True,False,Manual
3,3,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36XY2FV,Old image misrepresented as 'Israeli settlers'...,2025-02-16 0:00:00,2025-02-19 0:00:00,Israeli-Palestinian conflict,"""Israeli media reports there are Jews who have...","""Israeli media reports there are Jews who have...",This image shows Israelis who have stopped fig...,https://pbs.twimg.com/media/Gj5tbXiaQAAoG1z?fo...,images/gaza_israel/3.jpg,FALSE,False,A reverse image search via Google traced the v...,False,True,False,False,Manual
4,4,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36RF6BZ,"Picture shows Israel's 2018 Gaza attack, not a...",2024-12-22 0:00:00,2024-12-26 0:00:00,Israeli-Palestinian conflict,"""Americans bombarded Yemen in reprisal after Y...","""Americans bombarded Yemen in reprisal after Y...",This image shows the U.S. bombarding Yemen on ...,https://media.gettyimages.com/id/1060718622/de...,images/gaza_israel/4.jpg,FALSE,False,But a reverse image search on Google found the...,False,True,False,False,Manual
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,95,logicallyfacts.com,https://www.logicallyfacts.com/en/fact-check/f...,"No, photo doesn’t show Gaza man holding hand o...",2024-07-14 00:00:00+00:00,2024-07-16 00:00:00+00:00,Israel,,This photo shows a man from Gaza holding the h...,This image shows a man from Gaza holding the h...,https://media.cnn.com/api/v1/images/stellar/pr...,images/gaza_israel/95.jpg,FALSE,False,"Upon conducting a reverse image search, we fo...",False,True,False,False,API
96,96,misbar.com,https://www.misbar.com/en/factcheck/2024/07/15...,Israel Tragically Killed the Hamida Siblings i...,2024-07-15 00:00:00+00:00,2024-07-15 00:00:00+00:00,Israel,,Mayar Hamida (9 years old) and Bilal Hamida (6...,This image shows Mayar Hamida (9 years old) an...,https://assets.misbar.com/styles/large_870x490...,images/gaza_israel/96.jpg,misleading,Misleading,Israel Killed the Two Siblings in October 202...,False,True,False,False,API
97,97,usatoday.com,https://www.usatoday.com/story/news/factcheck/...,False claim Americans drafted for Israel-Hamas...,2024-07-09 00:00:00+00:00,2024-07-12 00:00:00+00:00,Israel,,US reinstituted draft for Israel-Hamas war,The US reinstituted draft in 2024 for the Isra...,,,FALSE,False,The U.S. is not conscripting Americans into t...,True,False,False,False,API
98,98,logicallyfacts.com,https://www.logicallyfacts.com/en/fact-check/f...,AI image of Dwayne Johnson shared to claim tha...,2024-07-20 00:00:00+00:00,2024-07-11 00:00:00+00:00,Israel,,The image shows Dwayne Johnson wearing Israeli...,This is an authentic image of Dwayne Johnson w...,https://pbs.twimg.com/media/F-GfaZzWMAAmsmm?fo...,images/gaza_israel/98.jpg,fake,False,A close look at the image showed several disc...,False,False,True,False,API


In [43]:
# Convert None values to NaN in the Image_Path column to be consistent with all other NaN values in other columsn of df
df_gaza_israel_final["Image_Path"] = df_gaza_israel_final["Image_Path"].replace({None: np.nan})

# Verify it worked
print("None values in column 'Image_Path':", (df_gaza_israel_final["Image_Path"] == None).sum().sum())
print("NaN values in column 'Image_Path':", df_gaza_israel_final["Image_Path"].isnull().sum().sum())

## Check that no other column contains None values
print("None values in column 'Image_Path':", (df_gaza_israel_final == None).sum().sum())
print("NaN values in column 'Image_Path':", df_gaza_israel_final.isnull().sum().sum())

None values in column 'Image_Path': 0
NaN values in column 'Image_Path': 35
None values in column 'Image_Path': 0
NaN values in column 'Image_Path': 113


In [45]:
## Save final df as csv 

os.getcwd()

df_gaza_israel_final.to_csv("../gaza_israel/Combined_dataset/gaza_israel_dataset_combined_010724_300425_final.csv")

#### 2) Ukraine-Russia Dataset (Date of Scraping: 08.07.2025)


In [30]:
## import Gaza-Israel dataset (after claim reformulation) (Checked URL's validity: 08.07.2025)

df_ukraine_russia = pd.read_csv("../ukraine_russia/Combined_dataset/ukraine_russia_dataset_combined_010724_300425_after_claim_reformulation.csv", sep=';', header=0, index_col = 0)
print(len(df_ukraine_russia))
print(df_ukraine_russia.dtypes)
df_ukraine_russia.head()

79
id                            int64
Website                      object
Article_URL                  object
Headline                     object
Claim_Date                   object
Review_Date                  object
Query/Keyword                object
Label_Website                object
Image_URL                    object
Original_Claim_Website       object
Original_Claim_Only          object
Context/Label_Explanation    object
Text_Only_Claim                bool
Normal_Image                   bool
AI_Generated_Image             bool
Altered_Image                  bool
Data_Collection_Type         object
Label                        object
Claim                        object
dtype: object


Unnamed: 0,id,Website,Article_URL,Headline,Claim_Date,Review_Date,Query/Keyword,Label_Website,Image_URL,Original_Claim_Website,Original_Claim_Only,Context/Label_Explanation,Text_Only_Claim,Normal_Image,AI_Generated_Image,Altered_Image,Data_Collection_Type,Label,Claim
0,0,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.372Y6CV,Fake newspaper cover on Ukrainian soldiers in ...,2025-03-15 00:00:00,2025-03-19 00:00:00,"""War in Ukraine""",altered,https://web.archive.org/web/20250708170622/htt...,"""The Kursk expedition was a disaster and a com...","""70,000 Ukrainian soldiers in the Kursk region...",But the supposed Hull Daily Mail headline blas...,False,False,False,True,Manual,False,This image shows a screenshot of an authentic ...
1,1,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36YR9KZ,"No, Zelensky hasn't bought Eagle's Nest, it is...",2025-02-18 00:00:00,2025-02-27 00:00:00,"""War in Ukraine""",FALSE,,"According to the latest claims, Zelensky alleg...","According to the latest claims, Zelensky alleg...",The Eagle's Nest is in the property of the sta...,True,False,False,False,Manual,False,Ukrainian President Volodymyr Zelenskyy purcha...
2,2,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36YC3DG,Claims that Ukraine banned Truth Social are false,2025-02-20 00:00:00,2025-02-21 00:00:00,"""War in Ukraine""",FALSE,,"""BREAKING: Zelensky blocks access to President...","""BREAKING: Zelensky blocks access to President...",A spokesperson for Trump Media and Technology ...,True,False,False,False,Manual,False,In February 2025 Ukrainian President Volodymyr...
3,3,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36P98ZW,Fake 'apocalypse' cover of The Economist circu...,2024-11-18 00:00:00,2024-12-03 00:00:00,"""War in Ukraine""",FALSE,https://web.archive.org/web/20250708170927/htt...,"""APOCALYPSE: Allowing missile strikes deep int...","""APOCALYPSE: Allowing missile strikes deep int...","However, The Economist does list Telegram amon...",False,False,False,True,Manual,False,This image shows a screenshot of an authentic ...
4,4,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36MM6QY,Old photo misrepresented as coffins of 'Britis...,2024-11-03 00:00:00,2024-11-27 00:00:00,"""War in Ukraine""",FALSE,https://web.archive.org/web/20250708171047/htt...,"""Recently, 18 members of the British special f...","""18 British Special Forces were killed in Ukra...",A reverse image search and keyword searches on...,False,True,False,False,Manual,False,This image shows the coffins of 18 British Spe...


In [31]:
# check if "Label" is in big letters
print(df_ukraine_russia["Label"].value_counts() )

## Rename "FALSE" into "False" and "TRUE" into "True" 

df_ukraine_russia["Label"] = df_ukraine_russia["Label"].replace({'FALSE': 'False', 'TRUE': 'True'})

## Check label letters again

print(df_ukraine_russia["Label"].value_counts()) ##worked




Label
FALSE         53
Misleading    15
TRUE           9
NEI            2
Name: count, dtype: int64
Label
False         53
Misleading    15
True           9
NEI            2
Name: count, dtype: int64


In [32]:
# How many claims with images? Amount of images and image_URLs should be the same

## Check 1: Image_URL column 

image_url_non_nan_count = df_ukraine_russia["Image_URL"].notna().sum()
print(f"Amount of Claims with Images: {image_url_non_nan_count}")


image_url_nan_count = df_ukraine_russia["Image_URL"].isna().sum()
print(f"Amount of Claims without Images: {image_url_nan_count}")


## Check 2: Text_Only_Claim column

print("\n", df_ukraine_russia["Text_Only_Claim"].value_counts())

## Check 3: Image columns 

print("\n",df_ukraine_russia["Normal_Image"].value_counts())
print("\n",df_ukraine_russia["AI_Generated_Image"].value_counts())
print("\n",df_ukraine_russia["Altered_Image"].value_counts())





Amount of Claims with Images: 26
Amount of Claims without Images: 53

 Text_Only_Claim
True     53
False    26
Name: count, dtype: int64

 Normal_Image
False    62
True     17
Name: count, dtype: int64

 AI_Generated_Image
False    79
Name: count, dtype: int64

 Altered_Image
False    70
True      9
Name: count, dtype: int64


In [58]:
## Scrape the images (use function defined above in Gaza-Israel dataset part)

df_ukraine_russia_with_image_paths = scrape_images(df=df_ukraine_russia, base_folder="images", topic = "ukraine_russia")

Found 26 claims with iamge URLs out of 79 claims


Scraping ukraine_russia images:   0%|          | 0/26 [00:00<?, ?it/s]

Image 0 already exists in all formats. Skipping.


Scraping ukraine_russia images:   4%|▍         | 1/26 [00:05<02:05,  5.01s/it]

Image 3 already exists in all formats. Skipping.


Scraping ukraine_russia images:   8%|▊         | 2/26 [00:10<02:00,  5.00s/it]

Image 4 already exists in all formats. Skipping.


Scraping ukraine_russia images:  12%|█▏        | 3/26 [00:15<01:55,  5.01s/it]

Image 5 already exists in all formats. Skipping.


Scraping ukraine_russia images:  15%|█▌        | 4/26 [00:20<01:50,  5.01s/it]

Image 6 already exists in all formats. Skipping.


Scraping ukraine_russia images:  19%|█▉        | 5/26 [00:25<01:45,  5.01s/it]

Image 8 already exists in all formats. Skipping.


Scraping ukraine_russia images:  23%|██▎       | 6/26 [00:30<01:40,  5.01s/it]

Image 9 already exists in all formats. Skipping.


Scraping ukraine_russia images:  27%|██▋       | 7/26 [00:35<01:35,  5.01s/it]

Image 10 already exists in all formats. Skipping.


Scraping ukraine_russia images:  31%|███       | 8/26 [00:40<01:30,  5.01s/it]

Image 12 already exists in all formats. Skipping.


Scraping ukraine_russia images:  35%|███▍      | 9/26 [00:45<01:25,  5.01s/it]

Image 13 already exists in all formats. Skipping.


Scraping ukraine_russia images:  38%|███▊      | 10/26 [00:50<01:20,  5.01s/it]

Image 14 already exists in all formats. Skipping.


Scraping ukraine_russia images:  42%|████▏     | 11/26 [00:55<01:15,  5.01s/it]

Image 22 already exists in all formats. Skipping.


Scraping ukraine_russia images:  46%|████▌     | 12/26 [01:00<01:10,  5.01s/it]

Image 33 already exists in all formats. Skipping.


Scraping ukraine_russia images:  50%|█████     | 13/26 [01:05<01:05,  5.01s/it]

Image 47 already exists in all formats. Skipping.


Scraping ukraine_russia images:  54%|█████▍    | 14/26 [01:10<01:00,  5.01s/it]

Image 48 already exists in all formats. Skipping.


Scraping ukraine_russia images:  58%|█████▊    | 15/26 [01:15<00:55,  5.01s/it]

Image 49 already exists in all formats. Skipping.


Scraping ukraine_russia images:  62%|██████▏   | 16/26 [01:20<00:50,  5.01s/it]

Image 56 already exists in all formats. Skipping.


Scraping ukraine_russia images:  65%|██████▌   | 17/26 [01:25<00:45,  5.01s/it]

Image 59 already exists in all formats. Skipping.


Scraping ukraine_russia images:  69%|██████▉   | 18/26 [01:30<00:40,  5.01s/it]

Image 60 already exists in all formats. Skipping.


Scraping ukraine_russia images:  73%|███████▎  | 19/26 [01:35<00:35,  5.01s/it]

Image 62 already exists in all formats. Skipping.


  retry_strategy = Retry(


Successfully saved 69 in jpg format.




Successfully saved 73 in jpg format.


Scraping ukraine_russia images:  85%|████████▍ | 22/26 [01:54<00:24,  6.01s/it]

Image 74 already exists in all formats. Skipping.


Scraping ukraine_russia images:  88%|████████▊ | 23/26 [01:59<00:17,  5.71s/it]

Image 75 already exists in all formats. Skipping.


Scraping ukraine_russia images:  92%|█████████▏| 24/26 [02:04<00:11,  5.50s/it]

Image 77 already exists in all formats. Skipping.


Scraping ukraine_russia images:  96%|█████████▌| 25/26 [02:09<00:05,  5.35s/it]

Image 78 already exists in all formats. Skipping.


Scraping ukraine_russia images: 100%|██████████| 26/26 [02:14<00:00,  5.17s/it]


In [59]:
#check new df with image path

df_ukraine_russia_with_image_paths.head()


## check that all images were successfully scraped 

print(df_ukraine_russia_with_image_paths["Image_Path"].isna().sum()) 


# Quick verification
print(f"Images with paths: {df_ukraine_russia_with_image_paths['Image_Path'].notna().sum()}")
print(f"Images with URLs: {df_ukraine_russia_with_image_paths['Image_URL'].notna().sum()}")

# Check if any paths are missing
missing_images = df_ukraine_russia_with_image_paths[
    (df_ukraine_russia_with_image_paths['Image_URL'].notna()) & 
    (df_ukraine_russia_with_image_paths['Image_Path'].isna())
]
print(f"Failed scrapes: {len(missing_images)}")

53
Images with paths: 26
Images with URLs: 26
Failed scrapes: 0


In [60]:
### Reorder the columns before saving the final gaza_israel df

df_ukraine_russia_with_image_paths.head()



# reorder columns

new_column_order = [
    'id', 
    'Website', 
    'Article_URL', 
    'Headline', 
    'Claim_Date', 
    'Review_Date', 
    'Query/Keyword',
    'Original_Claim_Website',  
    'Original_Claim_Only',
    'Claim',
    'Image_URL',
    'Image_Path',
    'Label_Website',
    'Label',
    'Context/Label_Explanation', 
    'Text_Only_Claim', 
    'Normal_Image', 
    'AI_Generated_Image', 
    'Altered_Image',
    'Data_Collection_Type'
]

df_ukraine_russia_final = df_ukraine_russia_with_image_paths[new_column_order]


df_ukraine_russia_final.head()

Unnamed: 0,id,Website,Article_URL,Headline,Claim_Date,Review_Date,Query/Keyword,Original_Claim_Website,Original_Claim_Only,Claim,Image_URL,Image_Path,Label_Website,Label,Context/Label_Explanation,Text_Only_Claim,Normal_Image,AI_Generated_Image,Altered_Image,Data_Collection_Type
0,0,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.372Y6CV,Fake newspaper cover on Ukrainian soldiers in ...,2025-03-15 00:00:00,2025-03-19 00:00:00,"""War in Ukraine""","""The Kursk expedition was a disaster and a com...","""70,000 Ukrainian soldiers in the Kursk region...",This image shows a screenshot of an authentic ...,https://web.archive.org/web/20250708170622/htt...,images/ukraine_russia/0.jpg,altered,False,But the supposed Hull Daily Mail headline blas...,False,False,False,True,Manual
1,1,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36YR9KZ,"No, Zelensky hasn't bought Eagle's Nest, it is...",2025-02-18 00:00:00,2025-02-27 00:00:00,"""War in Ukraine""","According to the latest claims, Zelensky alleg...","According to the latest claims, Zelensky alleg...",Ukrainian President Volodymyr Zelenskyy purcha...,,,FALSE,False,The Eagle's Nest is in the property of the sta...,True,False,False,False,Manual
2,2,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36YC3DG,Claims that Ukraine banned Truth Social are false,2025-02-20 00:00:00,2025-02-21 00:00:00,"""War in Ukraine""","""BREAKING: Zelensky blocks access to President...","""BREAKING: Zelensky blocks access to President...",In February 2025 Ukrainian President Volodymyr...,,,FALSE,False,A spokesperson for Trump Media and Technology ...,True,False,False,False,Manual
3,3,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36P98ZW,Fake 'apocalypse' cover of The Economist circu...,2024-11-18 00:00:00,2024-12-03 00:00:00,"""War in Ukraine""","""APOCALYPSE: Allowing missile strikes deep int...","""APOCALYPSE: Allowing missile strikes deep int...",This image shows a screenshot of an authentic ...,https://web.archive.org/web/20250708170927/htt...,images/ukraine_russia/3.jpg,FALSE,False,"However, The Economist does list Telegram amon...",False,False,False,True,Manual
4,4,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36MM6QY,Old photo misrepresented as coffins of 'Britis...,2024-11-03 00:00:00,2024-11-27 00:00:00,"""War in Ukraine""","""Recently, 18 members of the British special f...","""18 British Special Forces were killed in Ukra...",This image shows the coffins of 18 British Spe...,https://web.archive.org/web/20250708171047/htt...,images/ukraine_russia/4.jpg,FALSE,False,A reverse image search and keyword searches on...,False,True,False,False,Manual


In [61]:
df_ukraine_russia_final

Unnamed: 0,id,Website,Article_URL,Headline,Claim_Date,Review_Date,Query/Keyword,Original_Claim_Website,Original_Claim_Only,Claim,Image_URL,Image_Path,Label_Website,Label,Context/Label_Explanation,Text_Only_Claim,Normal_Image,AI_Generated_Image,Altered_Image,Data_Collection_Type
0,0,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.372Y6CV,Fake newspaper cover on Ukrainian soldiers in ...,2025-03-15 00:00:00,2025-03-19 00:00:00,"""War in Ukraine""","""The Kursk expedition was a disaster and a com...","""70,000 Ukrainian soldiers in the Kursk region...",This image shows a screenshot of an authentic ...,https://web.archive.org/web/20250708170622/htt...,images/ukraine_russia/0.jpg,altered,False,But the supposed Hull Daily Mail headline blas...,False,False,False,True,Manual
1,1,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36YR9KZ,"No, Zelensky hasn't bought Eagle's Nest, it is...",2025-02-18 00:00:00,2025-02-27 00:00:00,"""War in Ukraine""","According to the latest claims, Zelensky alleg...","According to the latest claims, Zelensky alleg...",Ukrainian President Volodymyr Zelenskyy purcha...,,,FALSE,False,The Eagle's Nest is in the property of the sta...,True,False,False,False,Manual
2,2,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36YC3DG,Claims that Ukraine banned Truth Social are false,2025-02-20 00:00:00,2025-02-21 00:00:00,"""War in Ukraine""","""BREAKING: Zelensky blocks access to President...","""BREAKING: Zelensky blocks access to President...",In February 2025 Ukrainian President Volodymyr...,,,FALSE,False,A spokesperson for Trump Media and Technology ...,True,False,False,False,Manual
3,3,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36P98ZW,Fake 'apocalypse' cover of The Economist circu...,2024-11-18 00:00:00,2024-12-03 00:00:00,"""War in Ukraine""","""APOCALYPSE: Allowing missile strikes deep int...","""APOCALYPSE: Allowing missile strikes deep int...",This image shows a screenshot of an authentic ...,https://web.archive.org/web/20250708170927/htt...,images/ukraine_russia/3.jpg,FALSE,False,"However, The Economist does list Telegram amon...",False,False,False,True,Manual
4,4,AFP Factcheck,https://factcheck.afp.com/doc.afp.com.36MM6QY,Old photo misrepresented as coffins of 'Britis...,2024-11-03 00:00:00,2024-11-27 00:00:00,"""War in Ukraine""","""Recently, 18 members of the British special f...","""18 British Special Forces were killed in Ukra...",This image shows the coffins of 18 British Spe...,https://web.archive.org/web/20250708171047/htt...,images/ukraine_russia/4.jpg,FALSE,False,A reverse image search and keyword searches on...,False,True,False,False,Manual
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,74,checkyourfact.com,https://checkyourfact.com/2024/09/19/fact-chec...,FACT CHECK: Did A Small Nuclear Strike Hit Rus...,2024-09-18 00:00:00+00:00,2024-09-19 00:00:00+00:00,Russia,,Posts shared on X claims that a North Atlantic...,This image shows a Russian ammunition depot in...,https://web.archive.org/web/20250313135839/htt...,images/ukraine_russia/74.jpg,FALSE,False,This claim is false. The attack was not from N...,False,True,False,False,API
75,75,misbar.com,https://www.misbar.com/en/factcheck/2024/08/26...,An Old Image of Ukrainians Circulated As Ameri...,2024-08-26 00:00:00+00:00,2024-08-26 00:00:00+00:00,Russia,,American mercenaries exposed invading Russia's...,This image shows American mercenaries particip...,https://web.archive.org/web/20250708182706/htt...,images/ukraine_russia/75.jpg,misleading,Misleading,An Image Of A Ukrainian Solider Taken In Balak...,False,True,False,False,API
76,76,misbar.com,https://www.misbar.com/en/factcheck/2024/08/22...,"Joe Biden Updates Nuclear Strategy, No Imminen...",2024-08-22 00:00:00+00:00,2024-08-22 00:00:00+00:00,Russia,,Biden ordered U.S. forces to prepare for possi...,"While serving as US President in 2024, Joe Bid...",,,excitement,True,New York Times' Article Misinterpreted The ar...,True,False,False,False,API
77,77,misbar.com,https://www.misbar.com/en/factcheck/2024/08/20...,Poland Marks Historic Victory Over the Red Arm...,2024-08-20 00:00:00+00:00,2024-08-20 00:00:00+00:00,Russia,,POLAND The Army is preparing for war with Russ...,This image shows a military parade of Poland's...,https://web.archive.org/web/20250124154226/htt...,images/ukraine_russia/77.jpg,misleading,Misleading,Poland Celebrates Historic Red Army Defeat On ...,False,True,False,False,API


In [62]:
# Convert None values to NaN in the Image_Path column to be consistent with all other NaN values in other columsn of df
df_ukraine_russia_final["Image_Path"] = df_ukraine_russia_final["Image_Path"].replace({None: np.nan})

# Verify it worked
print("None values in column 'Image_Path':", (df_ukraine_russia_final["Image_Path"] == None).sum().sum())
print("NaN values in column 'Image_Path':", df_ukraine_russia_final["Image_Path"].isnull().sum().sum())

## Check that no other column contains None values
print("None values in column 'Image_Path':", (df_ukraine_russia_final == None).sum().sum())
print("NaN values in column 'Image_Path':", df_ukraine_russia_final.isnull().sum().sum())

None values in column 'Image_Path': 0
NaN values in column 'Image_Path': 53
None values in column 'Image_Path': 0
NaN values in column 'Image_Path': 139


In [64]:
## Save final df as csv 

os.getcwd()

df_ukraine_russia_final.to_csv("../ukraine_russia/Combined_dataset/ukraine_russia_dataset_combined_010724_300425_final.csv")