In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for folder, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

The first step is to make sure the required installs and imports are in place - like bringing fresh ingredients to the kitchen.

In [None]:
# It's a good idea to ensure you're running the latest version of any libraries you need.
# `!pip install -Uqq <libraries>` upgrades to the latest version of <libraries>
# NB: You can safely ignore any warnings or errors pip spits out about running as root or incompatibilities
import os
iskaggle = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', '')

if iskaggle:
    !pip install -Uqq fastai

The next stage is to use duckduckgo to search for images:

We are going to grab one image for the first category and **first test that the image output is valid**


In [None]:
# Skip this cell if you already have duckduckgo_search installed
!pip install ddgs

In [None]:
from ddgs import DDGS 
from fastcore.all import *
from fastai.vision.all import *

In [None]:
def search_images(keywords, max_images=200):
    """Search DuckDuckGo images for `keywords` and return the image URLs.

    Each search result is a dictionary; only its 'image' entry (the URL) is kept.
    At most `max_images` results are requested from the search.
    """
    results = DDGS().images(keywords, max_results=max_images)
    return L(results).itemgot('image')
# First, request a single image result for the search 'legion dbd' (Dead by Daylight).
urls = search_images('legion dbd', max_images=1)
# Show the first URL as the cell output so it can be checked by eye.
urls[0]

In [None]:
# Import the helper used to download a file from a URL.
from fastdownload import download_url
# Local file name the downloaded image is saved under (it is re-opened just below).
dest = 'legion.jpg'
download_url(urls[0], dest, show_progress=False)
# Open the saved image and show a thumbnail of it as the cell output.
im = Image.open(dest)
im.to_thumb(256,256)

Now that we know that works, and the image is actually from the game, we can try another killer, such as the Nurse!

In [None]:
# Same search -> download -> preview steps as for the first image, condensed by
# passing the search result and the destination file name straight into download_url.
download_url(
    search_images('nurse dbd', max_images=1)[0],
    'nurse.jpg',
    show_progress=False,
)
Image.open('nurse.jpg').to_thumb(256, 256)

Okay great, so we see that the outputs of the duckduckgo searches are quite valid... So now what we should do is train the model

To do this we need to assign a label to each group of images, and search on a larger scale to build a training set for each label.

We save each group of images to its own folder inside the Kaggle virtual machine; the folder name is later used as the label for the images inside it.



In [None]:
# One search term per Dead by Daylight killer; each term is also used as the
# name of the folder its images are saved in.
searches = ['Animatronic','Artist','Blight','Cannibal','Cenobite','Clown','DarkLord','Deathslinger',
            'Demogorgon','Doctor','Dredge','Executioner','Ghostface','Ghoul','Goodguy','Hag','Hillbilly',
            'Houndmaster','Huntress','Knight','Legion','Lich','Mastermind','Nemesis','Nightmare','Nurse','Oni',
            'Onryō','Pig','Plague','Shape','Singularity','SkullMerchant','Spirit','Trapper','Trickster','Twins',
            'Unknown','Wraith','Xenomorph']

path = Path('dbd_killers')
from time import sleep

# Tunable settings for the download loop.
MAX_IMAGES_PER_KILLER = 50   # search results requested per killer
SEARCH_PAUSE_SECONDS = 10    # pause after each search so we don't overload the server
MAX_IMAGE_SIZE = 400         # passed to resize_images as max_size

for o in searches:
    # Sub-folder for the images of killer {o}; parent folders are created if
    # they do not exist, and an already-existing folder is not an error.
    dest = path/o
    dest.mkdir(exist_ok=True, parents=True)
    # Only search and download when the folder holds no images yet, so that
    # re-running this cell (or resuming after an error part-way through the
    # list) does not repeat searches or downloads that were already done.
    # Empty or delete a killer's folder to force a fresh download for it.
    if len(get_image_files(dest)) == 0:
        download_images(dest, urls=search_images(f'The {o} dbd photo', max_images=MAX_IMAGES_PER_KILLER))
        # sleep before the next search so we don't overload the server
        sleep(SEARCH_PAUSE_SECONDS)
    # Resize the images in place (source and destination are the same folder).
    resize_images(dest, max_size=MAX_IMAGE_SIZE, dest=dest)

What can sometimes happen when grabbing training images is that the images can become corrupt, so what we should do is test the integrity and delete broken images.

In [None]:
# Try to open every image file found under `path` and collect the ones that fail.
failed = verify_images(get_image_files(path))
# Delete the failed files.
# `map` applies Path.unlink to every item in `failed`.
failed.map(Path.unlink)

We now need to create a **Data Block** which splits the images we downloaded into **training** and **validation** images respectively 
Some will be used to train the model, and others to validate the accuracy of the model.
This is used to create a model, which will be reusable.

In [None]:
# Describe how the downloaded folders become labelled training data.
killer_block = DataBlock(
    blocks=(ImageBlock, CategoryBlock),                # inputs are images, targets are categories
    get_items=get_image_files,                         # gather the image files under the given path
    get_y=parent_label,                                # label comes from parent_label (the containing folder)
    splitter=RandomSplitter(valid_pct=0.2, seed=42),   # valid_pct=0.2 held out for validation, fixed seed
    item_tfms=[Resize(192, method='squish')],          # per-item resize to 192 using the 'squish' method
)

# Build the DataLoaders from everything under `path`.
dls = killer_block.dataloaders(path)

# Preview up to six items from a batch.
dls.show_batch(max_n=6)

Now that we have created the DataLoaders, which load our images and take each image's label from the folder structure, we are ready to **fine tune** a pre-existing (pretrained) model using our training data. Doing so reduces training time considerably compared with training a model from scratch.

By **fine tuning** the model, we take its intelligence and apply it to our specific image classification.

We will train the model over **six** epochs (after fastai's initial frozen epoch) - each epoch adjusts the weights of the neural network so that it distinguishes between our categories better.

In [None]:
# Number of epochs passed to fine_tune.
N_EPOCHS = 6

# Build a learner from the DataLoaders with the resnet18 architecture,
# reporting error_rate as the metric, then fine-tune it.
learn = vision_learner(dls, resnet18, metrics=error_rate)
learn.fine_tune(N_EPOCHS)

Here we can then take in an image and compare it to the model, to see how accurate it is !


In [None]:
# Test image of the Animatronic; swap in any other image URL to try a different picture.
URL = 'https://static.wikia.nocookie.net/multiverse-custom-night/images/e/ed/The_Animatronic.png/revision/latest/thumbnail/width/360/height/360?cb=20250625163904' #can change
# Save the test image locally so it can be loaded for prediction.
download_url(URL, 'test_image.jpg', show_progress=False)

# predict returns three values, unpacked here as the predicted label, an index,
# and `probs`; `probs[index]` is printed below as the probability of that label.
predicted_label,index,probs = learn.predict(PILImage.create('test_image.jpg'))
print(f"This is the {predicted_label}.")
print(f"Probability it's the {predicted_label}: {float(probs[index]):.4f}")

Notes:

So it essentially seems like the type of image, and getting better training data could be important, like bro look at that oni XD
More epochs generally help, but too many can lead the model to memorise the training data rather than generalise (overfitting).

Also I learned that we can analyse the per-epoch report to determine what changes need to be made (a loss that is still decreasing means the model is still learning).

The first epoch is 'frozen', and the number of epochs seems to affect the quality of the frozen epoch -> potentially affecting the quality of removal and training over the subsequent epochs.

Citations:

Thank you to:
- https://colab.research.google.com/github/fastai/course22/blob/master/00-is-it-a-bird-creating-a-model-from-your-own-data.ipynb#scrollTo=S7Pxawe8l2KF
- https://www.kaggle.com/code/jhoward/is-it-a-bird-creating-a-model-from-your-own-data