# Data collection

In [1]:
import os

import requests
from bs4 import BeautifulSoup as bs

In [2]:
leagues = ['la-liga', 'ligue-1', 'premier-league', 'serie-a', 'bundesliga']

# Collect the first two team slugs that appear on each league's table page.
all_teams = []

for league in leagues:
    response = requests.get('https://www.sportsmole.co.uk/football/%s/table.html' % league)
    table_page = bs(response.text, 'html.parser')

    league_slugs = []
    for cell in table_page.findAll('td', class_='teamtd rX'):
        href = cell.find('a').get('href')
        # Drop the first 10 characters and the last one
        # (presumably the '/football/' prefix and a trailing '/').
        league_slugs.append(href[10:-1])

    # Extending directly keeps `all_teams` flat, so no flattening pass is needed.
    all_teams.extend(league_slugs[:2])

In [3]:
# Sanity check: report how many team slugs were collected, then list them.
print(
    'Number of teams: %d' % len(all_teams),
    all_teams,
    sep='\n\n',
)

Number of teams: 10

['barcelona', 'real-madrid', 'psg', 'lens', 'arsenal', 'man-city', 'napoli', 'ac-milan', 'bayern-munich', 'freiburg']


In [4]:
# Gather the player slugs linked from every collected team's squad page.
all_players = []

for club in all_teams:
    response = requests.get('https://www.sportsmole.co.uk/football/%s/squad.html' % club)
    squad_page = bs(response.text, 'html.parser')

    # Drop the first 8 characters and the last one of each link
    # (presumably the '/people/' prefix and a trailing '/').
    squad_slugs = [link['href'][8:-1] for link in squad_page.findAll('a', 'name bold')]

    if squad_slugs:
        # Extending directly keeps `all_players` flat.
        all_players.extend(squad_slugs)
    else:
        print('%s not found.' % club)

In [None]:
# Download each player's profile image into players/<display name>.png.
# The bytes are written as received; the '.png' suffix is applied regardless
# of the format the server actually returns.

# Without this directory every open() below fails and would be misreported
# as a missing image.
os.makedirs('players', exist_ok=True)

for player in all_players:
    try:
        page = requests.get('https://www.sportsmole.co.uk/people/%s' % player, timeout=30)
        page.raise_for_status()  # do not parse an HTTP error page as a profile
        soup = bs(page.text, 'html.parser')
        url = soup.findAll('img',  {'itemprop': 'image'})[0]['src']
        name = soup.findAll('h1', {'itemprop': 'name'})[0]
        img = requests.get(url, timeout=30)
        img.raise_for_status()  # do not save an HTTP error body as an image
        path = 'players/%s.png' % (name.text)
        with open(path, 'wb') as image_file:  # closes the handle deterministically
            image_file.write(img.content)
    except Exception as exc:
        # Best effort: report the failing player together with the reason and
        # carry on. Catching Exception (not a bare except) keeps the loop
        # interruptible with KeyboardInterrupt.
        print('%s image not found (%s: %s)' % (player, type(exc).__name__, exc))

# Face recognition

In [1]:
import os
# import face_recognition

from sklearn import neighbors
from PIL import Image, ImageDraw

In [None]:
def _label_height(draw, text):
    """Return how far ``text`` extends below its anchor in ``draw``'s current font."""
    try:
        # `textsize` was removed in Pillow 10; `textbbox` is its replacement.
        return draw.textbbox((0, 0), text)[3]
    except (AttributeError, ValueError):
        # AttributeError: Pillow predates `textbbox`.
        # ValueError: NOTE(review) some older releases appear to reject the
        # built-in bitmap font in `textbbox` -- confirm against the pinned version.
        return draw.textsize(text)[1]


def show_labels(img_path, predictions):
    """Draw a green box and name label for every prediction and display the image.

    Parameters
    ----------
    img_path : str
        Path of the image to annotate; it is opened and converted to RGB.
    predictions : iterable of (str, (top, right, bottom, left))
        Label and face box for each detected face. An empty label draws the
        box without any text.

    Returns
    -------
    PIL.Image.Image
        The annotated image, which is also opened via ``Image.show()``.
    """
    pil_image = Image.open(img_path).convert("RGB")
    draw = ImageDraw.Draw(pil_image)

    for name, (top, right, bottom, left) in predictions:
        draw.rectangle(((left, top), (right, bottom)), outline=(0, 255, 0))

        # The label is drawn as UTF-8 bytes, as before.
        # NOTE(review): non-ASCII names may not render as intended this way --
        # confirm whether passing `str` is safe for the installed Pillow/font.
        name = name.encode("UTF-8")

        # Place the label just inside the bottom edge of the box.
        text_height = _label_height(draw, name)
        draw.text((left, bottom - text_height), name, fill=(0, 255, 0))

    pil_image.show()
    return pil_image

In [None]:
def train_knn(n_neighbors=5, train_dir='players'):
    """Fit a distance-weighted k-NN classifier on one face encoding per image.

    Every regular file in ``train_dir`` is loaded with ``face_recognition``.
    An image in which exactly one face is detected contributes that face's
    encoding, labelled with the file name minus its extension; any other image
    is reported and skipped.

    NOTE: this relies on the notebook-level name ``face_recognition``; its
    import in the notebook is currently commented out and must be enabled.

    Parameters
    ----------
    n_neighbors : int, default 5
        Passed to ``KNeighborsClassifier``.
    train_dir : str, default 'players'
        Directory holding one image per person.

    Returns
    -------
    sklearn.neighbors.KNeighborsClassifier
        The fitted classifier.

    Raises
    ------
    ValueError
        If no image in ``train_dir`` yields exactly one face.
    """
    names = []
    encodings = []

    # Sorted so the training order (and the skip messages) are reproducible.
    for file_name in sorted(os.listdir(train_dir)):
        img = '%s/%s' % (train_dir, file_name)
        if not os.path.isfile(img):
            # Sub-directories cannot be loaded as images.
            continue

        face = face_recognition.load_image_file(img)
        face_bounding_boxes = face_recognition.face_locations(face)

        if len(face_bounding_boxes) == 1:
            # Reuse the detected box instead of running face detection again.
            face_enc = face_recognition.face_encodings(
                face, known_face_locations=face_bounding_boxes
            )[0]
            encodings.append(face_enc)
            # Label = file name without extension, whatever its length.
            names.append(os.path.splitext(file_name)[0])
        else:
            print('%s image is not suitable for training!' % img)

    if not encodings:
        raise ValueError('No usable training images found in %r' % train_dir)

    clf = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors, weights='distance')
    clf.fit(encodings, names)

    return clf

In [None]:
def predict(clf, test_image_path, threshold=0.5):
    """Label every face found in an image with the trained classifier.

    NOTE: this relies on the notebook-level name ``face_recognition``; its
    import in the notebook is currently commented out and must be enabled.

    Parameters
    ----------
    clf : fitted classifier exposing ``kneighbors`` and ``predict``
    test_image_path : str
        Path of the image to analyse.
    threshold : float, default 0.5
        Largest nearest-neighbour distance still accepted as a match.

    Returns
    -------
    zip
        Lazy, single-use iterator of ``(label, face_location)`` pairs; the
        label is ``''`` when the closest training face is further away than
        ``threshold``.
    """
    image = face_recognition.load_image_file(test_image_path)
    boxes = face_recognition.face_locations(image)

    labels = []
    for encoding in face_recognition.face_encodings(image, boxes):
        # Distance from this face to its single closest training sample.
        nearest_distance = clf.kneighbors([encoding], n_neighbors=1)[0][0][0]
        if nearest_distance > threshold:
            labels.append('')
            continue
        labels.append(clf.predict([encoding])[0])

    return zip(labels, boxes)

In [None]:
# Train on the images in players/ using a single neighbour per query
# (overriding train_knn's default of 5).
clf = train_knn(n_neighbors=1)

In [None]:
# Annotate and display every image found in the test/ folder.
for image_name in os.listdir('test'):
    image_path = 'test/' + image_name
    show_labels(image_path, predict(clf, image_path, threshold=0.6))