# Imports

In [26]:
import logging
import requests
import zipfile
import os
import pandas as pd
import numpy as np
from itertools import chain
from collections import Counter
import random
import argparse
from dataclasses import dataclass

# Configure the root logger to emit INFO-level records, then keep a handle on
# it for the helper functions defined in this notebook.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()


# Constants, URLs, and functions

## Constants

In [2]:
# Zip archives to download, one per entry, in the order they are indexed below.
URLS = [
    'https://media.fdj.fr/static/csv/euromillions/euromillions_200402.zip',
    'https://media.fdj.fr/static/csv/euromillions/euromillions_201105.zip',
    'https://media.fdj.fr/static/csv/euromillions/euromillions_201402.zip',
    'https://media.fdj.fr/static/csv/euromillions/euromillions_201609.zip',
    'https://media.fdj.fr/static/csv/euromillions/euromillions_201902.zip',
    'https://media.fdj.fr/static/csv/euromillions/euromillions_202002.zip',
]

# Local CSV name for each archive: euromillion_<position in URLS>.csv
CSVS = [f'euromillion_{position}.csv' for position, _ in enumerate(URLS)]

# Columns kept from every CSV: the draw number, five balls and two stars.
COLUMNS = [
    'annee_numero_de_tirage',
    *(f'boule_{n}' for n in range(1, 6)),
    *(f'etoile_{n}' for n in range(1, 3)),
]

## Functions

In [20]:
def asint(l):
    """Return a new list holding ``int(e)`` for every element ``e`` of ``l``."""
    return list(map(int, l))


def custom_fn(row):
    """Split one draw row into its sorted balls and sorted stars.

    Positions 1 to 5 of ``row`` are read as the ball values and positions 6
    onward as the star values; position 0 is ignored.

    Returns:
        A ``(balls, stars)`` tuple of two ascending lists of ints.
    """
    balls = asint(row[1:6])
    stars = asint(row[6:])
    balls.sort()
    stars.sort()
    return (balls, stars)

def read_csv(filename):
    """Load one downloaded history CSV and reduce it to the ``COLUMNS`` layout.

    The file is parsed as ``;``-delimited, latin-1 encoded text.

    Args:
        filename: Path of the CSV file to read. When it equals ``CSVS[3]`` the
            file goes through the special-case branch below.

    Returns:
        A DataFrame whose columns are exactly ``COLUMNS``.

    Raises:
        KeyError: If an expected column is missing from the file.
    """
    data = pd.read_csv(filename, delimiter=';', encoding='latin-1')
    if filename == CSVS[3]:
        # NOTE(review): this one archive is read through a shifted set of
        # header names and takes its draw number from the row index --
        # presumably its header is misaligned with its data rows; confirm
        # against the file.
        shifted_columns = [
            'annee_numero_de_tirage', 'date_de_forclusion',
            'boule_1', 'boule_2', 'boule_3', 'boule_4', 'boule_5',
            'etoile_1',
        ]
        # Work on an explicit copy: assigning a column on a column-subset of
        # the parsed frame otherwise triggers pandas' SettingWithCopyWarning.
        data = data[shifted_columns].copy()
        data.columns = COLUMNS
        data['annee_numero_de_tirage'] = data.index
    else:
        data = data[COLUMNS]
    return data

def generate_games(games, history, n_games):
    """Draw random grids weighted by how often each number appeared recently.

    The first ``history`` entries of ``games`` are tallied; balls and stars
    are then sampled without replacement with probability proportional to
    their frequency in that window. A grid is kept only when 2 or 3 of its
    five balls are greater than 25.

    Args:
        games: Sequence of past draws, each indexable as ``(balls, stars)``.
        history: Number of leading entries of ``games`` to tally. Values
            larger than ``len(games)`` are clamped (with a warning).
        n_games: Number of grids to generate; ``0`` or less yields ``[]``.

    Returns:
        A list of ``(balls, stars)`` tuples, each a sorted list of plain
        Python ints (5 balls, 2 stars).

    Raises:
        ValueError: If ``history`` is smaller than 1, or if the selected
            window does not contain enough distinct numbers to build a grid
            that satisfies the 2-or-3-balls-above-25 rule. Without this check
            the rejection loop below would never terminate.
    """
    n_balls, n_stars = 5, 2
    high_threshold = 25
    accepted_high_counts = (2, 3)

    if history > len(games):
        logger.warning(
            'History value exceeds the number of history games, '
            'setting history to maximal value: %s', len(games))
        history = len(games)

    logger.info('Generating %s games based on the last past %s games:',
                n_games, history)

    if n_games <= 0:
        return []
    if history < 1:
        # A negative value would silently slice the wrong window.
        raise ValueError(f'history must be at least 1, got {history}')

    recent_games = games[:history]
    ball_counter = Counter(chain.from_iterable(game[0] for game in recent_games))
    star_counter = Counter(chain.from_iterable(game[1] for game in recent_games))

    played_balls = np.array(list(ball_counter))
    played_stars = np.array(list(star_counter))

    # Normalise the occurrence counts into sampling probabilities.
    ball_counts = np.array(list(ball_counter.values()), dtype=float)
    star_counts = np.array(list(star_counter.values()), dtype=float)
    proba_balls = ball_counts / ball_counts.sum()
    proba_stars = star_counts / star_counts.sum()

    if played_balls.size < n_balls or played_stars.size < n_stars:
        raise ValueError(
            f'The last {history} games contain {played_balls.size} distinct balls '
            f'and {played_stars.size} distinct stars; at least {n_balls} and '
            f'{n_stars} are required')

    # Make sure at least one accepted split of high/low balls is reachable.
    n_high = int(np.count_nonzero(played_balls > high_threshold))
    n_low = int(played_balls.size) - n_high
    reachable = any(k <= n_high and n_balls - k <= n_low
                    for k in accepted_high_counts)
    if not reachable:
        raise ValueError(
            f'The last {history} games cannot produce a grid with 2 or 3 balls '
            f'above {high_threshold} ({n_high} high and {n_low} low balls played)')

    generated_games = []
    while len(generated_games) < n_games:
        ball_selection = sorted(
            int(ball) for ball in np.random.choice(
                played_balls, size=n_balls, replace=False, p=proba_balls))

        # Accepting any grid with 2 or 3 high balls yields the same
        # distribution as comparing against a coin flip between 2 and 3,
        # while rejecting half as many candidates.
        n_selected_high = sum(ball > high_threshold for ball in ball_selection)
        if n_selected_high not in accepted_high_counts:
            continue

        star_selection = sorted(
            int(star) for star in np.random.choice(
                played_stars, size=n_stars, replace=False, p=proba_stars))
        generated_games.append((ball_selection, star_selection))

    return generated_games

def download_helper(download_all=False):
    """Download the archives listed in ``URLS`` and extract one CSV from each.

    For the archive at position ``i`` the zip is saved temporarily as
    ``euromillion_{i}.zip`` in the current directory, its first member is
    extracted as ``euromillion_{i}.csv``, and the zip is then deleted.

    Args:
        download_all: When false, archives whose CSV already exists on disk
            are skipped; when true, every archive is downloaded again.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
        zipfile.BadZipFile: If the downloaded payload is not a valid zip.
    """
    for i, url in enumerate(URLS):
        zipfilename = f'euromillion_{i}.zip'
        csvfilename = f'euromillion_{i}.csv'
        if os.path.isfile(csvfilename) and not download_all:
            continue

        logger.info(f'Downloading file {csvfilename}')
        # Without a timeout the request can block forever on a stalled server.
        r = requests.get(url, allow_redirects=True, timeout=60)
        # Fail here rather than writing an HTML error page into the zip file.
        r.raise_for_status()

        try:
            with open(zipfilename, 'wb') as zip_file:
                zip_file.write(r.content)
            # The archive must be closed before it is removed: an open handle
            # makes os.remove fail on Windows.
            with zipfile.ZipFile(zipfilename) as zipdata:
                zipinfo = zipdata.infolist()[0]
                # Rename the member so that it is extracted under the CSV name.
                zipinfo.filename = csvfilename
                zipdata.extract(zipinfo)
        finally:
            # Clean up the temporary zip even if extraction failed.
            if os.path.isfile(zipfilename):
                os.remove(zipfilename)


def download_data(download_all=False):
    """Make sure the CSV files listed in ``CSVS`` are available on disk.

    Args:
        download_all: When true, ``download_helper`` is always invoked. When
            false, it is invoked only if at least one file of ``CSVS`` is
            missing; otherwise nothing happens.
    """
    if not download_all:
        absent = [name for name in CSVS if not os.path.isfile(name)]
        if not absent:
            return
        logger.info('Some files are missing, we are downloading the files')
    download_helper(download_all)

# Loading the data

In [21]:
# Download only the CSV files that are missing from disk.
# Pass download_all=True to download every historical file again.
download_data(download_all=False)

# Processing the data

In [22]:
# Do not truncate the column list when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Raw view of the first CSV file (not used by the statements below).
csv = CSVS[0]
df = pd.read_csv(csv, sep=';')

# Stack every CSV file, order the rows by descending draw number, then turn
# each row into a (sorted balls, sorted stars) tuple.
total_games = pd.concat([read_csv(name) for name in CSVS], axis=0)
total_games = total_games.sort_values(by='annee_numero_de_tirage', ascending=False)
games = total_games.apply(custom_fn, axis=1).values.tolist()

INFO:root:Reading csv file euromillion_0.csv
INFO:root:Reading csv file euromillion_1.csv
INFO:root:Reading csv file euromillion_2.csv
INFO:root:Reading csv file euromillion_3.csv
INFO:root:Reading csv file euromillion_4.csv
INFO:root:Reading csv file euromillion_5.csv


# Generating EuroMillions games

In [31]:
@dataclass
class Arguments:
    """Parameters for one game-generation run."""
    # Number of games to generate (passed to generate_games as n_games).
    ngames: int = 2
    # Number of past games to base the generation on (passed as history).
    history: int = 50

# Parameters for this run.
ngames, history = 2, 20

args = Arguments(ngames=ngames, history=history)

# Generate the requested games from the loaded history and print one per line.
gen_games = generate_games(games, history=args.history, n_games=args.ngames)

for game in gen_games:
    print(game)

INFO:root:Generating 2 games based on the last past 1551 games:


([3, 15, 34, 35, 48], [4, 11])
([1, 5, 17, 27, 37], [2, 5])
