# Imports

In [69]:
import argparse
import logging
import os
import random
import zipfile
from collections import Counter
from dataclasses import dataclass
from itertools import chain

import numpy as np
import pandas as pd
import requests

# Root logger (no name given), used by the functions below for progress messages
logger = logging.getLogger()
# Show records of level INFO and above
logging.basicConfig(level=logging.INFO)


# Constants, URLs and functions

## Constants

In [68]:
# Zip archives of Loto draw history hosted on media.fdj.fr
LOTO_URLS = [
    'https://media.fdj.fr/static/csv/loto/loto_197605.zip',
    'https://media.fdj.fr/static/csv/loto/loto_200810.zip',
    'https://media.fdj.fr/static/csv/loto/loto_201703.zip',
    'https://media.fdj.fr/static/csv/loto/loto_201902.zip',
    'https://media.fdj.fr/static/csv/loto/loto_201911.zip',
]

# One local CSV name per archive; derived from LOTO_URLS so both lists
# always have the same length
CSVS = [f'loto_{i}.csv' for i in range(len(LOTO_URLS))]

# Columns kept from each CSV: draw identifier, the five balls, the chance number
COLUMNS = ['annee_numero_de_tirage'] + [f'boule_{i}' for i in range(1, 6)] + ['numero_chance']


## Functions

In [70]:
def asint(l):
    """Return a new list holding ``int(item)`` for every item of ``l``."""
    return list(map(int, l))

def custom_fn(row):
    """Convert one draw record into ``(sorted_balls, [chance_number])``.

    ``row`` is read by position: items 1 to 5 are the five balls and item 6
    is the chance number (item 0 is not used). Every value is cast to ``int``.

    Raises:
        IndexError: If ``row`` holds fewer than seven items.
    """
    # Materialise the values so every lookup below is purely positional.
    # Using an integer key on a label-indexed pandas Series (``row[6]``) only
    # works through a deprecated positional fallback.
    values = list(row)
    balls = sorted(int(value) for value in values[1:6])
    return (balls, [int(values[6])])

def read_csv(filename):
    """Load one ``;``-separated draw file and return it with ``COLUMNS`` only.

    For the first entry of ``CSVS`` the last column is taken from
    ``boule_complementaire``: the selection is relabelled to ``COLUMNS`` and
    that value is reduced modulo 11. Every other file is simply restricted
    to ``COLUMNS``.
    """
    logger.info(f'Reading csv file {filename}')
    frame = pd.read_csv(filename, sep=';')

    # Common case first: the file already exposes the expected columns
    if filename != CSVS[0]:
        return frame[COLUMNS]

    ball_columns = [f'boule_{i}' for i in range(1, 6)]
    legacy_columns = ['annee_numero_de_tirage', *ball_columns, 'boule_complementaire']
    frame = frame[legacy_columns]
    frame.columns = COLUMNS
    frame.numero_chance = frame.numero_chance.map(lambda value: value % 11)
    return frame

def generate_games(games, history, n_games):
    """Draw ``n_games`` random grids weighted by past number frequencies.

    Args:
        games: Sequence of past draws; each entry is indexable with the balls
            at position 0 and the stars at position 1 (both iterables).
        history: Number of leading entries of ``games`` used to count
            frequencies. Values above ``len(games)`` are capped with a warning.
        n_games: Number of grids to generate.

    Returns:
        A list of ``(balls, star)`` tuples, where ``balls`` is a sorted list of
        five distinct ints, two or three of which are greater than 25, and
        ``star`` is a one-element list of int.

    Raises:
        ValueError: If the numbers seen in the selected history cannot form
            such a grid (the sampling loop could otherwise never terminate).
    """
    if history > len(games):
        logger.warning(f'History value exceeds the number of history games, setting history to maximal value: {len(games)}')
        history = len(games)

    logger.info(f'Generating {n_games} games based on the last past {history} games:')

    if n_games <= 0:
        return []

    window = games[:history]
    ball_counter = Counter(chain.from_iterable(game[0] for game in window))
    star_counter = Counter(chain.from_iterable(game[1] for game in window))

    # keys() and values() iterate in the same order, so numbers and weights line up
    played_balls = np.array(list(ball_counter.keys()))
    played_stars = np.array(list(star_counter.keys()))

    # A valid grid needs 5 distinct balls with 2 or 3 of them above 25.
    # Check feasibility up front instead of rejecting samples forever.
    n_high = int(np.count_nonzero(played_balls > 25))
    n_low = int(played_balls.size) - n_high
    if n_high < 2 or n_low < 2 or n_high + n_low < 5 or played_stars.size == 0:
        raise ValueError(
            f'Cannot build a grid with 2 or 3 balls above 25 from the last {history} games'
        )

    ball_counts = np.array(list(ball_counter.values()))
    star_counts = np.array(list(star_counter.values()))
    proba_balls = ball_counts / ball_counts.sum()
    proba_stars = star_counts / star_counts.sum()

    generated_games = []
    while len(generated_games) < n_games:
        # tolist() turns numpy scalars into plain Python ints
        ball_selection = sorted(
            np.random.choice(played_balls, size=5, replace=False, p=proba_balls).tolist()
        )
        star_selection = sorted(
            np.random.choice(played_stars, size=1, replace=False, p=proba_stars).tolist()
        )

        # Comparing against a coin flip between 2 and 3 accepts either count
        # with probability 1/2; accepting both directly yields the same
        # distribution with half as many rejected samples.
        if sum(ball > 25 for ball in ball_selection) in (2, 3):
            generated_games.append((ball_selection, star_selection))

    return generated_games

def download_helper():
    """Download and unpack every archive of ``LOTO_URLS`` whose CSV is missing.

    The archive at index ``i`` is saved as ``loto_{i}.zip`` in the working
    directory, its first member is extracted as ``loto_{i}.csv``, and the zip
    is then deleted. A CSV that already exists is left untouched.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    for i, url in enumerate(LOTO_URLS):
        zipfilename = f'loto_{i}.zip'
        csvfilename = f'loto_{i}.csv'
        if os.path.isfile(csvfilename):
            continue

        logger.info(f'Downloading file {csvfilename}')
        # A timeout keeps a stalled connection from hanging the run forever
        r = requests.get(url, allow_redirects=True, timeout=60)
        # Fail loudly on 4xx/5xx instead of saving an error page as a zip
        r.raise_for_status()

        try:
            with open(zipfilename, 'wb') as archive:
                archive.write(r.content)
            with zipfile.ZipFile(zipfilename) as zipdata:
                zipinfo = zipdata.infolist()[0]
                # Renaming the member before extraction makes it land as csvfilename
                zipinfo.filename = csvfilename
                zipdata.extract(zipinfo)
        finally:
            # Drop the temporary archive even when unpacking failed
            if os.path.isfile(zipfilename):
                os.remove(zipfilename)


def download_data(missing_only=True):
    """Fetch the draw-history CSV files through ``download_helper``.

    With ``missing_only`` true, the helper runs only when at least one entry
    of ``CSVS`` is absent from disk, and that fact is logged first. With
    ``missing_only`` false, the helper runs unconditionally.
    """
    if missing_only:
        # Nothing to do when every expected CSV is already present
        if all(os.path.isfile(path) for path in CSVS):
            return
        logger.info('Some files are missing, we are downloading the files')
    download_helper()

# Loading the data

In [71]:
download_data(missing_only=True) # Download only when some CSV files are missing. Set to False to run the downloader unconditionally

# Processing the data

In [72]:
# Show every column when a DataFrame is displayed
pd.set_option('display.max_columns', None)

# Raw content of the first file
csv = CSVS[0]
df = pd.read_csv(csv, sep=';')


# Stack all files, ordered by descending draw identifier
total_games = pd.concat(
    [read_csv(path) for path in CSVS],
    axis=0,
).sort_values(by='annee_numero_de_tirage', ascending=False)

# One (balls, [chance number]) tuple per draw
games = total_games.apply(custom_fn, axis=1).values.tolist()

INFO:root:Reading csv file loto_0.csv
INFO:root:Reading csv file loto_1.csv
INFO:root:Reading csv file loto_2.csv
INFO:root:Reading csv file loto_3.csv
INFO:root:Reading csv file loto_4.csv


# Generating Loto games

In [73]:
@dataclass
class Arguments:
    """Settings for one generation run."""

    ngames: int = 2  # number of grids to generate
    history: int = 50  # number of past draws used to weight the numbers


ngames = 2
history = 50

# Keyword arguments keep the mapping explicit if the fields are ever reordered
args = Arguments(ngames=ngames, history=history)

gen_games = generate_games(games, args.history, args.ngames)

for game in gen_games:
    print(game)

INFO:root:Generating 2 games based on the last past 50 games:


([13, 17, 24, 33, 41], [1])
([12, 15, 34, 37, 44], [6])
