## Scrape Lotto winning numbers from the official site
https://www.loterienationale.mu/fr/tirages-et-archives?field_date_du_tirage_value%5Bvalue%5D%5Bdate%5D=04+Jan+2023

In [None]:
import requests
from lxml import etree
from bs4 import BeautifulSoup
from datetime import date, datetime, timedelta
import pandas as pd
import numpy as np
import calendar

In [None]:
# Example of a fully encoded request URL, kept for reference only (not used below).
sample = "https://www.loterienationale.mu/fr/tirages-et-archives?field_date_du_tirage_value%5Bvalue%5D%5Bdate%5D=04+Jan+2023"
# Base endpoint of the draw archive; scrape() reads this module-level name.
url = "https://www.loterienationale.mu/fr/tirages-et-archives"
# Query parameter selecting the draw date: day, French month abbreviation, year.
params = {'field_date_du_tirage_value[value][date]': '02 Fév 2022'}
# Without a timeout requests can block forever on a stalled connection;
# use the same 5-second limit that scrape() applies.
page = requests.get(url, params=params, timeout=5)
# Last expression of the cell: displays the response object in the notebook.
page


In [None]:
def get_winning_number(page):
    """Extract the six winning numbers from a draw-results page.

    The HTML is parsed with BeautifulSoup, re-serialised and handed to lxml so
    the element whose id is ``num-gagnants`` can be located with XPath. Its
    leading text is expected to be a comma-separated list of integers.

    Args:
        page: A response-like object exposing the raw HTML as ``page.content``.

    Returns:
        A dict mapping ``'A'`` .. ``'F'`` to the first six numbers, or ``None``
        when the element is missing, has no text, contains a non-integer
        value, or holds fewer than six values.
    """
    soup = BeautifulSoup(page.content, "html.parser")
    dom = etree.HTML(str(soup))
    if dom is None:
        # lxml could not build a tree (e.g. empty document).
        return None

    nodes = dom.xpath('//*[@id="num-gagnants"]')
    if not nodes or not nodes[0].text:
        return None

    # Only a malformed number is an expected failure here; anything else
    # (including KeyboardInterrupt) is allowed to propagate.
    try:
        lotto_nums = [int(part) for part in nodes[0].text.split(',')]
    except ValueError:
        return None

    if len(lotto_nums) < 6:
        return None
    # zip stops after the six labels, so extra numbers are ignored.
    return dict(zip('ABCDEF', lotto_nums))
    

# Try the parser on the sample `page` fetched earlier; the result is None when parsing fails.
get_winning_number(page)

In [None]:
def sort_dates(dates):
    """Return a new list of ``'%d %b %Y'`` date strings in chronological order.

    Each string is parsed with ``datetime.strptime`` only to build the sort
    key; the returned items are the original strings. The input is not
    modified.
    """
    return sorted(dates, key=lambda text: datetime.strptime(text, '%d %b %Y'))

def replace_french_month(dates):
    """Swap the month abbreviation of each date for the French one.

    Args:
        dates: Iterable of date strings in ``'%d %b %Y'`` form, using the
            month abbreviations of the current locale (``calendar.month_abbr``).

    Returns:
        A new list with the month abbreviation of every entry replaced by the
        French spelling from the table below. The input is left untouched.

    Raises:
        ValueError: If an entry does not match ``'%d %b %Y'``.
    """
    # Month number -> French abbreviation, spelled as in the original table.
    fr_month = {
        1: 'Jan', 2: 'Fév', 3: 'Mars', 4: 'Avr', 5: 'mai', 6: 'Juin',
        7: 'Jui', 8: 'Août', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec',
    }

    converted = []
    for date_string in dates:
        month_num = datetime.strptime(date_string, '%d %b %Y').month
        converted.append(
            date_string.replace(calendar.month_abbr[month_num], fr_month[month_num])
        )
    return converted
 
def get_lotto_days(year):
    """List every Wednesday and Saturday of ``year`` in chronological order.

    Args:
        year: Calendar year as an integer.

    Returns:
        A two-element list ``[french_dates, english_dates]``. Both hold the
        same days formatted as ``'%d %b %Y'``; the first uses the French month
        abbreviations produced by ``replace_french_month``.
    """
    # date_range endpoints are inclusive, so stop at 31 December; ending on
    # 1 January of the following year could leak that day into the result.
    start, end = f'{year}-01-01', f'{year}-12-31'
    wed = pd.date_range(start=start, end=end, freq='W-WED').strftime('%d %b %Y').tolist()
    sat = pd.date_range(start=start, end=end, freq='W-SAT').strftime('%d %b %Y').tolist()

    # Concatenate instead of zip(): a year can contain 53 of one weekday and
    # 52 of the other, and zip() would silently drop the extra date.
    eng_dates = sort_dates(wed + sat)
    # Pass a copy so the English list survives even if the helper edits its
    # argument in place.
    fr_dates = replace_french_month(list(eng_dates))
    return [fr_dates, eng_dates]
    
# Scratch check (disabled): replace_french_month(get_lotto_days(2022)[9:15])
# Lotto draws started on 07 November 2009 -- presumably why the 2009 dates are sliced from index 88.
get_lotto_days(2009)[0][88:]
# First four French-formatted draw dates of 2023.
get_lotto_days(2023)[0][:4]

In [67]:
def scrape(year):
    """Download the winning numbers for every candidate draw date in ``year``.

    One request per Wednesday/Saturday is sent to the module-level ``url``.
    Pages containing ``b"Pas de tirage"`` (French for "no draw") are skipped,
    as are pages from which no numbers could be parsed.

    Args:
        year: Calendar year as an integer.

    Returns:
        A list of dicts with keys ``'Game'``, ``'Date'`` (``dd/mm/YYYY``) and
        ``'A'`` .. ``'F'``. ``'Game'`` is the 1-based position of the date in
        the year's calendar, so skipped dates leave gaps in the numbering.
    """
    # Build the calendar once; both lists describe the same days in order.
    lotto_date_range, lotto_eng_dates = get_lotto_days(year)
    data = []

    for i, (lotto_date, eng_date) in enumerate(zip(lotto_date_range, lotto_eng_dates), start=1):
        params = {'field_date_du_tirage_value[value][date]': lotto_date}
        page = requests.get(url, params=params, timeout=5)
        if b"Pas de tirage" in page.content:
            continue

        numbers = get_winning_number(page)
        if not numbers:
            # The parser returns None on failure; unpacking that with ** would
            # raise TypeError and abort the whole run.
            print(f"No winning numbers found for {lotto_date}; skipping")
            continue

        date_format = datetime.strptime(eng_date, '%d %b %Y').strftime("%d/%m/%Y")
        final_dict = {'Game': i, 'Date': date_format, **numbers}
        print(final_dict)
        data.append(final_dict)

    return data


In [73]:
# Year whose draws are scraped; it also names the output CSV file.
year = 2023
# NOTE: despite its name, lotto_df is the list of row dicts returned by scrape(), not a DataFrame.
lotto_df = scrape(year)

# Tabulate the rows with a fixed column order.
df = pd.DataFrame(lotto_df, columns=('Game', 'Date', 'A', 'B', 'C', 'D', 'E', 'F'))
# 1-based index for display only; it is not written to the CSV because of index=False below.
df.index = np.arange(1, len(df)+1)
df.to_csv(f'lotto-{year}.csv', index=False)
# Last expression of the cell: shows the table in the notebook.
df

{'Game': 1, 'Date': '04/01/2023', 'A': 7, 'B': 12, 'C': 15, 'D': 21, 'E': 30, 'F': 34}
{'Game': 2, 'Date': '07/01/2023', 'A': 3, 'B': 8, 'C': 26, 'D': 28, 'E': 33, 'F': 35}
{'Game': 3, 'Date': '11/01/2023', 'A': 1, 'B': 5, 'C': 14, 'D': 18, 'E': 31, 'F': 35}
{'Game': 4, 'Date': '14/01/2023', 'A': 11, 'B': 14, 'C': 20, 'D': 24, 'E': 28, 'F': 29}


Unnamed: 0,Game,Date,A,B,C,D,E,F
1,1,04/01/2023,7,12,15,21,30,34
2,2,07/01/2023,3,8,26,28,33,35
3,3,11/01/2023,1,5,14,18,31,35
4,4,14/01/2023,11,14,20,24,28,29
