# Import libraries

In [10]:
import pandas as pd
import plotly.graph_objs as go
import random

# Import csv

In [11]:
# Load the draw history; the file uses ';' as its field delimiter.
df = pd.read_csv('./euromillions.csv', delimiter=";")
df

Unnamed: 0,Date,N1,N2,N3,N4,N5,E1,E2,Winner,Gain
0,2021-12-07,22,31,38,46,47,7,11,1,143000000
1,2021-12-03,21,22,29,32,46,9,10,0,130000000
2,2021-11-30,19,20,26,33,43,1,4,0,0
3,2021-11-26,1,20,41,42,50,3,7,1,163000000
4,2021-11-23,17,21,36,42,46,7,10,0,145000000
...,...,...,...,...,...,...,...,...,...,...
1478,2004-03-12,15,24,28,44,47,4,5,0,15000000
1479,2004-03-05,4,7,33,37,39,1,5,1,20000000
1480,2004-02-27,14,18,19,31,37,4,5,0,17000000
1481,2004-02-20,7,13,39,47,50,2,5,0,15000000


# Get the win numbers

In [12]:
# Keep only the rows whose 'Gain' is non-zero and renumber them from 0.
has_gain = df['Gain'].ne(0)
dfWin = df.loc[has_gain].reset_index(drop=True)
dfWin

Unnamed: 0,Date,N1,N2,N3,N4,N5,E1,E2,Winner,Gain
0,2021-12-07,22,31,38,46,47,7,11,1,143000000
1,2021-12-03,21,22,29,32,46,9,10,0,130000000
2,2021-11-26,1,20,41,42,50,3,7,1,163000000
3,2021-11-23,17,21,36,42,46,7,10,0,145000000
4,2021-11-19,22,26,38,47,50,2,6,0,131000000
...,...,...,...,...,...,...,...,...,...,...
1470,2004-03-12,15,24,28,44,47,4,5,0,15000000
1471,2004-03-05,4,7,33,37,39,1,5,1,20000000
1472,2004-02-27,14,18,19,31,37,4,5,0,17000000
1473,2004-02-20,7,13,39,47,50,2,5,0,15000000


# Count how often each winning number appeared

In [13]:
# count how many times each number appears across the five ball columns
def count_numbers(df):
    """Return the number of occurrences of each value over columns N1..N5.

    Each column is tallied with ``value_counts`` and the tallies are summed
    label by label. A value that is absent from one column contributes 0 for
    that column (``fill_value=0``) instead of producing NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns 'N1', 'N2', 'N3', 'N4' and 'N5'.

    Returns
    -------
    pandas.Series
        Indexed by drawn value; each entry is the summed occurrence count.
    """
    tallies = [df[column].value_counts() for column in ('N1', 'N2', 'N3', 'N4', 'N5')]
    total = tallies[0]
    for tally in tallies[1:]:
        total = total.add(tally, fill_value=0)
    return total

In [14]:
# Tally how often each value appears across N1..N5 in the filtered draws (dfWin).
count = count_numbers(dfWin)

In [15]:
# Bar chart of the tally: drawn value on the x-axis, occurrence count on the y-axis.
fig = go.Figure(data=[go.Bar(x=count.index, y=count.values, name='Win')])
fig.show()

# Verify randomness

First of all, we are going to verify that the draws are indeed random.    
To do so, as with the dataset provided to us, we will generate 1,318 random draws of 5 distinct numbers between 1 and 50.     
We will then compare these simulated draws to the dataset.

In [16]:
def generateDraws(nbrDraw):
    """Simulate ``nbrDraw`` random draws of 5 distinct numbers between 1 and 50.

    Parameters
    ----------
    nbrDraw : int
        Number of draws (rows) to generate.

    Returns
    -------
    pandas.DataFrame
        One row per draw with columns 'N1'..'N5', holding the five sampled
        numbers in the order ``random.sample`` returned them.
    """
    columns = ['N1', 'N2', 'N3', 'N4', 'N5']
    # Collect every draw first and build the frame once: DataFrame.append was
    # removed in pandas 2.0, and growing a frame row by row copies it on each
    # iteration (quadratic cost, prohibitive for 100,000 draws).
    draws = [random.sample(range(1, 51), 5) for _ in range(nbrDraw)]
    return pd.DataFrame(draws, columns=columns)

# Seed the generator so that Restart & Run All reproduces the same simulated
# draws (and therefore the same charts) on every run.
random.seed(42)
dfLittleDraw = generateDraws(1318)

In [17]:
# Tally occurrences for the simulated draws and for the dataset draws.
countLittle, countDfWin = count_numbers(dfLittleDraw), count_numbers(dfWin)
# Plot both tallies as two bar traces on the same figure.
fig = go.Figure(data=[
    go.Bar(x=countLittle.index, y=countLittle.values, name='Random'),
    go.Bar(x=countDfWin.index, y=countDfWin.values, name='Datasource'),
])
fig.show()

We can observe that the number counts from the datasource are similar to those of the draws generated randomly on a computer, so the real draws can be considered reliably random.

# Convergence

In order to analyze the convergence of the draws, we are going to generate 100,000 more random draws of 5 numbers.

In [18]:
# Extend the initial simulated sample with 100,000 additional simulated draws.
extraDraws = generateDraws(100_000)
dfManyDraw = pd.concat([dfLittleDraw, extraDraws])

In [19]:
# count values in dataframe
# Reuse count_numbers rather than summing value_counts with `+`: the helper
# adds with fill_value=0, so a number missing from one column is counted as 0
# for that column instead of turning the whole total into NaN.
countMany = count_numbers(dfManyDraw)

# normalize counts so the most frequent number has height 1 in each series,
# which makes the two sample sizes comparable on one axis
countLittle = countLittle / countLittle.max()
countMany = countMany / countMany.max()

# trace the chart; legend labels are derived from the actual sample sizes so
# they stay correct if the number of simulated draws changes
fig = go.Figure()
fig.add_trace(go.Bar(x=countLittle.index, y=countLittle.values, name=f'{len(dfLittleDraw):,}'))
fig.add_trace(go.Bar(x=countMany.index, y=countMany.values, name=f'{len(dfManyDraw):,}'))
fig.show()

As can be seen in this comparison, the counts of the numbers drawn tend to stabilize as the number of draws grows, and the gap between the most and least frequently drawn numbers narrows.

# Conclusion
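We can notice that the counts of the numbers drawn are for the moment slightly unbalanced, but their relative frequencies will gradually even out as more draws accumulate (law of large numbers).    
However, because each draw is independent of the previous ones, we cannot conclude that the rarely drawn numbers will be drawn more often in the future than those which have often been drawn: every number keeps the same probability at every draw.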