In [3]:
import pandas as pd 
from typing import Dict, Optional, List
from collections import Counter

In [4]:
lotto = pd.read_csv("./output/merged_lotto.csv")
lotto_plus = pd.read_csv("./output/merged_lottoplus.csv")
mini_lotto = pd.read_csv("./output/mini_lotto.csv")

print(lotto)

        number        date            numbers  szostka_count  szostka_value  \
0           47  22-12-1957   8,15,24,27,37,46              0            0.0   
1           46  15-12-1957   7,12,19,36,42,49              0            0.0   
2           45  08-12-1957  11,17,24,35,42,47              0            0.0   
3           44  01-12-1957    5,8,14,26,32,44              0            0.0   
4           43  24-11-1957    2,6,18,20,40,49              0            0.0   
...        ...         ...                ...            ...            ...   
201003    6986  11-01-2024   2,13,17,26,34,38              0            0.0   
201004    6985  09-01-2024   4,19,20,23,28,48              0            0.0   
201005    6984  06-01-2024      2,5,8,9,30,42              0            0.0   
201006    6983  04-01-2024  10,25,26,28,44,46              0            0.0   
201007    6982  02-01-2024  15,18,28,30,37,49              0            0.0   

        piatka_count  piatka_value  czworka_count  

In [5]:
def get_all_numbers(dataset) -> List:
    all_numbers = []
    for numbers_str in dataset['numbers']:
        numbers_list = numbers_str.split(',')
        all_numbers.extend(map(int, numbers_list))
    
    return all_numbers

In [6]:


def get_most_common_numbers(dataset, num_results: int = 6) -> Dict[Optional[int], Optional[int]]:
    all_numbers = get_all_numbers(dataset)
    
    counts_df = pd.DataFrame(all_numbers, columns=['number']).groupby('number').size().reset_index(name='count')
    
    counts_df = counts_df.sort_values(by='count', ascending=False)
    
    return counts_df.head(num_results)


In [7]:
get_most_common_numbers(lotto)

Unnamed: 0,number,count
16,17,26860
26,27,26593
33,34,26412
20,21,25988
30,31,25817
23,24,25709


In [8]:
get_most_common_numbers(lotto_plus)

Unnamed: 0,number,count
7,8,1811
39,40,1806
38,39,1765
43,44,1746
28,29,1744
15,16,1724


In [9]:
get_most_common_numbers(mini_lotto)

Unnamed: 0,number,count
20,21,9943
29,30,9834
16,17,9809
5,6,9628
27,28,9614
6,7,9546


In [10]:
 
def get_most_common_numbers_by_year(dataset, num_result: int = 6):
    dataset['year'] = pd.to_datetime(dataset['date'], format='%d-%m-%Y').dt.year
    
    result_df = pd.DataFrame(columns=['year', 'number', 'count'])
    
    for year, group in dataset.groupby('year'):
        all_numbers = []
        for numbers_str in group['numbers']:
            numbers_list = numbers_str.split(',')
            all_numbers.extend(map(int, numbers_list))
        
        counts_df = pd.DataFrame(all_numbers, columns=['number']).groupby('number').size().reset_index(name='count')
        counts_df = counts_df.sort_values(by='count', ascending=False)
        counts_df['year'] = year
        result_df = pd.concat([result_df, counts_df.head(num_result)])
    
    return result_df


In [11]:
get_most_common_numbers_by_year(lotto)

Unnamed: 0,year,number,count
7,1957,8,680
18,1957,19,612
12,1957,13,612
31,1957,32,544
47,1957,48,544
...,...,...,...
1,2024,2,9
25,2024,26,8
11,2024,12,8
43,2024,46,7


In [12]:
get_most_common_numbers_by_year(lotto_plus)

Unnamed: 0,year,number,count
39,2012,40,156
26,2012,27,130
42,2012,43,117
11,2012,12,117
43,2012,44,104
...,...,...,...
28,2024,29,8
7,2024,8,8
8,2024,9,8
36,2024,37,8


In [13]:
get_most_common_numbers_by_year(mini_lotto)

Unnamed: 0,year,number,count
25,1981,26,462
4,1981,5,462
33,1981,34,420
29,1981,30,378
14,1981,15,336
...,...,...,...
40,2024,41,18
17,2024,18,18
27,2024,28,16
30,2024,31,15


In [31]:
from itertools import combinations
from collections import Counter

def most_common_number_string(dataset):
    set_counts = Counter()
    max_length = 6
    
    numbers_split = dataset['numbers'].str.split(',')
    
    for nums in numbers_split:
        nums.sort()
        for length in range(2, min(max_length, len(nums)) + 1):
            sets = [tuple(sorted(combo)) for combo in combinations(nums, length)]
            set_counts.update(sets)
    
    sets_df = pd.DataFrame(list(set_counts.items()), columns=['Set', 'Count'])
    sets_df = sets_df.sort_values(by='Count', ascending=False)
    
    return sets_df

In [32]:
most_common_number_string(lotto)

Unnamed: 0,Set,Count
462,"(27, 34)",3645
5860,"(18, 9)",3580
2144,"(20, 29)",3577
902,"(17, 38)",3573
847,"(16, 6)",3548
...,...,...
150874,"(12, 17, 34, 39)",1
150875,"(12, 17, 34, 46)",1
150876,"(12, 17, 39, 46)",1
150877,"(12, 34, 39, 46)",1


In [33]:
most_common_number_string(lotto_plus)

Unnamed: 0,Set,Count
292,"(29, 6)",305
790,"(12, 8)",288
741,"(20, 40)",283
1562,"(17, 29)",278
4753,"(38, 40)",276
...,...,...
54489,"(18, 21, 29, 6)",1
54490,"(18, 21, 34, 38)",1
54491,"(18, 21, 34, 6)",1
54492,"(18, 21, 38, 6)",1


In [34]:
most_common_number_string(mini_lotto)

Unnamed: 0,Set,Count
178,"(15, 21)",1518
543,"(15, 30)",1435
921,"(32, 7)",1368
2648,"(2, 3)",1273
209,"(22, 29)",1253
...,...,...
43232,"(20, 27, 29, 31)",1
43233,"(19, 20, 27, 29, 31)",1
43234,"(2, 20, 26, 7)",1
43235,"(2, 20, 31, 7)",1
