In [1]:
import pandas as pd
import plotly
import seaborn as sns
from matplotlib import pyplot
from pathlib import Path
import os
from datetime import datetime

In [2]:
# Project layout: the root is the parent of the current working directory,
# with the Chispazo CSVs under <root>/data/chispazo.
root_dir = Path.cwd().parent
data_dir = root_dir.joinpath("data")
chispazo_dir = data_dir.joinpath('chispazo')

In [3]:
# Names of every entry in the Chispazo data directory. os.listdir returns
# them in arbitrary order, so do not rely on the listing being sorted.
chispazo_files = os.listdir(chispazo_dir)

In [4]:
# Show the raw directory listing.
chispazo_files

['170722.csv',
 '140221.csv',
 '020825.csv',
 '100126.csv',
 '110725.csv',
 '100725.csv']

In [5]:
# Print each file name up to its first dot (e.g. "170722" for "170722.csv").
for entry_name in chispazo_files:
    prefix, _, _ = entry_name.partition('.')
    print(prefix)

170722
140221
020825
100126
110725
100725


In [6]:
# Build (file stem, ISO date string) pairs for every CSV in the directory whose
# stem parses as a DDMMYY date; CSVs with any other kind of name are skipped.
dates = []
for csv_path in chispazo_dir.glob('*.csv'):
    file_name = csv_path.stem
    try:
        parsed_date = datetime.strptime(file_name, "%d%m%y")
    except ValueError:
        # Stem is not a DDMMYY date: ignore this file.
        continue
    dates.append((file_name, str(parsed_date.date())))

In [7]:
# Select the (file stem, date string) pair with the latest date. The second
# tuple element is a "YYYY-MM-DD" string, so comparing as text is chronological.
if not dates:
    # Fail loudly here: later cells need `most_recent`, and merely printing a
    # message would leave it undefined and surface as an unrelated NameError.
    raise FileNotFoundError("Files were not found in directory")
most_recent = max(dates, key=lambda x: x[1])

In [8]:
# Inspect the selected (file stem, date string) pair.
most_recent

('100126', '2026-01-10')

In [9]:
# Load the CSV whose date-coded file name is the most recent one.
# NOTE(review): the displayed frame has an "Unnamed: 0" column, which suggests
# the CSV was saved together with its index - consider index_col=0; confirm.
latest_csv_path = f'{chispazo_dir}/{most_recent[0]}.csv'
df = pd.read_csv(latest_csv_path)

In [10]:
# Preview the first rows exactly as loaded from the file.
df.head()

Unnamed: 0,CONCURSO,R1,R2,R3,R4,R5,FECHA
0,11388,1,5,13,17,28,09/07/2025
1,11387,3,11,19,25,27,09/07/2025
2,11386,7,9,10,15,19,08/07/2025
3,11385,3,7,14,26,28,08/07/2025
4,11384,1,3,23,24,27,07/07/2025


In [11]:
# Order the draws by ascending contest number (CONCURSO), with any missing
# values last. The result is reassigned rather than sorted with inplace=True:
# the method-returning form is the one pandas recommends and keeps this step
# chainable.
df = df.sort_values(by='CONCURSO', axis=0, ascending=True, na_position='last')

In [12]:
# Check the start of the sorted frame: the lowest CONCURSO values come first.
df.head()

Unnamed: 0,CONCURSO,R1,R2,R3,R4,R5,FECHA
11387,1,4,7,11,21,24,05/01/1999
11386,2,5,12,18,22,23,08/01/1999
11385,3,1,12,16,18,21,12/01/1999
11384,4,6,10,16,20,25,19/01/1999
11383,5,12,14,17,21,24,19/01/1999


In [13]:
# Check the end of the sorted frame: the highest CONCURSO values come last.
df.tail()

Unnamed: 0,CONCURSO,R1,R2,R3,R4,R5,FECHA
4,11384,1,3,23,24,27,07/07/2025
3,11385,3,7,14,26,28,08/07/2025
2,11386,7,9,10,15,19,08/07/2025
1,11387,3,11,19,25,27,09/07/2025
0,11388,1,5,13,17,28,09/07/2025


In [14]:
# Renumber the rows 0..n-1 in their current order and discard the old row
# labels (drop=True keeps them from being added back as a column).
# Reassigned instead of inplace=True, the form pandas recommends.
df = df.reset_index(drop=True)

In [15]:
# How often each value appears in column R1, least frequent first.
df['R1'].value_counts().sort_values(ascending=True)

R1
22       1
21       6
20      11
19      17
18      24
17      37
16      43
15      82
14     111
13     162
12     206
11     263
10     374
9      452
8      540
7      690
6      817
5     1055
4     1264
3     1440
2     1738
1     2055
Name: count, dtype: int64

In [16]:
# How often each value appears in column R2, least frequent first.
df['R2'].value_counts().sort_values(ascending=True)

R2
25      3
24     11
23     21
22     53
21     75
20    137
19    151
18    216
17    308
2     335
16    351
15    461
3     515
14    572
13    586
12    707
4     720
11    789
5     800
6     880
10    904
9     905
7     920
8     968
Name: count, dtype: int64

In [17]:
# How often each value appears in column R3, least frequent first.
df['R3'].value_counts().sort_values(ascending=True)

R3
26     29
3      50
25     74
4      86
5     171
24    183
23    237
6     288
22    352
7     365
21    447
8     475
20    511
9     584
19    646
10    678
18    687
11    709
17    747
12    765
16    808
15    810
14    828
13    858
Name: count, dtype: int64

In [18]:
# How often each value appears in column R4, least frequent first.
df['R4'].value_counts().sort_values(ascending=True)

R4
4       4
5      17
6      30
7      57
8      83
9     112
10    190
11    247
27    272
12    339
13    352
14    475
26    478
15    606
25    613
16    669
17    775
24    784
18    822
19    851
23    879
22    899
21    910
20    924
Name: count, dtype: int64

In [19]:
# How often each value appears in column R5, least frequent first.
df['R5'].value_counts().sort_values(ascending=True)

R5
6        1
7        4
9        6
8        8
11      21
10      22
12      45
13      62
14      85
15     105
16     154
17     243
18     336
19     383
20     442
21     664
22     727
23     947
24    1072
26    1273
25    1404
27    1619
28    1765
Name: count, dtype: int64