## Preprocess

In [1]:
import json
import requests

import matplotlib.pyplot as plt
import pandas as pd

# Base URL of the lotto draw endpoint; the draw number is appended to the end.
LOTTO_API = "https://www.dhlottery.co.kr/common.do?method=getLottoNumber&drwNo="

## Data

### Get lotto data (run once, for the initial download)

In [3]:
# Download every draw from the API: start at draw 1 and walk forward until
# the API reports "fail" for a draw number.
lotto_data = []

print("--- GET lotto data ---")

drw_no = 1
while True:
  # The timeout stops a stalled connection from hanging the cell forever.
  res = requests.get(LOTTO_API + str(drw_no), timeout=10)
  # Raise on HTTP 4xx/5xx rather than trying to parse an error page as JSON.
  res.raise_for_status()
  drw = res.json()

  # A "fail" returnValue ends the download.
  if drw["returnValue"] == "fail":
    break
  lotto_data.append(drw)

  # Progress line every 100 draws.
  if drw["drwNo"] % 100 == 0:
    print("Current drwNo:", drw["drwNo"])
  drw_no += 1

print("--- END lotto data ---")
print()

print("Total data:", len(lotto_data))

--- GET lotto data ---
Current drwNo: 100
Current drwNo: 200
Current drwNo: 300
Current drwNo: 400
Current drwNo: 500
Current drwNo: 600
Current drwNo: 700
Current drwNo: 800
Current drwNo: 900
Current drwNo: 1000
--- END lotto data ---

Total data: 1076


In [4]:
# One row per downloaded draw; columns come from the keys of each record.
lotto_df = pd.DataFrame(data=lotto_data)
lotto_df

Unnamed: 0,totSellamnt,returnValue,drwNoDate,firstWinamnt,drwtNo6,drwtNo4,firstPrzwnerCo,drwtNo5,bnusNo,firstAccumamnt,drwNo,drwtNo2,drwtNo3,drwtNo1
0,3681782000,success,2002-12-07,0,40,33,0,37,16,863604600,1,23,29,10
1,4904274000,success,2002-12-14,2002006800,42,25,1,32,2,0,2,13,21,9
2,4729342000,success,2002-12-21,2000000000,31,21,1,27,30,0,3,16,19,11
3,5271464000,success,2002-12-28,0,42,31,0,40,2,1267147200,4,27,30,14
4,6277102000,success,2003-01-04,0,42,40,0,41,3,3041094900,5,24,29,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1071,108156654000,success,2023-06-17,2175006375,43,23,12,32,27,26100076500,1072,18,20,16
1072,107352123000,success,2023-06-24,2345227603,38,30,11,32,15,25797503633,1073,18,28,6
1073,105744157000,success,2023-07-01,2134763657,41,27,12,28,15,25617163884,1074,6,20,1
1074,105635232000,success,2023-07-08,2896337167,45,35,9,44,10,26067034503,1075,23,24,1


### Update lotto data

In [8]:
# Load the previously saved draws from the tab-separated file.
lotto_df = pd.read_csv(
  "./data/lotto.tsv",
  sep="\t",
)
lotto_df

Unnamed: 0,drwNo,returnValue,drwNoDate,totSellamnt,firstAccumamnt,firstPrzwnerCo,firstWinamnt,drwtNo1,drwtNo2,drwtNo3,drwtNo4,drwtNo5,drwtNo6,bnusNo
0,1,success,2002-12-07,3681782000,863604600,0,0,10,23,29,33,37,40,16
1,2,success,2002-12-14,4904274000,0,1,2002006800,9,13,21,25,32,42,2
2,3,success,2002-12-21,4729342000,0,1,2000000000,11,16,19,21,27,31,30
3,4,success,2002-12-28,5271464000,1267147200,0,0,14,27,30,31,40,42,2
4,5,success,2003-01-04,6277102000,3041094900,0,0,16,24,29,40,41,42,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1071,1072,success,2023-06-17,108156654000,26100076500,12,2175006375,16,18,20,23,32,43,27
1072,1073,success,2023-06-24,107352123000,25797503633,11,2345227603,6,18,28,30,32,38,15
1073,1074,success,2023-07-01,105744157000,25617163884,12,2134763657,1,6,20,27,28,41,15
1074,1075,success,2023-07-08,105635232000,26067034503,9,2896337167,1,23,24,35,44,45,10


In [9]:
# Download only the draws that are newer than the ones already in lotto_df,
# walking forward until the API reports "fail" for a draw number.
new_lotto_data = []

print("--- GET lotto data ---")

last_drw_no = lotto_df["drwNo"].max()
# max() of an empty column is NaN; start from draw 1 in that case. int() also
# keeps a float-typed column from producing a URL such as "...drwNo=1076.0".
drw_no = 1 if pd.isna(last_drw_no) else int(last_drw_no) + 1
while True:
  # The timeout stops a stalled connection from hanging the cell forever.
  res = requests.get(LOTTO_API + str(drw_no), timeout=10)
  # Raise on HTTP 4xx/5xx rather than trying to parse an error page as JSON.
  res.raise_for_status()
  drw = res.json()

  # A "fail" returnValue ends the download.
  if drw["returnValue"] == "fail":
    break
  new_lotto_data.append(drw)

  # Progress line every 10 draws.
  if drw["drwNo"] % 10 == 0:
    print("Current drwNo:", drw["drwNo"])
  drw_no += 1

print("--- END lotto data ---")
print()

print("Total data:", len(new_lotto_data))

--- GET lotto data ---
--- END lotto data ---

Total data: 0


In [10]:
# One row per newly downloaded draw (empty when nothing new was fetched).
new_lotto_df = pd.DataFrame(data=new_lotto_data)
new_lotto_df

In [11]:
# Append the newly fetched draws to the stored ones.
lotto_df = pd.concat([lotto_df, new_lotto_df], ignore_index=True)
# Re-running this cell appends the same draws again. Keying on drwNo keeps
# exactly one row per draw (the most recently appended one), even when some
# other field differs between fetches; a whole-row comparison would keep both.
lotto_df = lotto_df.drop_duplicates(subset="drwNo", keep="last")
lotto_df.tail(20)

Unnamed: 0,drwNo,returnValue,drwNoDate,totSellamnt,firstAccumamnt,firstPrzwnerCo,firstWinamnt,drwtNo1,drwtNo2,drwtNo3,drwtNo4,drwtNo5,drwtNo6,bnusNo
1056,1057,success,2023-03-04,112523887000,27473185138,17,1616069714,8,13,19,27,40,45,12
1057,1058,success,2023-03-11,112041382000,26754263250,13,2058020250,11,23,25,30,32,40,42
1058,1059,success,2023-03-18,110846824000,26431190253,13,2033168481,7,10,22,25,34,40,27
1059,1060,success,2023-03-25,110677185000,25150689396,28,898238907,3,10,24,33,38,45,36
1060,1061,success,2023-04-01,109909344000,26650456503,11,2422768773,4,24,27,35,37,45,15
1061,1062,success,2023-04-08,107109911000,26613536628,7,3801933804,20,31,32,40,41,45,12
1062,1063,success,2023-04-15,108347135000,26392183125,7,3770311875,3,6,22,23,24,38,30
1063,1064,success,2023-04-22,109427584000,25615194014,19,1348168106,3,6,9,18,22,35,14
1064,1065,success,2023-04-29,108181727000,25936315132,14,1852593938,3,18,19,23,32,45,24
1065,1066,success,2023-05-06,106440950000,25064208750,15,1670947250,6,11,16,19,21,32,45


### Save lotto data

In [5]:
# Fix the column order used in the saved file: draw metadata first, then the
# six drawn numbers and the bonus number.
ordered_columns = [
  "drwNo", "returnValue", "drwNoDate",
  "totSellamnt", "firstAccumamnt", "firstPrzwnerCo", "firstWinamnt",
  "drwtNo1", "drwtNo2", "drwtNo3", "drwtNo4", "drwtNo5", "drwtNo6",
  "bnusNo",
]
lotto_df = lotto_df[ordered_columns]

In [7]:
# Write the draws as a tab-separated file, without the row index.
lotto_df.to_csv(
  "./data/lotto.tsv",
  sep="\t",
  index=False,
)

## Result

In [12]:
# Reload the saved draws and convert the draw-date column to datetimes.
lotto_df = (
  pd.read_csv("./data/lotto.tsv", sep="\t")
  .assign(drwNoDate=lambda frame: pd.to_datetime(frame["drwNoDate"]))
)
lotto_df

Unnamed: 0,drwNo,returnValue,drwNoDate,totSellamnt,firstAccumamnt,firstPrzwnerCo,firstWinamnt,drwtNo1,drwtNo2,drwtNo3,drwtNo4,drwtNo5,drwtNo6,bnusNo
0,1,success,2002-12-07,3681782000,863604600,0,0,10,23,29,33,37,40,16
1,2,success,2002-12-14,4904274000,0,1,2002006800,9,13,21,25,32,42,2
2,3,success,2002-12-21,4729342000,0,1,2000000000,11,16,19,21,27,31,30
3,4,success,2002-12-28,5271464000,1267147200,0,0,14,27,30,31,40,42,2
4,5,success,2003-01-04,6277102000,3041094900,0,0,16,24,29,40,41,42,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1071,1072,success,2023-06-17,108156654000,26100076500,12,2175006375,16,18,20,23,32,43,27
1072,1073,success,2023-06-24,107352123000,25797503633,11,2345227603,6,18,28,30,32,38,15
1073,1074,success,2023-07-01,105744157000,25617163884,12,2134763657,1,6,20,27,28,41,15
1074,1075,success,2023-07-08,105635232000,26067034503,9,2896337167,1,23,24,35,44,45,10
