## Preprocess

In [3]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Work from the project folder on the mounted Drive so the relative "./data/..." paths used below resolve there.
%cd /content/drive/MyDrive/workspace/datarea/jupyter/lotto

/content/drive/MyDrive/workspace/datarea/jupyter/lotto


In [6]:
import json
import requests

import matplotlib.pyplot as plt
import pandas as pd

# Base URL of the draw-lookup endpoint; the draw number is appended to it to build each request URL.
LOTTO_API = "https://www.dhlottery.co.kr/common.do?method=getLottoNumber&drwNo="

## Data

### Get lotto data (run once, for the initial full download)

In [None]:
# Download every draw starting from draw number 1. The endpoint is queried one
# draw number at a time until it answers with returnValue == "fail"
# (presumably meaning that draw has not happened yet -- confirm against the API).
lotto_data = []

print("--- GET lotto data ---")

drw_no = 1
while True:
  # Without a timeout a stalled connection would hang this cell forever.
  res = requests.get(LOTTO_API + str(drw_no), timeout=10)
  # Surface HTTP errors directly instead of as a confusing JSON decode error.
  res.raise_for_status()
  drw = res.json()

  if drw["returnValue"] == "fail":
    break
  lotto_data.append(drw)

  # Light progress log: one line per 100 draws.
  if drw["drwNo"] % 100 == 0:
    print("Current drwNo:", drw["drwNo"])
  drw_no += 1

print("--- END lotto data ---")
print()

print("Total data:", len(lotto_data))

--- GET lotto data ---
Current drwNo: 100
Current drwNo: 200
Current drwNo: 300
Current drwNo: 400
Current drwNo: 500
Current drwNo: 600
Current drwNo: 700
Current drwNo: 800
Current drwNo: 900
Current drwNo: 1000
--- END lotto data ---

Total data: 1076


In [None]:
# Turn the list of draw records collected by the download loop into a DataFrame.
lotto_df = pd.DataFrame(lotto_data)
# Bare last expression so the notebook renders the frame.
lotto_df

Unnamed: 0,totSellamnt,returnValue,drwNoDate,firstWinamnt,drwtNo6,drwtNo4,firstPrzwnerCo,drwtNo5,bnusNo,firstAccumamnt,drwNo,drwtNo2,drwtNo3,drwtNo1
0,3681782000,success,2002-12-07,0,40,33,0,37,16,863604600,1,23,29,10
1,4904274000,success,2002-12-14,2002006800,42,25,1,32,2,0,2,13,21,9
2,4729342000,success,2002-12-21,2000000000,31,21,1,27,30,0,3,16,19,11
3,5271464000,success,2002-12-28,0,42,31,0,40,2,1267147200,4,27,30,14
4,6277102000,success,2003-01-04,0,42,40,0,41,3,3041094900,5,24,29,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1071,108156654000,success,2023-06-17,2175006375,43,23,12,32,27,26100076500,1072,18,20,16
1072,107352123000,success,2023-06-24,2345227603,38,30,11,32,15,25797503633,1073,18,28,6
1073,105744157000,success,2023-07-01,2134763657,41,27,12,28,15,25617163884,1074,6,20,1
1074,105635232000,success,2023-07-08,2896337167,45,35,9,44,10,26067034503,1075,23,24,1


### Update lotto data

In [7]:
# Load the previously saved draws from the tab-separated file written by the save step.
lotto_df = pd.read_csv("./data/lotto.tsv", sep="\t")
# Bare last expression so the notebook renders the frame.
lotto_df

Unnamed: 0,drwNo,returnValue,drwNoDate,totSellamnt,firstAccumamnt,firstPrzwnerCo,firstWinamnt,drwtNo1,drwtNo2,drwtNo3,drwtNo4,drwtNo5,drwtNo6,bnusNo
0,1,success,2002-12-07,3681782000,863604600,0,0,10,23,29,33,37,40,16
1,2,success,2002-12-14,4904274000,0,1,2002006800,9,13,21,25,32,42,2
2,3,success,2002-12-21,4729342000,0,1,2000000000,11,16,19,21,27,31,30
3,4,success,2002-12-28,5271464000,1267147200,0,0,14,27,30,31,40,42,2
4,5,success,2003-01-04,6277102000,3041094900,0,0,16,24,29,40,41,42,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1071,1072,success,2023-06-17,108156654000,26100076500,12,2175006375,16,18,20,23,32,43,27
1072,1073,success,2023-06-24,107352123000,25797503633,11,2345227603,6,18,28,30,32,38,15
1073,1074,success,2023-07-01,105744157000,25617163884,12,2134763657,1,6,20,27,28,41,15
1074,1075,success,2023-07-08,105635232000,26067034503,9,2896337167,1,23,24,35,44,45,10


In [8]:
# Fetch only the draws that are newer than what is already stored in lotto_df.
# The endpoint is queried one draw number at a time until it answers with
# returnValue == "fail" (presumably meaning that draw has not happened yet --
# confirm against the API).
new_lotto_data = []

print("--- GET lotto data ---")

# Resume right after the highest draw number already present in the frame.
drw_no = lotto_df["drwNo"].max() + 1
while True:
  # Without a timeout a stalled connection would hang this cell forever.
  res = requests.get(LOTTO_API + str(drw_no), timeout=10)
  # Surface HTTP errors directly instead of as a confusing JSON decode error.
  res.raise_for_status()
  drw = res.json()

  if drw["returnValue"] == "fail":
    break
  new_lotto_data.append(drw)

  # Light progress log: one line per 10 draws.
  if drw["drwNo"] % 10 == 0:
    print("Current drwNo:", drw["drwNo"])
  drw_no += 1

print("--- END lotto data ---")
print()

print("Total data:", len(new_lotto_data))

--- GET lotto data ---
Current drwNo: 1080
--- END lotto data ---

Total data: 5


In [9]:
# Turn the newly fetched draw records into a DataFrame.
new_lotto_df = pd.DataFrame(new_lotto_data)
# Bare last expression so the notebook renders the frame.
new_lotto_df

Unnamed: 0,totSellamnt,returnValue,drwNoDate,firstWinamnt,drwtNo6,drwtNo4,firstPrzwnerCo,drwtNo5,bnusNo,firstAccumamnt,drwNo,drwtNo2,drwtNo3,drwtNo1
0,105927489000,success,2023-07-22,3570901018,43,30,7,40,34,24996307126,1077,8,17,4
1,105407618000,success,2023-07-29,2141604938,38,14,12,36,43,25699259256,1078,10,11,6
2,103120206000,success,2023-08-05,2712329417,45,24,9,37,6,24410964753,1079,8,18,4
3,104111986000,success,2023-08-12,3639444429,44,31,7,36,38,25476111003,1080,16,23,13
4,106974323000,success,2023-08-19,2343892944,38,23,11,24,17,25782822384,1081,9,16,1


In [10]:
# Append the newly fetched draws to the stored ones; concat aligns columns by
# name, so the differing column order of the two frames does not matter.
lotto_df = pd.concat([lotto_df, new_lotto_df], ignore_index=True)
# Deduplicate on the draw number instead of on whole rows: re-running the
# update cells must never leave two rows for one draw, even if the API returns
# different values for a draw that was fetched before. The most recently
# appended row wins.
lotto_df = lotto_df.drop_duplicates(subset="drwNo", keep="last")
# Show the end of the frame to check that the new draws were appended.
lotto_df.tail(20)

Unnamed: 0,drwNo,returnValue,drwNoDate,totSellamnt,firstAccumamnt,firstPrzwnerCo,firstWinamnt,drwtNo1,drwtNo2,drwtNo3,drwtNo4,drwtNo5,drwtNo6,bnusNo
1061,1062,success,2023-04-08,107109911000,26613536628,7,3801933804,20,31,32,40,41,45,12
1062,1063,success,2023-04-15,108347135000,26392183125,7,3770311875,3,6,22,23,24,38,30
1063,1064,success,2023-04-22,109427584000,25615194014,19,1348168106,3,6,9,18,22,35,14
1064,1065,success,2023-04-29,108181727000,25936315132,14,1852593938,3,18,19,23,32,45,24
1065,1066,success,2023-05-06,106440950000,25064208750,15,1670947250,6,11,16,19,21,32,45
1066,1067,success,2023-05-13,110703299000,25754482130,13,1981114010,7,10,19,23,28,33,18
1067,1068,success,2023-05-20,108988932000,25914660766,19,1363929514,4,7,19,26,33,35,3
1068,1069,success,2023-05-27,108388892000,26085045756,14,1863217554,1,10,18,22,28,31,44
1069,1070,success,2023-06-03,107222802000,26027637006,14,1859116929,3,6,14,22,30,41,36
1070,1071,success,2023-06-10,107362630000,25919898750,5,5183979750,1,2,11,21,30,35,39


### Save lotto data

In [11]:
# Put the columns into the fixed order used by the saved TSV: draw metadata
# first, then the prize figures, then the six drawn numbers and the bonus number.
lotto_df = lotto_df.loc[:, [
  "drwNo", "returnValue", "drwNoDate",
  "totSellamnt", "firstAccumamnt", "firstPrzwnerCo", "firstWinamnt",
  "drwtNo1", "drwtNo2", "drwtNo3", "drwtNo4", "drwtNo5", "drwtNo6",
  "bnusNo",
]]

In [12]:
# Write the frame back to the TSV that the update step reads; index=False keeps the row index out of the file.
lotto_df.to_csv("./data/lotto.tsv", sep="\t", index=False)

## Result

In [13]:
# Reload the saved TSV and convert the draw-date column to datetimes in one chain.
lotto_df = (
  pd.read_csv("./data/lotto.tsv", sep="\t")
  .assign(drwNoDate=lambda frame: pd.to_datetime(frame["drwNoDate"]))
)
# Bare last expression so the notebook renders the frame.
lotto_df

Unnamed: 0,drwNo,returnValue,drwNoDate,totSellamnt,firstAccumamnt,firstPrzwnerCo,firstWinamnt,drwtNo1,drwtNo2,drwtNo3,drwtNo4,drwtNo5,drwtNo6,bnusNo
0,1,success,2002-12-07,3681782000,863604600,0,0,10,23,29,33,37,40,16
1,2,success,2002-12-14,4904274000,0,1,2002006800,9,13,21,25,32,42,2
2,3,success,2002-12-21,4729342000,0,1,2000000000,11,16,19,21,27,31,30
3,4,success,2002-12-28,5271464000,1267147200,0,0,14,27,30,31,40,42,2
4,5,success,2003-01-04,6277102000,3041094900,0,0,16,24,29,40,41,42,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1076,1077,success,2023-07-22,105927489000,24996307126,7,3570901018,4,8,17,30,40,43,34
1077,1078,success,2023-07-29,105407618000,25699259256,12,2141604938,6,10,11,14,36,38,43
1078,1079,success,2023-08-05,103120206000,24410964753,9,2712329417,4,8,18,24,37,45,6
1079,1080,success,2023-08-12,104111986000,25476111003,7,3639444429,13,16,23,31,36,44,38
