In [20]:
import json

import pandas as pd

In [21]:
# Load the raw reading log (one row per source and date) from the test-data CSV.
data = pd.read_csv(filepath_or_buffer="../test_data/mae_raw_data.csv")

In [22]:
# Show the frame exactly as loaded from the CSV.
data

Unnamed: 0,Source,Date,Time,Char Count/Day,Line Count/Day,Char Count/Hour
0,ATRI,31/1/2021,3.49,15163,526,4345
1,ATRI,1/2/2021,4.58,16281,808,3555
2,ATRI,2/2/2021,2.59,16664,799,6434
3,ATRI,3/2/2021,2.97,18938,915,6376
4,ATRI,4/2/2021,2.66,17020,797,6398
...,...,...,...,...,...,...
264,フェイト／ステイナイト,18/6/2022,2.03,30028,1295,14792
265,薄桜鬼 新選組奇譚,13/6/2022,3.20,35142,1593,10982
266,薄桜鬼 新選組奇譚,14/6/2022,2.77,35023,1623,12644
267,薄桜鬼 新選組奇譚,15/6/2022,2.50,37212,1861,14885


In [23]:
# Duplicate the "Source" column under the key "name"; "Source" itself is kept for now.
data = data.assign(name=data["Source"])

In [24]:
# Copy the CSV's per-day metrics into snake_case columns.
data = data.assign(
    lines_read=data["Line Count/Day"],
    chars_read=data["Char Count/Day"],
    # Time * 60 * 60 -- presumably hours converted to seconds (TODO confirm the CSV's unit).
    time_read=data["Time"] * 60 * 60,
    # Placeholder with no value yet; the key's spelling is part of the emitted data, so it is kept.
    last_line_recieved=None,
)

In [25]:
# Inspect the raw "Date" strings (object dtype) before they are parsed.
data["Date"]

0      31/1/2021
1       1/2/2021
2       2/2/2021
3       3/2/2021
4       4/2/2021
         ...    
264    18/6/2022
265    13/6/2022
266    14/6/2022
267    15/6/2022
268    16/6/2022
Name: Date, Length: 269, dtype: object

In [26]:
# Parse the day/month/year strings (e.g. "31/1/2021") with an explicit format.
# `dayfirst=True` is only a parsing preference and is not strict, so a row in a
# different layout could be read month-first without any error; an explicit
# format makes such rows raise instead of producing a wrong date.
data["date"] = pd.to_datetime(data["Date"], format="%d/%m/%Y")

In [27]:
# Composite key "<name>_<YYYY-MM-DD>" identifying one game on one calendar day.
data["game_date"] = data["name"].str.cat(data["date"].dt.strftime("%Y-%m-%d"), sep="_")

In [28]:
# Remove the original CSV columns now that their values live in the derived columns.
data = data.drop(
    columns=[
        "Source",
        "Time",
        "Char Count/Day",
        "Line Count/Day",
        "Char Count/Hour",
        "Date",
    ]
)

In [29]:
# One metadata entry per game, keyed by the game's name:
#   name            -- the game name again, stored inside the entry
#   dates_read_on   -- every date in that game's rows, formatted as YYYY-MM-DD
#   last_line_added -- placeholder, always None here
game_entries = {
    game_name: dict(
        name=game_name,
        dates_read_on=game_rows["date"].dt.strftime("%Y-%m-%d").to_list(),
        last_line_added=None,
    )
    for game_name, game_rows in data.groupby("name")
}

In [30]:
# Distinct game names in order of first appearance, wrapped under the "games" key.
games = {"games": data["name"].unique().tolist()}

In [31]:
# "name" and "date" are already folded into "game_date"; keep only the per-day stats.
data = data.drop(columns=["name", "date"])

In [32]:
# Check which columns remain after the drops.
data

Unnamed: 0,lines_read,chars_read,time_read,last_line_recieved,game_date
0,526,15163,12564.0,,ATRI_2021-01-31
1,808,16281,16488.0,,ATRI_2021-02-01
2,799,16664,9324.0,,ATRI_2021-02-02
3,915,18938,10692.0,,ATRI_2021-02-03
4,797,17020,9576.0,,ATRI_2021-02-04
...,...,...,...,...,...
264,1295,30028,7308.0,,フェイト／ステイナイト_2022-06-18
265,1593,35142,11520.0,,薄桜鬼 新選組奇譚_2022-06-13
266,1623,35023,9972.0,,薄桜鬼 新選組奇譚_2022-06-14
267,1861,37212,9000.0,,薄桜鬼 新選組奇譚_2022-06-15


In [33]:
# Build one stats record per "game_date" key: {game_date: {column: value, ...}}.
# The previous groupby(...).to_dict("records")[0] form kept only the first row
# of each group, so a repeated key lost data without any signal. Fail loudly
# instead so the CSV can be corrected.
duplicated_game_dates = data.loc[data["game_date"].duplicated(), "game_date"].unique().tolist()
if duplicated_game_dates:
    raise ValueError(
        f"Duplicate game_date keys would overwrite each other: {duplicated_game_dates}"
    )
# sort_index() keeps the keys in the same sorted order that groupby produced.
game_date_entries = data.set_index("game_date").sort_index().to_dict(orient="index")

In [34]:
# Flatten the three mappings into one dictionary. On a key clash the later
# mapping wins: game_date_entries over game_entries over games.
insertion_dict = {**games, **game_entries, **game_date_entries}

In [35]:
# Inspect the combined dictionary before it is written to disk.
insertion_dict

{'games': ['ATRI',
  'ISLAND',
  'ファタモル',
  '白昼夢',
  '剣が君',
  'あやかしごはん',
  '大正×対称アリス',
  'あやかしごはんおかわり',
  'ソナーニル',
  'Angel Beats!',
  '穢翼のユースティア',
  'PARQUET',
  'オメガヴァンパイア',
  'ホワイトアルバム2',
  '君と彼女と彼女の恋',
  'サマーポケッツ',
  'サクラノ詩－櫻の森の上を舞う－',
  'シュタインズ・ゲート',
  'フェイト／ステイナイト',
  '薄桜鬼 新選組奇譚'],
 'ATRI': {'name': 'ATRI',
  'dates_read_on': ['2021-01-31',
   '2021-02-01',
   '2021-02-02',
   '2021-02-03',
   '2021-02-04',
   '2021-02-05',
   '2021-02-06',
   '2021-02-07',
   '2021-02-08',
   '2021-02-09',
   '2021-02-10',
   '2021-02-11',
   '2021-02-12',
   '2021-02-13',
   '2021-02-14',
   '2021-02-15',
   '2021-02-26',
   '2021-02-27',
   '2021-02-28'],
  'last_line_added': None},
 'Angel Beats!': {'name': 'Angel Beats!',
  'dates_read_on': ['2021-07-01',
   '2021-07-02',
   '2021-07-03',
   '2021-07-09',
   '2021-07-22',
   '2021-11-28',
   '2021-11-29',
   '2021-12-01',
   '2021-12-05',
   '2021-12-06',
   '2021-12-07',
   '2021-12-09',
   '2021-12-11',
   '2021-12-12'],
  'last_line_added

In [36]:
# Persist the combined dictionary as UTF-8 JSON; ensure_ascii=False writes the
# non-ASCII game titles as-is rather than as \uXXXX escapes.
with open("../test_data/mae_processed_data.json", "w", encoding="utf-8") as output_file:
    json.dump(obj=insertion_dict, fp=output_file, ensure_ascii=False)

In [37]:
# Derive the columns shown in the next preview.
# NOTE(review): this divides time_read by 3600 every time the cell runs, so
# re-running it without restarting the kernel shrinks the values again.
# NOTE(review): "date" is a synthetic run of consecutive days starting
# 2021-01-31, one per row; it is not parsed from "game_date" and does not
# match it -- confirm this is intended.
data = data.assign(
    # Undo the earlier Time * 60 * 60 scaling.
    time_read=data["time_read"].div(60 * 60),
    # Characters per unit of the rescaled time_read (evaluated after time_read is replaced).
    read_speed=lambda frame: frame["chars_read"].div(frame["time_read"]),
    date=pd.date_range(start="January 31st 2021", periods=len(data), freq="1D", tz="Asia/Manila"),
)

In [38]:
# Review the frame with the rescaled time_read and the new read_speed and date columns.
data

Unnamed: 0,lines_read,chars_read,time_read,last_line_recieved,game_date,read_speed,date
0,526,15163,3.49,,ATRI_2021-01-31,4344.699140,2021-01-31 00:00:00+08:00
1,808,16281,4.58,,ATRI_2021-02-01,3554.803493,2021-02-01 00:00:00+08:00
2,799,16664,2.59,,ATRI_2021-02-02,6433.976834,2021-02-02 00:00:00+08:00
3,915,18938,2.97,,ATRI_2021-02-03,6376.430976,2021-02-03 00:00:00+08:00
4,797,17020,2.66,,ATRI_2021-02-04,6398.496241,2021-02-04 00:00:00+08:00
...,...,...,...,...,...,...,...
264,1295,30028,2.03,,フェイト／ステイナイト_2022-06-18,14792.118227,2021-10-22 00:00:00+08:00
265,1593,35142,3.20,,薄桜鬼 新選組奇譚_2022-06-13,10981.875000,2021-10-23 00:00:00+08:00
266,1623,35023,2.77,,薄桜鬼 新選組奇譚_2022-06-14,12643.682310,2021-10-24 00:00:00+08:00
267,1861,37212,2.50,,薄桜鬼 新選組奇譚_2022-06-15,14884.800000,2021-10-25 00:00:00+08:00
