In [1]:
import pathlib
import os
import shutil
import pandas as pd 

import utils
import artifact
import artifact_extractor
import plotly
import plotly.express as px

from typing import Union
import datetime
# Type alias for values that name a filesystem path: either a plain string or an os.PathLike object.
PathLike = Union[str, os.PathLike]

In [86]:
# Show full cell contents when displaying frames instead of truncating long values.
pd.set_option("display.max_colwidth", None)

## Setup Directories for new artifact crops

In [2]:
# Scratch directory (under the current working directory) for all image crops and intermediate outputs.
save_dir = pathlib.Path.cwd() / "_new_artifact_temp"
save_dir.mkdir(parents=True, exist_ok=True)

# Directories containing the new artifact images.
new_artifact_dirs = [
    "~/Desktop/NVIDIA Artifacts",
    "~/Desktop/Compressed Png iPad",
]

# Output locations for each intermediate step.
artifact_crop_output_dir = save_dir / "artifact_crop"
artifact_component_output_dir = save_dir / "artifact_components"
ocr_output_dir = save_dir / "ocr_output"
good_format_output_path = save_dir / "artifacts_good_format.json"

In [3]:
# Make sure Tesseract font is installed.
# NOTE(review): judging by the helper's name it copies the font to where Tesseract
# looks for it — confirm in artifact_extractor before relying on this.
artifact_extractor.copy_tesseract_font()

## Run Artifact Extractor

In [4]:
# Produce crops for all of the new images.
# Locate the +0 template image and produce a crop for each new artifact image.
# Every source directory is processed with the same artifact_crop_output_dir.
for new_artifact_dir in new_artifact_dirs:
    # The configured directories start with "~", so expand them to the user's home first.
    artifact_source_dir = pathlib.Path(new_artifact_dir).expanduser()
    artifact_extractor.crop_new_artifacts_multiprocess(artifact_source_dir, artifact_crop_output_dir)

  0%|          | 0/1368 [00:00<?, ?it/s]

  0%|          | 0/712 [00:00<?, ?it/s]

In [5]:
# Extract the component crops from the artifact crops produced above;
# artifact_component_output_dir is passed as the output directory.
artifact_extractor.get_artifact_components(artifact_crop_output_dir, output_dir=artifact_component_output_dir)

Getting artifact component crops...


  0%|          | 0/2078 [00:00<?, ?it/s]

In [6]:
# Run OCR over the component crops, with ocr_output_dir as the second argument,
# and keep the returned artifacts.
# NOTE(review): presumably this also writes artifacts.json under ocr_output_dir,
# since that file is reloaded later — confirm in artifact_extractor.
artifacts = artifact_extractor.run_ocr_on_artifact_components_multiprocess(
    artifact_component_output_dir, ocr_output_dir
)

Running OCR...


  0%|          | 0/2078 [00:00<?, ?it/s]

In [3]:
# Reload the OCR results from artifacts.json and drop duplicate artifacts.
# Reloading from disk lets this step run without repeating the OCR pass.
artifacts = artifact_extractor.load_json(ocr_output_dir / "artifacts.json")
all_artifacts = artifact_extractor.remove_duplicate_artifacts(artifacts=artifacts)


In [4]:
# Export the de-duplicated artifacts as "good format" JSON at good_format_output_path.
artifact.artifact_list_to_good_format_json(
    all_artifacts, output_path=good_format_output_path
)

In [5]:
# Spot-check the first de-duplicated artifact.
all_artifacts[0]

Artifact(
    artifact_type="Sands of Eon",
    level="0",
    rarity="4",
    main_stat="HP",
    value="7.0%",
    set_name="Wanderer's Troupe",
    substats="{'ATK': 14.0, 'HP': 299.0, 'Energy Recharge%': 4.5, 'CRIT DMG%': 7.8}",
    equipped="None",
    artifact_id="Genshin Impact 2023.09.05 - 17.43.37.00 2023-09-05 17:43:37",
    file_path="None",
)

In [6]:
# Build one DataFrame row per artifact from its dict representation.
df = pd.DataFrame([a.to_dict() for a in all_artifacts])

In [7]:
# Order the artifacts chronologically (oldest first). The sorted copy is rebound
# to `df` rather than sorting with inplace=True, which pandas discourages and
# which prevents method chaining.
df = df.sort_values(by="creation_time")

In [24]:
# NOTE(review): this replaces the OCR'd rarity on every row with 5, although the
# sample artifact displayed earlier reports rarity "4" — confirm this is intended.
df["rarity"] = 5
# Constant weight of 1 per artifact so the plots can sum "count".
df["count"] = 1
# Number of substats listed on each artifact.
df["substat_count"] = df["substats"].apply(len)

In [25]:
# Display the frame with the derived columns added.
df

Unnamed: 0,artifact_type,level,rarity,main_stat,value,set_name,substats,equipped,artifact_id,creation_time,count,substat_count
1582,Circlet of Logos,0,5,DEF,8.7%,Crimson Witch of Flames,"{'HP%': 5.3, 'DEF': 21.0, 'CRIT DMG%': 5.4}",,IMG_4417 2023-08-16 00:27:02,2023-08-16 00:27:02,1,3
1584,Plume of Death,0,5,ATK,47,Crimson Witch of Flames,"{'DEF%': 7.3, 'HP': 299.0, 'CRIT Rate%': 3.5}",,IMG_4419 2023-08-16 00:27:02,2023-08-16 00:27:02,1,3
1560,Circlet of Logos,0,5,HP,7.0%,Marechaussee Hunter,"{'ATK': 16.0, 'HP': 209.0, 'CRIT DMG%': 5.4, '...",,IMG_4395 2023-08-16 00:27:02,2023-08-16 00:27:02,1,4
1577,Circlet of Logos,0,5,ATK,7.0%,Marechaussee Hunter,"{'HP': 209.0, 'DEF': 21.0, 'ATK': 18.0}",,IMG_4412 2023-08-16 00:27:02,2023-08-16 00:27:02,1,3
1585,Flower of Life,0,5,HP,717,Crimson Witch of Flames,"{'CRIT DMG%': 7.8, 'DEF%': 7.3, 'ATK%': 4.1}",,IMG_4420 2023-08-16 00:27:02,2023-08-16 00:27:02,1,3
...,...,...,...,...,...,...,...,...,...,...,...,...
1361,Plume of Death,0,5,ATK,47,Golden Troupe,"{'DEF%': 7.3, 'ATK%': 4.1, 'Energy Recharge%':...",,Genshin Impact 2023.11.07 - 09.58.21.01 2023-1...,2023-11-07 09:58:21,1,3
1362,Goblet of Eonothem,0,5,DEF,8.7%,Marechaussee Hunter,"{'HP': 239.0, 'Energy Recharge%': 4.5, 'ATK%':...",,Genshin Impact 2023.11.07 - 09.59.21.02 2023-1...,2023-11-07 09:59:21,1,3
1363,Goblet of Eonothem,0,5,ATK,7.0%,Golden Troupe,"{'DEF%': 6.6, 'Energy Recharge%': 5.8, 'ATK': ...",,Genshin Impact 2023.11.07 - 09.59.23.03 2023-1...,2023-11-07 09:59:23,1,4
1364,Plume of Death,0,5,ATK,47,Golden Troupe,"{'HP': 239.0, 'CRIT DMG%': 5.4, 'DEF%': 5.1}",,Genshin Impact 2023.11.07 - 10.00.10.04 2023-1...,2023-11-07 10:00:10,1,3


## Plots

In [29]:
# Share of artifacts per artifact type. Titled like the other charts so the
# figure is identifiable on its own.
fig = px.pie(
    df,
    names="artifact_type",
    values="count",
    title="Artifact Type Distribution",
)
fig.update_traces(textinfo="value+label")
fig.show()

In [73]:
# Two-level breakdown: artifact type on the inner ring, main stat on the outer ring.
fig = px.sunburst(
    df,
    path=["artifact_type", "main_stat"],
    values="count",
    title="Artifact type by main stat",
    width=1000,
    height=1000,
)
fig.update_traces(textinfo="value+label")
fig.show()

In [30]:
# Share of artifacts by number of substats.
fig = px.pie(
    df,
    names="substat_count",
    values="count",
    title="Substat Count",
)
fig.update_traces(textinfo="value+label")
fig.show()

In [74]:
# Share of artifacts per artifact set.
fig = px.pie(
    df,
    names="set_name",
    values="count",
    title="Artifact Set Distribution",
    width=800,
    height=800,
)
fig.update_traces(textinfo="value+label")
fig.show()

In [36]:
# Share of artifacts per main stat. The title previously read
# "Artifact Set Distribution", copied from the set-name chart, which mislabeled
# this figure.
fig = px.pie(
    df,
    names="main_stat",
    values="count",
    title="Main Stat Distribution",
    width=800,
    height=800,
)
fig.update_traces(textinfo="value+label")
fig.show()

In [63]:
# Running total of artifacts by calendar date of creation:
# sum "count" per day, then accumulate across days.
df_count_time = (
    df.groupby(df["creation_time"].dt.date)["count"]
    .sum()
    .to_frame()
    .cumsum()
    .reset_index()
)

In [65]:
# Plot the cumulative artifact count against creation date.
px.line(
    df_count_time,
    x="creation_time",
    y="count",
    title="Artifact Count Over Time",
)

In [84]:
# Double crit line not circlet
def crit_counts(row: pd.Series) -> int:
    """Count the CRIT substats of one artifact row.

    Args:
        row: A row whose "substats" entry is an iterable of substat names
            (iterating a dict yields its keys).

    Returns:
        How many substat names start with "CRIT".
    """
    return sum(1 for name in row["substats"] if name.startswith("CRIT"))

# Tag every artifact with its number of CRIT substats.
df["crit_counts"] = df.apply(crit_counts, axis=1)

# Distribution of CRIT substat counts across all pieces that are not circlets.
df.loc[df["artifact_type"] != "Circlet of Logos", "crit_counts"].value_counts()

crit_counts
0    870
1    714
2     78
Name: count, dtype: int64

In [93]:
# CRIT substat counts among circlets whose main stat is itself CRIT Rate or CRIT DMG.
df.loc[
    (df["artifact_type"] == "Circlet of Logos")
    & df["main_stat"].isin(["CRIT Rate", "CRIT DMG"]),
    "crit_counts",
].value_counts()

crit_counts
0    64
1    19
Name: count, dtype: int64

In [96]:
# List the CRIT main-stat circlets, those with the most CRIT substats (then the most substats) first.
df[
    (df["artifact_type"] == "Circlet of Logos")
    & df["main_stat"].isin(["CRIT Rate", "CRIT DMG"])
].sort_values(by=["crit_counts", "substat_count"], ascending=False)

Unnamed: 0,artifact_type,level,rarity,main_stat,value,set_name,substats,equipped,artifact_id,creation_time,count,substat_count,crit_counts
1451,Circlet of Logos,0,5,CRIT DMG,9.3%,Emblem of Severed Fate,"{'ATK%': 5.3, 'HP': 209.0, 'CRIT Rate%': 3.1, 'HP%': 4.7}",,IMG_4282 2023-08-16 00:59:01,2023-08-16 00:59:01,1,4,1
706,Circlet of Logos,0,5,CRIT Rate,4.7%,Marechaussee Hunter,"{'CRIT DMG%': 6.2, 'DEF': 16.0, 'DEF%': 6.6, 'ATK': 19.0}",,Genshin Impact 2023.10.09 - 22.40.12.22 2023-10-09 22:40:12,2023-10-09 22:40:12,1,4,1
1450,Circlet of Logos,0,5,CRIT Rate,4.7%,Emblem of Severed Fate,"{'DEF': 16.0, 'CRIT DMG%': 5.4, 'Elemental Mastery': 23.0}",,IMG_4281 2023-08-16 00:58:43,2023-08-16 00:58:43,1,3,1
1422,Circlet of Logos,0,5,CRIT DMG,9.3%,Emblem of Severed Fate,"{'ATK': 18.0, 'CRIT Rate%': 3.5, 'HP': 209.0}",,IMG_4253 2023-08-16 01:03:07,2023-08-16 01:03:07,1,3,1
1367,Circlet of Logos,0,5,CRIT DMG,9.3%,Marechaussee Hunter,"{'CRIT Rate%': 3.9, 'DEF%': 5.8, 'DEF': 19.0}",,IMG_4198 2023-08-16 01:04:25,2023-08-16 01:04:25,1,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1224,Circlet of Logos,0,5,CRIT DMG,9.3%,Golden Troupe,"{'ATK': 18.0, 'HP%': 4.1, 'ATK%': 4.7}",,Genshin Impact 2023.10.31 - 12.02.52.11 2023-10-31 12:02:52,2023-10-31 12:02:52,1,3,0
1250,Circlet of Logos,0,5,CRIT DMG,9.3%,Marechaussee Hunter,"{'HP%': 4.1, 'DEF%': 5.1, 'ATK': 16.0}",,Genshin Impact 2023.11.01 - 09.15.40.01 2023-11-01 09:15:40,2023-11-01 09:15:40,1,3,0
1335,Circlet of Logos,0,5,CRIT Rate,4.7%,Marechaussee Hunter,"{'DEF%': 7.3, 'Energy Recharge%': 6.5, 'ATK': 16.0}",,Genshin Impact 2023.11.05 - 21.28.05.07 2023-11-05 21:28:05,2023-11-05 21:28:05,1,3,0
1337,Circlet of Logos,0,5,CRIT Rate,4.7%,Marechaussee Hunter,"{'HP': 239.0, 'DEF%': 7.3, 'HP%': 5.3}",,Genshin Impact 2023.11.05 - 21.28.54.09 2023-11-05 21:28:54,2023-11-05 21:28:54,1,3,0


In [97]:
# Total number of artifact rows in the frame.
len(df)

2077