In [1]:
import pathlib
import os
import shutil
import pandas as pd 

import utils
import artifact
import artifact_extractor
import plotly
import plotly.express as px
import plotly.graph_objects as go

from typing import Union
import datetime
# Alias for values accepted as a filesystem path: a plain string or any os.PathLike object.
PathLike = Union[str, os.PathLike]

In [2]:
# Passing None removes the column-width limit, so long cell values (e.g. the substats dicts) are shown in full.
pd.set_option('display.max_colwidth', None)

## Setup Directories for new artifact crops

In [3]:
# Scratch directory under the current working directory; every intermediate output lives below it.
save_dir = pathlib.Path.cwd().joinpath("_new_artifact_temp")
save_dir.mkdir(parents=True, exist_ok=True)

# Folders that hold the new artifact screenshots ("~" is expanded when they are consumed).
new_artifact_dirs = [
    "~/Desktop/NVIDIA Artifacts",
    "~/Desktop/Compressed Png iPad",
]

# Where each intermediate pipeline step writes its results.
artifact_crop_output_dir = save_dir.joinpath("artifact_crop")
artifact_component_output_dir = save_dir.joinpath("artifact_components")
ocr_output_dir = save_dir.joinpath("ocr_output")
good_format_output_path = save_dir.joinpath("artifacts_good_format.json")

In [4]:
# Make sure the Tesseract font is installed before any OCR step runs.
# NOTE(review): copy_tesseract_font lives in artifact_extractor; what it copies and where is not visible here.
artifact_extractor.copy_tesseract_font()

## Run Artifact Extractor

In [7]:
# Produce crops for all of the new images.
# Locate the +0 template image and produce a crop for each new artifact image.
# Each configured directory has "~" expanded to the user's home directory, and all
# directories are passed the same artifact_crop_output_dir as their output location.
for new_artifact_dir in new_artifact_dirs:
    artifact_source_dir = pathlib.Path(new_artifact_dir).expanduser()
    artifact_extractor.crop_new_artifacts_multiprocess(artifact_source_dir, artifact_crop_output_dir)

  0%|          | 0/2552 [00:00<?, ?it/s]

  0%|          | 0/712 [00:00<?, ?it/s]

In [8]:
# Produce the component crops for the artifact crops, with artifact_component_output_dir as the output directory.
# NOTE(review): which components are extracted is decided inside artifact_extractor — not visible here.
artifact_extractor.get_artifact_components(artifact_crop_output_dir, output_dir=artifact_component_output_dir)

Getting artifact component crops...


  0%|          | 0/3249 [00:00<?, ?it/s]

In [9]:
# Run OCR over the component crops, passing ocr_output_dir as the output location.
# NOTE(review): a later cell reads ocr_output_dir / "artifacts.json", so this call presumably
# writes that file — confirm in artifact_extractor.
artifacts = artifact_extractor.run_ocr_on_artifact_components_multiprocess(
    artifact_component_output_dir, ocr_output_dir
)

Running OCR...


  0%|          | 0/3249 [00:00<?, ?it/s]

In [5]:
# Load the OCR results from artifacts.json in the OCR output directory and remove duplicates.
# This rebinds `artifacts` from the file on disk rather than reusing the in-memory OCR result,
# presumably so the analysis can be restarted from here without re-running OCR.
# The good-format JSON itself is written by the following cell.
artifacts = artifact_extractor.load_json(ocr_output_dir / "artifacts.json")
all_artifacts = artifact_extractor.remove_duplicate_artifacts(artifacts=artifacts)


In [6]:
# Write the de-duplicated artifacts to good_format_output_path (artifacts_good_format.json in save_dir).
# NOTE(review): the "good format" schema is defined in the artifact module and is not visible here.
artifact.artifact_list_to_good_format_json(
    all_artifacts, output_path=good_format_output_path
)

In [7]:
# Sanity check: show the first de-duplicated artifact.
all_artifacts[0]

Artifact(
    artifact_type="Sands of Eon",
    level="0",
    rarity="4",
    main_stat="HP",
    value="7.0%",
    set_name="Wanderer's Troupe",
    substats="{'ATK': 14.0, 'HP': 299.0, 'Energy Recharge%': 4.5, 'CRIT DMG%': 7.8}",
    equipped="None",
    artifact_id="Genshin Impact 2023.09.05 - 17.43.37.00 2023-09-05 17:43:37",
    file_path="None",
)

In [8]:
# Sanity check: roll_value of the first artifact.
all_artifacts[0].roll_value

340

In [9]:
# One row per artifact, built from each artifact's dict representation.
df = pd.DataFrame(
    data=[entry.to_dict() for entry in all_artifacts],
)

In [10]:
# Order rows chronologically (oldest first); rebinding keeps the cell free of in-place mutation.
df = df.sort_values("creation_time")

In [11]:
# Derived / normalised columns used by the plots below.
# NOTE(review): rarity is overwritten with 5 for every row (the first artifact shown above
# carries rarity "4") — presumably every screenshot is a 5-star artifact; confirm.
df = df.assign(
    rarity=5,
    count=1,  # constant 1 so plots and pivots can aggregate "count" per group
    substat_count=df["substats"].map(len),  # number of entries in each substats mapping
)

In [12]:
# Display the frame: rows are in creation_time order and now include the count and substat_count columns.
df

Unnamed: 0,artifact_type,level,rarity,main_stat,value,set_name,substats,roll_value,crit_value,equipped,artifact_id,creation_time,count,substat_count
2753,Circlet of Logos,0,5,DEF,8.7%,Crimson Witch of Flames,"{'HP%': 5.3, 'DEF': 21.0, 'CRIT DMG%': 5.4}",250,5.4,,IMG_4417 2023-08-16 00:27:02,2023-08-16 00:27:02,1,3
2755,Plume of Death,0,5,ATK,47,Crimson Witch of Flames,"{'DEF%': 7.3, 'HP': 299.0, 'CRIT Rate%': 3.5}",290,7.0,,IMG_4419 2023-08-16 00:27:02,2023-08-16 00:27:02,1,3
2731,Circlet of Logos,0,5,HP,7.0%,Marechaussee Hunter,"{'ATK': 16.0, 'HP': 209.0, 'CRIT DMG%': 5.4, 'Energy Recharge%': 6.5}",320,5.4,,IMG_4395 2023-08-16 00:27:02,2023-08-16 00:27:02,1,4
2748,Circlet of Logos,0,5,ATK,7.0%,Marechaussee Hunter,"{'HP': 209.0, 'DEF': 21.0, 'ATK': 18.0}",250,0.0,,IMG_4412 2023-08-16 00:27:02,2023-08-16 00:27:02,1,3
2756,Flower of Life,0,5,HP,717,Crimson Witch of Flames,"{'CRIT DMG%': 7.8, 'DEF%': 7.3, 'ATK%': 4.1}",270,7.8,,IMG_4420 2023-08-16 00:27:02,2023-08-16 00:27:02,1,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2532,Circlet of Logos,0,5,Healing Bonus,5.4%,Emblem of Severed Fate,"{'HP': 269.0, 'Energy Recharge%': 5.8, 'Elemental Mastery': 19.0}",260,0.0,,Genshin Impact 2024.01.07 - 08.34.19.23 2024-01-07 08:34:19,2024-01-07 08:34:19,1,3
2533,Circlet of Logos,0,5,HP,7.0%,Emblem of Severed Fate,"{'Elemental Mastery': 19.0, 'Energy Recharge%': 5.8, 'DEF%': 7.3}",270,0.0,,Genshin Impact 2024.01.07 - 08.34.21.24 2024-01-07 08:34:21,2024-01-07 08:34:21,1,3
2534,Circlet of Logos,0,5,HP,7.0%,Emblem of Severed Fate,"{'CRIT DMG%': 6.2, 'DEF': 21.0, 'Energy Recharge%': 5.2, 'ATK': 14.0}",320,6.2,,Genshin Impact 2024.01.07 - 08.34.31.25 2024-01-07 08:34:31,2024-01-07 08:34:31,1,4
2535,Circlet of Logos,0,5,HP,7.0%,Emblem of Severed Fate,"{'HP': 269.0, 'ATK%': 5.3, 'CRIT DMG%': 7.0, 'ATK': 18.0}",360,7.0,,Genshin Impact 2024.01.07 - 08.34.33.26 2024-01-07 08:34:33,2024-01-07 08:34:33,1,4


## Plots

In [58]:
# Share of artifacts per artifact type (slot).
# A title is set so the figure stands on its own, consistent with the other pie charts in this section.
fig = px.pie(df, names="artifact_type", values="count", title="Artifact Type Distribution")
# Show both the raw count and the category label on each slice.
fig.update_traces(textinfo="value+label")
fig.show()

In [59]:
# Two-level breakdown: inner ring is the artifact type, outer ring the main stat within that type.
fig = px.sunburst(
    data_frame=df,
    path=["artifact_type", "main_stat"],
    values="count",
    title="Artifact type by main stat",
    width=1000,
    height=1000,
).update_traces(textinfo="value+label")
fig.show()

In [62]:
# Share of artifacts by number of substats.
fig = px.pie(
    data_frame=df,
    names="substat_count",
    values="count",
    title="Substat Count",
).update_traces(textinfo="value+label")
fig.show()

In [63]:
# Share of artifacts per artifact set.
fig = px.pie(
    data_frame=df,
    names="set_name",
    values="count",
    title="Artifact Set Distribution",
    width=800,
    height=800,
).update_traces(textinfo="value+label")
fig.show()

In [64]:
# Share of artifacts per main stat.
# The title previously read "Artifact Set Distribution" (copied from the set_name chart),
# which mislabelled this figure; it now names the column actually plotted.
fig = px.pie(df, names="main_stat", values="count", title="Artifact Main Stat Distribution", width=800, height=800)
# Show both the raw count and the category label on each slice.
fig.update_traces(textinfo="value+label")
fig.show()

In [65]:
# Running total of artifacts by calendar date: sum "count" per day, then accumulate.
df_count_time = (
    df.groupby(df["creation_time"].dt.date)["count"]
    .sum()
    .cumsum()
    .reset_index()
)

In [66]:
# Cumulative artifact count by date, with two date ranges shaded in light gray.
# NOTE(review): the meaning of the shaded ranges is not recorded in this notebook — worth documenting.
fig = px.line(
    data_frame=df_count_time,
    x="creation_time",
    y="count",
    title="Artifact count over time",
)
fig.add_vrect(
    x0="2023-08-16", x1="2023-09-26", fillcolor="LightGray", opacity=0.5, line_width=0
).add_vrect(
    x0="2023-11-08", x1="2023-12-19", fillcolor="LightGray", opacity=0.5, line_width=0
)

In [67]:
# Artifacts obtained per calendar date (not cumulative), with the same two date ranges shaded.
df_count_day = (
    df.groupby(df["creation_time"].dt.date)["count"]
    .sum()
    .reset_index()
)
fig = px.line(
    data_frame=df_count_day,
    x="creation_time",
    y="count",
    title="Artifacts obtained each day",
)
fig.add_vrect(
    x0="2023-08-16", x1="2023-09-26", fillcolor="LightGray", opacity=0.5, line_width=0
).add_vrect(
    x0="2023-11-08", x1="2023-12-19", fillcolor="LightGray", opacity=0.5, line_width=0
)

In [68]:
# Double crit line not circlet
def crit_counts(row: pd.Series) -> int:
    """Return how many substat names in ``row["substats"]`` start with "CRIT".

    Iterating ``row["substats"]`` yields the substat names (the keys when it is
    a mapping); each name beginning with "CRIT" adds one to the result.
    """
    return sum(1 for name in row["substats"] if name.startswith("CRIT"))

# Number of CRIT substats per artifact, computed row by row.
df["crit_counts"] = df.apply(crit_counts, axis="columns")

# Distribution of CRIT substat counts for every piece that is not a circlet.
df.loc[df["artifact_type"].ne("Circlet of Logos"), "crit_counts"].value_counts()

crit_counts
0    1361
1    1108
2     130
Name: count, dtype: int64

In [69]:
# Circlets whose main stat is CRIT Rate or CRIT DMG: how many also carry a CRIT substat.
df.loc[
    df["artifact_type"].eq("Circlet of Logos")
    & df["main_stat"].isin(["CRIT Rate", "CRIT DMG"]),
    "crit_counts",
].value_counts()

crit_counts
0    96
1    32
Name: count, dtype: int64

In [75]:
# Subset of circlets with a CRIT Rate or CRIT DMG main stat, used by the pivot table below.
temp = df[
    df["artifact_type"].eq("Circlet of Logos")
    & df["main_stat"].isin(["CRIT Rate", "CRIT DMG"])
]

In [86]:
# Cross-tabulate the crit-main-stat circlets: rows are the number of substats, columns the number of CRIT substats.
# aggfunc="count" counts the "count" entries in each cell, i.e. the number of artifacts per combination.
temp.pivot_table(values="count", index="substat_count", columns="crit_counts", aggfunc="count")

crit_counts,0,1
substat_count,Unnamed: 1_level_1,Unnamed: 2_level_1
3,74,25
4,22,7


In [70]:
# Crit-main-stat circlets, listed with the most CRIT substats (then the most substats) first.
df[
    df["artifact_type"].eq("Circlet of Logos")
    & df["main_stat"].isin(["CRIT Rate", "CRIT DMG"])
].sort_values(by=["crit_counts", "substat_count"], ascending=False)

Unnamed: 0,artifact_type,level,rarity,main_stat,value,set_name,substats,equipped,artifact_id,creation_time,count,substat_count,crit_counts
2622,Circlet of Logos,0,5,CRIT DMG,9.3%,Emblem of Severed Fate,"{'ATK%': 5.3, 'HP': 209.0, 'CRIT Rate%': 3.1, 'HP%': 4.7}",,IMG_4282 2023-08-16 00:59:01,2023-08-16 00:59:01,1,4,1
706,Circlet of Logos,0,5,CRIT Rate,4.7%,Marechaussee Hunter,"{'CRIT DMG%': 6.2, 'DEF': 16.0, 'DEF%': 6.6, 'ATK': 19.0}",,Genshin Impact 2023.10.09 - 22.40.12.22 2023-10-09 22:40:12,2023-10-09 22:40:12,1,4,1
1399,Circlet of Logos,0,5,CRIT Rate,4.7%,Crimson Witch of Flames,"{'CRIT DMG%': 6.2, 'DEF%': 5.1, 'HP': 239.0, 'ATK': 18.0}",,Genshin Impact 2023.11.07 - 22.47.39.34 2023-11-07 22:47:39,2023-11-07 22:47:39,1,4,1
1755,Circlet of Logos,0,5,CRIT Rate,4.7%,Wanderer's Troupe,"{'DEF%': 6.6, 'CRIT DMG%': 5.4, 'HP': 209.0, 'Elemental Mastery': 23.0}",,Genshin Impact 2023.11.27 - 19.58.42.01 2023-11-27 19:58:42,2023-11-27 19:58:42,1,4,1
1946,Circlet of Logos,0,5,CRIT DMG,9.3%,Emblem of Severed Fate,"{'CRIT Rate%': 3.5, 'ATK': 19.0, 'ATK%': 4.7, 'DEF%': 7.3}",,Genshin Impact 2023.12.07 - 10.19.10.38 2023-12-07 10:19:11,2023-12-07 10:19:11,1,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2344,Circlet of Logos,0,5,CRIT Rate,4.7%,Golden Troupe,"{'ATK%': 5.3, 'DEF%': 6.6, 'ATK': 14.0}",,Genshin Impact 2023.12.30 - 13.41.15.15 2023-12-30 13:41:15,2023-12-30 13:41:15,1,3,0
2390,Circlet of Logos,0,5,CRIT DMG,9.3%,Marechaussee Hunter,"{'HP': 299.0, 'DEF': 16.0, 'ATK': 16.0}",,Genshin Impact 2024.01.01 - 10.58.27.11 2024-01-01 10:58:27,2024-01-01 10:58:27,1,3,0
2403,Circlet of Logos,0,5,CRIT DMG,9.3%,Golden Troupe,"{'HP': 269.0, 'HP%': 4.1, 'ATK': 14.0}",,Genshin Impact 2024.01.02 - 19.54.18.01 2024-01-02 19:54:18,2024-01-02 19:54:18,1,3,0
2445,Circlet of Logos,0,5,CRIT DMG,9.3%,Emblem of Severed Fate,"{'ATK': 16.0, 'DEF': 19.0, 'Energy Recharge%': 5.8}",,Genshin Impact 2024.01.03 - 22.44.37.16 2024-01-03 22:44:37,2024-01-03 22:44:37,1,3,0


### Roll Value

In [15]:
# Histogram of roll_value across all artifacts; text_auto=True prints each bar's count on the bar.
# nbins=20 is an upper bound on the number of bins that plotly chooses.
px.histogram(df, x="roll_value", nbins=20, title="Artifact Roll Value", labels={"roll_value": "Artifact Roll Value (%)"}, text_auto=True)

In [16]:
# Artifacts whose roll_value is exactly 400.
# NOTE(review): 400 looks like the maximum for a four-substat artifact — confirm against roll_value's definition.
df.loc[df["roll_value"].eq(400)]

Unnamed: 0,artifact_type,level,rarity,main_stat,value,set_name,substats,roll_value,equipped,artifact_id,creation_time,count,substat_count
2777,Plume of Death,0,5,ATK,47,Marechaussee Hunter,"{'CRIT DMG%': 7.8, 'HP%': 5.8, 'Energy Recharge%': 6.5, 'DEF': 23.0}",400,,IMG_4442 2023-08-17 01:29:37,2023-08-17 01:29:37,1,4
2992,Plume of Death,0,5,ATK,47,Emblem of Severed Fate,"{'HP': 299.0, 'HP%': 5.8, 'ATK%': 5.8, 'CRIT DMG%': 7.8}",400,,IMG_4666 2023-08-28 09:52:22,2023-08-28 09:52:22,1,4
3212,Flower of Life,0,5,HP,717,Golden Troupe,"{'ATK': 19.0, 'Elemental Mastery': 23.0, 'HP%': 5.8, 'ATK%': 5.8}",400,,IMG_4893 2023-09-07 11:39:56,2023-09-07 11:39:56,1,4
3199,Goblet of Eonothem,0,5,ATK,7.0%,Marechaussee Hunter,"{'HP%': 5.8, 'DEF': 23.0, 'ATK': 19.0, 'DEF%': 7.3}",400,,IMG_4880 2023-09-07 11:40:11,2023-09-07 11:40:11,1,4
155,Plume of Death,0,5,ATK,47,Marechaussee Hunter,"{'DEF%': 7.3, 'HP%': 5.8, 'Energy Recharge%': 6.5, 'CRIT Rate%': 3.9}",400,,Genshin Impact 2023.09.13 - 22.30.27.28 2023-09-13 22:30:27,2023-09-13 22:30:27,1,4


### Crit Value

In [13]:
# Histogram of crit_value across all artifacts; text_auto=True prints each bar's count on the bar.
px.histogram(df, x="crit_value", title="Artifact Substat Crit Value", labels={"crit_value": "Artifact Substat Crit Value"}, text_auto=True)