# Iruma-Kun 
Dies ist eine Datenanalyse von Charakteren aus dem Manga **"Welcome to Demon School Iruma-kun!"**.

Daten werden über das [Fanwiki](https://mairimashita-irumakun.fandom.com/wiki/Mairimashita!_Iruma-kun_Wiki) erfasst. Mit `requests` werden die HTML-Seiten der einzelnen Kapitel nacheinander heruntergeladen und anschließend mit `BeautifulSoup4` ausgewertet.

## Abfragen und Speichern der Rohdaten aus Fandom-Wiki

In [71]:
# Parameters

# First and last chapter number (both inclusive) requested from the wiki.
startChapter = 1
# Also used by the analysis cells as the total number of chapters.
endChapter = 413
# When False, the scraping cell performs no HTTP requests and only prints a notice.
checkFandom = False

# URL prefix of a chapter page; the chapter number is appended to it.
url = "https://mairimashita-irumakun.fandom.com/wiki/Chapter_"

In [72]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

# Collected records: one {"Kapitel", "Name"} dict per list entry found.
rows = []

# Seconds before a single HTTP request is abandoned instead of blocking forever.
requestTimeout = 30

if checkFandom:
    # Matches the section anchor with either capitalisation of "by".
    headlinePattern = re.compile(r"Characters_[bB]y_Appearance")

    for i in range(startChapter, endChapter + 1):
        try:
            response = requests.get(f"{url}{i}", timeout=requestTimeout)
        except requests.RequestException as error:
            # One unreachable page must not discard the rows gathered so far.
            print(f"Fehler in Kapitel {i}: {error}")
            continue

        if response.status_code != 200:
            print(f"Fehler in Kapitel {i}: {response.status_code}")
            continue

        soup = BeautifulSoup(response.text, "html.parser")

        headline = soup.find("span", id=headlinePattern)
        if headline is None:
            # Page has no "Characters by Appearance" section.
            continue

        # The character list is looked up as the next <ul> sibling of the
        # section's <h2>; either lookup may come back empty.
        section = headline.find_parent("h2")
        ul = section.find_next_sibling("ul") if section is not None else None
        if ul is None:
            continue

        for li in ul.find_all("li"):
            name_text = li.get_text(strip=True)
            rows.append({"Kapitel": i, "Name": name_text})
            print(f"CH_{i}: {name_text}")

    # Explicit columns keep the schema stable even when nothing was found.
    df = pd.DataFrame(rows, columns=["Kapitel", "Name"])
    df.to_json("data/Characters_by_Appearance-RAW.json", orient="records", lines=False, indent=4, force_ascii=False)

else:
    print("checkFandom = False -> No new data requested")

checkFandom = False -> No new data requested


## Datenaufbereitung
Daten einlesen, säubern und bereinigen. Keine neue Datenabfrage aus dem Internet.

In [73]:
def remove_names(df: pd.DataFrame, names_to_remove: list) -> pd.DataFrame:
    """Return ``df`` without the rows whose ``Name`` is in ``names_to_remove``.

    Args:
        df: Frame with a ``Name`` column.
        names_to_remove: Names whose rows are dropped.

    Returns:
        A new frame with the remaining rows and a fresh 0..n-1 index.
    """
    unwanted = df["Name"].isin(names_to_remove)
    kept = df.loc[~unwanted]
    return kept.reset_index(drop=True)


def split_names(df: pd.DataFrame, split_mapping: dict) -> pd.DataFrame:
    """Expand rows whose ``Name`` stands for several characters.

    Every row whose ``Name`` is a key of ``split_mapping`` is replaced by one
    row per mapped name; all other columns are copied unchanged. Rows with
    other names are kept as they are, and the row order is preserved.

    Example mapping::

        split_mapping = {
            "Name_in_dataset": ["New name 1", "New name 2"]
        }

    Args:
        df: Frame with a ``Name`` column.
        split_mapping: Maps a combined name to the list of individual names.

    Returns:
        A new frame with the columns of ``df`` (also for empty input) and a
        fresh 0..n-1 index.
    """
    records = []
    for record in df.to_dict("records"):
        name = record["Name"]
        # Names without a mapping expand to themselves, i.e. stay unchanged.
        for new_name in split_mapping.get(name, [name]):
            records.append({**record, "Name": new_name})
    # Passing the columns explicitly keeps the schema when no rows remain.
    return pd.DataFrame(records, columns=df.columns)


def rename_names(df: pd.DataFrame, rename_mapping: dict) -> pd.DataFrame:
    """Rewrite selected values of the ``Name`` column.

    Example mapping::

        rename_mapping = {
            "Old name": "New name"
        }

    Args:
        df: Frame with a ``Name`` column. It is left unmodified.
        rename_mapping: Maps an existing name to its replacement.

    Returns:
        A new frame whose ``Name`` column has the replacements applied.
    """
    # assign() builds a new frame, so the caller's frame is not mutated
    # (same contract as remove_names and split_names).
    return df.assign(Name=df["Name"].replace(rename_mapping))

# Beispiele für Transformationen
#df = remove_names(df, ["Valac Clara"])  # z.B. löschen
#df = split_names(df, {"Gaap Goemon": ["Gaap", "Goemon"]})  # aufteilen
#df = rename_names(df, {"Azazel Ameri": "Ameri Azazel"})  # umschreiben

In [74]:
import pandas as pd

df = pd.read_json("data/Characters_by_Appearance-RAW.json")

# Copy the first parenthesised text of each name into "Note", then strip every
# parenthesised part (with its leading whitespace) from the name itself.
raw_names = df['Name']
df['Note'] = raw_names.str.extract(r'\((.*?)\)')
df['Name'] = raw_names.str.replace(r'\s*\(.*?\)', '', regex=True)

# Entries that are dropped from the data set entirely.
for entry in (
    "TBA",
    "Abnormal Class",
    "Official Release - Twitter",
    "Official Teaser - Twitter",
):
    df = remove_names(df, [entry])

# Combined entries that are expanded into one row per listed name.
combined_entries = {
    "Crocell Kerori/ Kuromu": ["Kuromu", "Crocell Kerori"],
    "Fenrir/Naberius Narnia": ["Fenrir", "Naberius Narnia"],
    "Naberius Narnia/Fenrir": ["Fenrir", "Naberius Narnia"],
    "Malius Phantom /Andro M. Rock": ["Malius Phantom", "Andro M. Rock"],
    "Barbatos Bachiko/Iruma Suzuki": ["Barbatos Bachiko", "Iruma Suzuki"],
    "Andro M. Jazz/Azazel Ameri": ["Andro M. Jazz", "Azazel Ameri"],
    "Azazel Ameri/Andro M. Jazz": ["Andro M. Jazz", "Azazel Ameri"],
}
for combined, parts in combined_entries.items():
    df = split_names(df, {combined: parts})

# Unify a spelling variant.
df = rename_names(df, {"Asmodeus Amaryllis": "Asmodeus Amuryllis"})

df.to_json("data/Characters_by_Appearance-CLEAN.json", orient="records", lines=False, indent=4, force_ascii=False)

print("> Saved Cleaned Data")

> Saved Cleaned Data


## Datenanalyse

### Fehlende Kapitel

In [75]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go

df = pd.read_json("data/Characters_by_Appearance-CLEAN.json")

# Chapters that have at least one row, in order of first occurrence.
alreadyChecked = df["Kapitel"].drop_duplicates().tolist()
presentChapters = set(alreadyChecked)

# Chapters 1..endChapter without any row, ascending. Chapters in the data that
# lie outside this range are simply ignored instead of raising an error.
allChapters = [chap for chap in range(1, endChapter + 1) if chap not in presentChapters]

# Distinct character names in order of first appearance.
allCharacters = df["Name"].drop_duplicates().tolist()

availableCount = endChapter - len(allChapters)
# Formatted with two decimals below, which avoids float subtraction artifacts.
availableShare = availableCount / endChapter * 100 if endChapter else 0.0

print("STATUS FANDOM WIKI")
print("------------------")
print(f"Verfügbare Kapitel: {availableCount}/{endChapter} ({availableShare:.2f}%)")
print(f"Fehlende Kapitel: {allChapters}")
print("------------------")
print(f"Anzahl Charaktere: {len(allCharacters)}")
print(f"Charaktere: {allCharacters}")
print("------------------")

# Gruppieren nach
# maxMissingChapter = allChapters[-1]
# groupBy = 100

# maxGroupedChapter = (maxMissingChapter - (maxMissingChapter % groupBy)) + groupBy

# grouped = {}
# for start in range(0, maxGroupedChapter, groupBy):
#     end = start + groupBy - 1
#     label = f"{start}-{end}"
#     grouped[label] = []

# for chap in allChapters:
#     bucket = (chap // groupBy) * groupBy
#     label = f"{bucket}-{bucket + groupBy - 1}"
#     grouped[label].append(chap)

# df_missing = pd.DataFrame.from_dict(grouped, orient="index").transpose()
# df_missing = df_missing.fillna(" ")

# fig = go.Figure(data=[go.Table(
#     header=dict(values=list(df_missing.columns),
#                 fill_color='paleturquoise',
#                 align='left'),
#     cells=dict(values=[df_missing[col] for col in df_missing.columns],
#                fill_color='lavender',
#                align='left'))
# ])

# fig.write_html("assets/MissingChapters.html")
# print("> Plotly Table saved to assets")

STATUS FANDOM WIKI
------------------
Verfügbare Kapitel: 296/413 (71.67%)
Fehlende Kapitel: [57, 58, 59, 60, 61, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 146, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 175, 176, 181, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 241, 242, 243, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266]
------------------
Anzahl Charaktere: 208
Charaktere: ['Iruma Suzuki', 'Lord Sullivan', "Iruma's Father", "Iruma's Mother", 'Asmodeus Alice', 'Dantalion Dali', 'Eiko Oni', 'Opera', 'Naberius Kalego', 'Valac Clara', 'Yocchan', 'Camu Camu', 'Shax Lied', 'Andro M. Jazz', 'Ix Elizabetta', 'Gaap Goemon', 'Caim Kamui', 'Crocell Ke

### Erscheinung nach Kapitel

In [76]:
import pandas as pd
import plotly.express as px

# Daten laden
df = pd.read_json("data/Characters_by_Appearance-CLEAN.json")

# Scatterplot erstellen
fig = px.scatter(
    df,
    x="Kapitel",
    y="Name",
    color="Name",
    symbol="Name",
    title="<b>Character Appearances by Chapter</b>",
    subtitle="<i>All Appearances - Missing Chapters marked in red"
)

fig.update_yaxes(autorange="reversed")

fig.update_traces(
    marker=dict(
        symbol="square",
        size=5,
        line=dict(width=1)
    )
)

fig.update_layout(
    xaxis_title="Chapter",
    yaxis_title="Characters"
)
fig.update_layout(showlegend=False)
fig.update_yaxes(showticklabels=False)

for chap in allChapters:
    fig.add_vrect(
        x0=chap - 0.5,  # kleine Breite, damit es das Kapitel markiert
        x1=chap + 0.5,
        fillcolor="red",
        opacity=0.2,
        line_width=0,
        layer="below"  # Hintergrund
    )

fig.show()
fig.write_html("assets/AllAppearances.html")

print("> Plotly Graph saved to assets")


> Plotly Graph saved to assets


### Wie häufig erschienen?

In [77]:
import pandas as pd
import plotly.express as px

df = pd.read_json("data/Characters_by_Appearance-CLEAN.json")

# Number of rows (appearances) per character.
appearanceCounts = df.groupby("Name").size().reset_index(name="Anzahl")

# A stable sort keeps tied characters in a deterministic order, so the
# top-20 cut does not vary between runs.
df_grouped = appearanceCounts.sort_values("Anzahl", ascending=False, kind="stable").head(20)

fig = px.bar(
    df_grouped,
    x="Name",
    y="Anzahl",
    text="Anzahl",
    color="Name",
    title="<b>Count of Appearances by Character - Top 20</b>",
    subtitle="<i>Sorted in descending order</i>"
)

fig.update_traces(textposition='outside')
fig.update_layout(
    showlegend=False,
    xaxis_title="Character",
    yaxis_title="Count"
)

fig.show()
fig.write_html("assets/MaxAppearancesRanking.html")

print("> Plotly Graph saved to assets")

> Plotly Graph saved to assets


### Charaktere pro Kapitel

In [78]:
import pandas as pd
import plotly.express as px

df = pd.read_json("data/Characters_by_Appearance-CLEAN.json")

# Chapter numbers become strings so the x axis treats them as categories;
# the helper column of ones is summed to count rows per chapter.
df["Kapitel"] = df["Kapitel"].astype(str)
df["Anzahl"] = 1

# The 20 chapters with the most character rows, highest count first
df_grouped = (
    df.groupby("Kapitel", as_index=False)["Anzahl"]
    .sum()
    .sort_values("Anzahl", ascending=False)
    .head(20)
)

fig = px.bar(
    df_grouped,
    x="Kapitel",
    y="Anzahl",
    text="Anzahl",
    color="Kapitel",
    title="<b>Max count of (named) Characters by Chapter - Top 20</b>",
    subtitle="<i>Sorted in descending order</i>"
)

fig.update_traces(textposition='outside')
fig.update_layout(
    xaxis_title="Chapter",
    yaxis_title="Count",
    showlegend=False
)

fig.show()
fig.write_html("assets/MaxAppearancesPerChapterRanking.html")

print("> Plotly Graph saved to assets")


> Plotly Graph saved to assets


## Quick-And-Dirty Development Tools

In [79]:
import pandas as pd

# Load the original JSON back into a DataFrame
# Source JSON and the Excel file written next to it
rawJsonPath = "data/Characters_by_Appearance-RAW.json"
rawExcelPath = "data/Characters_by_Appearance-RAW.xlsx"

# Read the raw records and write them out as a workbook without the row index
df = pd.read_json(rawJsonPath)
df.to_excel(rawExcelPath, index=False)

print("Excel copy saved as Characters_by_Appearance-RAW.xlsx.")

Excel copy saved as Characters_by_Appearance-RAW.xlsx.
