In [1]:
import csv
import pandas as pd

In [49]:
# Base names of the per-dataset CSV files. Each entry is interpolated into the
# '../db/...' paths that the cells below read from and write to.
# NOTE(review): presumably one entry per game title - confirm.
file_names = ['mh2g', 'mh3u', 'mh4u', 'mhrs', 'mhwi', 'mhwilds', 'mhxx']

In [50]:
def normalize_within_rows(df, features_to_normalize, min_val=0):
    """Scale the selected columns of each row relative to that row's maximum.

    Every selected value ``x`` becomes ``(x - min_val) / (row_max - min_val)``,
    where ``row_max`` is the largest selected value in the same row. The lower
    bound is the fixed ``min_val``, not the row's own minimum.

    Args:
        df: Input DataFrame; it is not modified.
        features_to_normalize: List of column labels to scale.
        min_val: Fixed lower bound subtracted from every value (default 0).

    Returns:
        A copy of ``df`` in which the selected columns are scaled. Rows whose
        maximum equals ``min_val`` (zero range) and cells that are missing
        come out as 0.
    """
    df_normalized = df.copy()

    # Select only the columns to normalize
    features_df = df_normalized[features_to_normalize]

    row_range = features_df.max(axis=1) - min_val
    # Mask zero ranges so the division yields NaN (mapped to 0 below) instead
    # of +/-inf, which fillna() would leave in the output.
    row_range = row_range.where(row_range != 0)

    scaled = features_df.sub(min_val, axis=0).div(row_range, axis=0)

    # Degenerate rows and missing inputs are reported as 0
    df_normalized[features_to_normalize] = scaled.fillna(0)

    return df_normalized


In [51]:
def normalize_columns(df, features_to_normalize):
    """Min-max scale each listed column over all rows of ``df``.

    For every listed column, each value ``x`` becomes
    ``(x - col_min) / (col_max - col_min)`` using that column's own minimum
    and maximum.

    Args:
        df: Input DataFrame; it is not modified.
        features_to_normalize: List of column labels to scale.

    Returns:
        A copy of ``df`` with the listed columns scaled. Cells that end up
        NaN (for example in a constant column, where 0 is divided by 0) are
        set to 0.
    """
    scaled = df.copy()

    for name in features_to_normalize:
        column = df[name]
        lowest = column.min()
        spread = column.max() - lowest
        scaled[name] = column.sub(lowest).div(spread)

    # NaN appears when a column has no spread (max == min); report it as 0
    scaled[features_to_normalize] = scaled[features_to_normalize].fillna(0)
    return scaled

In [52]:
def multiply_dataframes(df1, df2, features_to_multiply):
    """Multiply the listed columns of two equally shaped DataFrames.

    Args:
        df1: Left operand; also supplies every column that is not multiplied.
        df2: Right operand.
        features_to_multiply: Column labels to multiply pairwise.

    Returns:
        A copy of ``df1`` in which each listed column holds
        ``df1[col] * df2[col]``.

    Raises:
        ValueError: If the shapes differ, or a listed column is missing from
            either DataFrame.
    """
    # Guard: both operands must have identical dimensions
    if df1.shape != df2.shape:
        raise ValueError("DataFrames must have the same dimensions")

    # Guard: every requested column must be present on both sides
    for col in features_to_multiply:
        present_in_both = col in df1.columns and col in df2.columns
        if not present_in_both:
            raise ValueError(f"Column '{col}' not found in both DataFrames")

    # Start from df1 so untouched columns carry over unchanged
    product = df1.copy()
    for col in features_to_multiply:
        product[col] = df1[col].mul(df2[col])
    return product

In [53]:
def normalize_combined(df, features_to_normalize):
    """Combine row-wise and column-wise normalization by multiplying them.

    Args:
        df: Input DataFrame.
        features_to_normalize: Column labels to normalize.

    Returns:
        The result of ``multiply_dataframes`` applied to the outputs of
        ``normalize_within_rows`` and ``normalize_columns`` for the listed
        columns.
    """
    return multiply_dataframes(
        normalize_within_rows(df, features_to_normalize),
        normalize_columns(df, features_to_normalize),
        features_to_normalize,
    )

In [54]:
# Make weakness normalized csv

features_to_normalize = ['fire_weak', 'water_weak', 'thunder_weak', 'ice_weak', 'dragon_weak']

# Collect the per-file frames and concatenate once after the loop: calling
# pd.concat on every iteration re-copies all previous rows each time, and
# seeding the accumulator with an empty DataFrame is deprecated in recent pandas.
normalized_frames = []

for file_name in file_names:
    df = pd.read_csv(f'../db/monster_weak/{file_name}_weak.csv')
    # Values are rounded to 2 decimals before being written out
    normalized_df = normalize_combined(df, features_to_normalize).round(2)
    normalized_df.to_csv(f'../db/monster_weak_norm/{file_name}_weak_norm.csv', index=False)
    normalized_frames.append(normalized_df)

# pd.concat raises on an empty list, so keep the empty-frame result in that case
normalized_dfs = pd.concat(normalized_frames, axis=0) if normalized_frames else pd.DataFrame()

# Optional: dump every normalized file into a single CSV
# normalized_dfs.to_csv('normalized_weak.csv', index=False)

In [None]:
# Make monsters.csv

def remove_parenthesis(monster_name):
    """Return the part of ``monster_name`` before the first ``' ('``.

    Names that do not contain ``' ('`` are returned unchanged.
    """
    base_name, _, _ = monster_name.partition(' (')
    return base_name

def get_monster_diff_states(monster_name, keys) -> list[str]:
    """Collect every key whose parenthesis-stripped form equals ``monster_name``.

    Args:
        monster_name: Name to match against the stripped keys.
        keys: Iterable of names, each optionally followed by ``' (...)'``.

    Returns:
        The matching keys, in the order they appear in ``keys``.
    """
    return [key for key in keys if remove_parenthesis(key) == monster_name]

# For every dataset: start from the normalized weakness rows (one per monster
# state), attach the columns of each attribute file, and write the merged
# table to '../db/monster/<file_name>.csv'.
for file_name in file_names:
    result = dict()
    final_columns = ["monster", "fire_weak", "water_weak", "thunder_weak", "ice_weak", "dragon_weak"]

    # First, look into monster weakness file (because it denotes monsters in different states).
    # newline='' lets the csv module handle line endings itself, as its documentation requires.
    with open(f'../db/monster_weak_norm/{file_name}_weak_norm.csv', newline='') as file:
        # monster,fire_weak,water_weak,thunder_weak,ice_weak,dragon_weak
        monsters_weakness = list(csv.reader(file))[1:]  # Exclude headers

    for monster_weakness in monsters_weakness:
        if not monster_weakness:
            continue  # csv.reader yields [] for a blank line
        # Exactly six fields are expected; anything else raises ValueError here
        monster, fire_weak, water_weak, thunder_weak, ice_weak, dragon_weak = monster_weakness
        result[monster] = {
            "fire_weak": fire_weak,
            "water_weak": water_weak,
            "thunder_weak": thunder_weak,
            "ice_weak": ice_weak,
            "dragon_weak": dragon_weak
        }

    # Now get monsters' other attributes

    other_attributes = ["type", "element", "size_pred"]

    for other_attribute in other_attributes:
        # Remember to convert non-parenthesis to parenthesis if needed
        with open(f'../db/monster_{other_attribute}/{file_name}_{other_attribute}.csv', newline='') as file:
            instances = list(csv.reader(file))

        columns = instances[0][1:]  # Exclude 'monster' feature
        final_columns.extend(columns)

        for instance in instances[1:]:  # Exclude headers
            if not instance:
                continue  # csv.reader yields [] for a blank line
            monster = instance[0]
            features = instance[1:]
            # An attribute row applies to every state of the monster found in the weakness file
            for name in get_monster_diff_states(monster, result.keys()):
                for i, column in enumerate(columns):
                    result[name][column] = features[i]

    # Convert result into one flat record per monster.
    # A monster that never received one of the attribute columns raises KeyError here.
    result2 = [
        {"monster": monster, **{col: values[col] for col in final_columns[1:]}}  # Exclude 'monster' feature
        for monster, values in result.items()
    ]

    with open(f'../db/monster/{file_name}.csv', 'w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=final_columns)

        writer.writeheader()
        writer.writerows(result2)


In [55]:
# Make monster_fights.csv

def has_parenthesis(monster_name):
    """Tell whether ``monster_name`` contains the substring ``' ('``."""
    marker = ' ('
    return marker in monster_name

def remove_parenthesis(monster_name):
    """Return the part of ``monster_name`` before the first ``' ('``.

    Names that do not contain ``' ('`` are returned unchanged. The same helper
    is also defined in the "Make monsters.csv" cell; it is repeated here so
    this cell can run on its own.
    """
    base_name, _, _ = monster_name.partition(' (')
    return base_name


# Load every per-dataset monster table into one list of rows.
monsters_columns = None
monsters = []
for file_name in file_names:
    # newline='' lets the csv module handle line endings itself, as its documentation requires.
    with open(f'../db/monster/{file_name}.csv', newline='') as file:
        content = list(csv.reader(file))
    # The header of the first file names the columns for all rows.
    # NOTE(review): assumes every file shares that header - confirm.
    if monsters_columns is None:
        monsters_columns = content[0]
    # Skip blank lines (csv.reader yields [] for them)
    monsters.extend(row for row in content[1:] if row)

# State-specific name ("Base (State)") -> base name
same_monsters_dict = dict()
for monster in monsters:
    name = monster[0]
    if has_parenthesis(name):
        same_monsters_dict[name] = remove_parenthesis(name)

# Reverse index: base name -> its state-specific names, in first-seen order,
# so lookups do not have to scan same_monsters_dict for every fight.
variants_by_base = dict()
for variant_name, base_name in same_monsters_dict.items():
    variants_by_base.setdefault(base_name, []).append(variant_name)

fights = pd.read_csv('../db/fights.csv').values.tolist()
monsters_columns_initiator = ['I_'+mc for mc in monsters_columns]
monsters_columns_opponent = ['O_'+mc for mc in monsters_columns]
# Output layout: initiator features, opponent features, then the outcome
# (the leading name column of each side is dropped).
csv_columns = monsters_columns_initiator[1:] + monsters_columns_opponent[1:] + ["Outcome"]
feature_columns = csv_columns[:-1]

# Monster name -> feature values; a name that occurs more than once keeps its last row
monsters_dict = {monster[0]: monster[1:] for monster in monsters}

def find_same_monster(monster_name) -> list:
    """Return the state-specific names whose base name is ``monster_name``.

    The list is empty when no such name exists.
    """
    return list(variants_by_base.get(monster_name, []))

def _resolve_monster_names(monster_name) -> list:
    """Return ``[monster_name]`` if it has its own entry, else its state-specific names."""
    if monster_name in monsters_dict:
        return [monster_name]
    return find_same_monster(monster_name)

result = []
for fight in fights:
    initiator, opponent, outcome = fight

    # A fight expands into one row per (initiator state, opponent state) pair.
    # When both names have their own entry this is exactly one row; when a name
    # has neither an entry nor state-specific variants the fight yields no rows.
    same_initiators = _resolve_monster_names(initiator)
    same_opponents = _resolve_monster_names(opponent)

    for same_initiator in same_initiators:
        for same_opponent in same_opponents:
            row_data = monsters_dict[same_initiator] + monsters_dict[same_opponent]
            row = dict(zip(feature_columns, row_data))
            row["Outcome"] = outcome
            result.append(row)

with open('../db/monster_fights.csv', 'w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=csv_columns)

    writer.writeheader()
    writer.writerows(result)
