## Goals
- Clean the data to make it more useful
- Predict Armor Gacha banners

## Imports

In [1]:
# Basic
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Utilities
import os

# Notebook Utilities
%matplotlib inline

## Functions

In [2]:
# Function which separates feature types
def data_seperator(data, depend, discrete_threshold=10):
    """Split a DataFrame into groups of features by type.

    Parameters
    ----------
    data : pandas.DataFrame
        Input table. It is not modified; every result is a new object.
    depend : column label
        Name of the dependent (target) column.
    discrete_threshold : int, default 10
        A numeric feature with fewer than this many distinct values
        (NaN counts as a value) is classed as discrete.

    Returns
    -------
    tuple
        ``(data_num, data_discrete, data_continuous, data_categ,
        data_time, data_depend)`` where ``data_num`` holds the numeric
        features with the time features already removed, i.e. the
        discrete and continuous features together.
    """
    # Stores dependent variable
    data_depend = data[depend]
    features = data.drop(depend, axis=1)

    # Stores categorical features (object-dtype columns)
    categ_cols = [
        col for col, is_object in zip(features.columns, features.dtypes == 'O')
        if is_object
    ]
    data_categ = pd.DataFrame(features[categ_cols])

    # Stores all numerical features
    data_num = features.drop(categ_cols, axis=1)

    # Separating time features from numerical features.
    # Every token is matched case-insensitively (so 'Day' is caught as well
    # as 'day'); str() keeps non-string column labels from raising.
    time_tokens = ('year', 'yr', 'day', 'week')
    time_cols = [
        col for col in data_num.columns
        if any(token in str(col).lower() for token in time_tokens)
    ]
    data_time = pd.DataFrame(data_num[time_cols])
    data_num = data_num.drop(time_cols, axis=1)

    # Separating discrete and continuous features
    discrete_cols = [
        col for col in data_num
        if len(data_num[col].unique()) < discrete_threshold
    ]
    data_discrete = pd.DataFrame(data_num[discrete_cols])
    data_continuous = data_num.drop(discrete_cols, axis=1)

    # Returning the features
    return data_num, data_discrete, data_continuous, data_categ, data_time, data_depend

## Dataset Information
I scraped the banner information from the official Alchemia Story site.

- Source: https://en.alchemiastory.jp/information/?page=1&type_code=notice

## Working Directory

In [3]:
# Remember the directory the notebook was started from so it can be restored later
old_path = os.getcwd()

# Move up one level and record the resulting directory.
# NOTE: the move is relative, so running this cell again climbs another level.
os.chdir(os.pardir)
path = os.getcwd()

## Reading Data

In [4]:
# Load the raw training CSV from under `path` and preview the first rows
train_csv = f'{path}/data/raw/train.csv'
data = pd.read_csv(train_csv)
data.head()

Unnamed: 0,Gacha_Name,Date
0,Battle on Board Gacha,2022-10-14
1,Heart Labyrinth Gacha,2022-10-14
2,"Increases MDEF!""Sorcerer Student Council Gacha...",2022-10-14
3,"Step-up style ""Sorcerer Student Council Gacha""...",2022-10-14
4,Nightmare Gacha,2022-10-13


## Data Information

In [5]:
# Print the column dtypes, non-null counts and memory usage of the frame
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1540 entries, 0 to 1539
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Gacha_Name  1540 non-null   object
 1   Date        1540 non-null   object
dtypes: object(2)
memory usage: 24.2+ KB


## Separating the data
Separated the data into different variables for ease of access.

In [6]:
# Split the frame into feature groups, using the banner name as the dependent variable
(
    data_num,
    data_discrete,
    data_continuous,
    data_categ,
    data_time,
    data_depend,
) = data_seperator(data, depend='Gacha_Name')

## Data Description

In [7]:
# Summary statistics for every column of the frame
data.describe()

Unnamed: 0,Gacha_Name,Date
count,1540,1540
unique,1253,792
top,Expansion items are now on time-sale!,2022-08-20
freq,17,15


In [8]:
# List the column labels currently in the frame
data.columns

Index(['Gacha_Name', 'Date'], dtype='object')

In [9]:
# Year: characters 0-3 of the Date string (kept as text)
data['Year'] = data['Date'].str.slice(0, 4)

In [10]:
# Month: characters 5-6 of the Date string (kept as text)
data['Month'] = data['Date'].str.slice(5, 7)

In [11]:
# Day: everything from character 8 to the end of the Date string (kept as text)
data['Day'] = data['Date'].str.slice(8)

In [12]:
# Remove the Date column from the frame
data = data.drop(columns='Date')

In [13]:
# Preview the first rows of the frame
data.head()

Unnamed: 0,Gacha_Name,Year,Month,Day
0,Battle on Board Gacha,2022,10,14
1,Heart Labyrinth Gacha,2022,10,14
2,"Increases MDEF!""Sorcerer Student Council Gacha...",2022,10,14
3,"Step-up style ""Sorcerer Student Council Gacha""...",2022,10,14
4,Nightmare Gacha,2022,10,13


In [14]:
# Collect every banner name that contains the search text, ignoring case
selected = [name for name in data['Gacha_Name'] if 'lr acc' in name.lower()]

selected

['「UPDATE 8/26 15:25 (JST)」Win LR Accessory!? "Traditional Jewelry Gacha" Arrives!',
 'Win LR Accessory!? "Traditional Jewelry Gacha" Arrives!',
 'Win LR Accessory!? "Traditional Jewelry Gacha" Arrives!',
 'Win LR Accessory!? "Traditional Jewelry Gacha" Arrives!',
 'Win LR Accessory!? "Traditional Jewelry Gacha" Arrives!',
 'LR Accessory G 2021Tix',
 '【10/22Update】Win LR Accessory!? "Traditional Jewelry Gacha" Arrives!',
 '[8/20 19:20 JST Edit]Win LR Accessory!? "Traditional Jewelry Gacha" Arrives!',
 '【6/28 Edit】Win LR Accessory!? "Traditional Jewelry Gacha" Arrives!']

In [15]:
# Changing Gacha Name of row 152 to the standard announcement title.
# Use .loc so the value is written directly into `data`: the chained form
# data['Gacha_Name'][152] = ... assigns through an intermediate Series, which
# triggers SettingWithCopyWarning and leaves `data` unchanged when pandas
# copy-on-write is active.
data.loc[152, 'Gacha_Name'] = "Win LR Accessory!? \"Traditional Jewelry Gacha\" Arrives!"

In [16]:
# Rows whose banner name is exactly the selected Gacha title
lr_accessory_name = 'Win LR Accessory!? "Traditional Jewelry Gacha" Arrives!'
data.loc[data['Gacha_Name'].eq(lr_accessory_name)]

Unnamed: 0,Gacha_Name,Year,Month,Day
152,"Win LR Accessory!? ""Traditional Jewelry Gacha""...",2022,8,26
261,"Win LR Accessory!? ""Traditional Jewelry Gacha""...",2022,6,24
308,"Win LR Accessory!? ""Traditional Jewelry Gacha""...",2022,4,26
365,"Win LR Accessory!? ""Traditional Jewelry Gacha""...",2022,2,23
378,"Win LR Accessory!? ""Traditional Jewelry Gacha""...",2022,1,29


In [17]:
# Switch the working directory back to the one saved in old_path
os.chdir(old_path)

## Conclusions
- There is not enough data to predict Armor Gacha banners
- If more data is collected over time, prediction may become possible
- The LR Accessory gacha and some other gachas could be predicted, but again the amount of data is very small