### Se importan las librerias

In [72]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from ydata_profiling import ProfileReport

# These lines were added because plots could not be displayed when running from VS Code;
# ignore them when running the notebook in Jupyter.
# Switch matplotlib to the TkAgg backend.
plt.switch_backend('TkAgg')
# Apply seaborn's "darkgrid" style to the plots that follow.
sns.set(style="darkgrid")


### Se extraen 2 DataFrames desde la wiki Fandom de Honkai: Star Rail y se almacenan en variables diferentes.

In [73]:
# read_html returns one DataFrame per HTML table found on the page.
characterUrl = pd.read_html(
    'https://honkai-star-rail.fandom.com/wiki/Character'
)
# Tables are picked by position: index 1 is the playable roster and index 2 the
# upcoming one (positions depend on the page layout at scrape time).
playableCharacters, upcomingCharacters = characterUrl[1], characterUrl[2]

playableCharacters.head(n=5)

Unnamed: 0,Icon,Name,Rarity,Path,Combat Type
0,,Arlan,,Destruction,Lightning
1,,Asta,,Harmony,Fire
2,,Bailu,,Abundance,Lightning
3,,Blade,,Destruction,Wind
4,,Bronya,,Harmony,Wind


### Se dropean columnas no relevantes y/o vacias de los df

In [74]:
# Remove the columns that carry no usable data ('Icon' and 'Rarity' come back empty
# from the scrape). A non-mutating drop with errors='ignore' keeps the cell safe to
# re-run: the original in-place drop raised KeyError on a second execution because
# the columns were already gone.
unusedColumns = ['Icon', 'Rarity']
playableCharacters = playableCharacters.drop(columns=unusedColumns, errors='ignore')
upcomingCharacters = upcomingCharacters.drop(columns=unusedColumns, errors='ignore')
playableCharacters.head()

Unnamed: 0,Name,Path,Combat Type
0,Arlan,Destruction,Lightning
1,Asta,Harmony,Fire
2,Bailu,Abundance,Lightning
3,Blade,Destruction,Wind
4,Bronya,Harmony,Wind


In [75]:
# Preview the first five upcoming characters.
upcomingCharacters.head(n=5)

Unnamed: 0,Name,Path,Combat Type
0,Argenti,Erudition,Physical
1,Dr. Ratio,The Hunt,Imaginary
2,Hanya,Harmony,Physical
3,Ruan Mei,Harmony,Ice
4,Xueyi,Destruction,Quantum


### Se agrega una columna `Status` a cada df para poder diferenciar, en un análisis posterior, los personajes actualmente jugables de los próximos

In [76]:
# Tag every row with the roster it came from so the two groups stay
# distinguishable once the frames are combined.
for roster, label in (
    (playableCharacters, 'Playable'),
    (upcomingCharacters, 'Upcoming'),
):
    roster['Status'] = label

### Se unen los df para tener un consolidado con los personajes jugables y los proximos

In [77]:
# Stack both rosters into one frame; ignore_index=True renumbers the rows 0..n-1.
characters_df = pd.concat(
    (playableCharacters, upcomingCharacters),
    ignore_index=True,
)

In [78]:
# Preview the first five rows of the combined roster.
characters_df.head(n=5)

Unnamed: 0,Name,Path,Combat Type,Status
0,Arlan,Destruction,Lightning,Playable
1,Asta,Harmony,Fire,Playable
2,Bailu,Abundance,Lightning,Playable
3,Blade,Destruction,Wind,Playable
4,Bronya,Harmony,Wind,Playable


### Se extrae información adicional desde otra fuente (gamewith.net) para complementar el dataset. En este caso se traen las estadísticas base de cada personaje

In [79]:
# Scrape the base-stat tables from GameWith; the table at index 1 is the one used.
statsUrl = pd.read_html(
    'https://gamewith.net/honkai-starrail/article/show/38646'
)
statsCharacters = statsUrl[1]
# Align the key column name with characters_df so both frames can be joined on 'Name'.
statsCharacters.rename({'Character': 'Name'}, axis='columns', inplace=True)
statsCharacters.head(n=5)

Unnamed: 0,Name,HP,ATK,DEF,SPD
0,Blade,1358,543,485,97
1,Luocha,1280,756,363,101
2,Yukong,917,599,374,107
3,Silver Wolf,1047,819,460,107
4,Jing Yuan,1164,698,485,99


### Se mergean los datos nuevos con el dataframe anterior

In [80]:
# Attach the base stats to each character, matching rows on 'Name'.
# pd.merge defaults to an inner join, so characters without a row in
# statsCharacters are dropped from characters_df.
#
# The result is assigned back to characters_df, so re-running the original cell
# merged the stats a second time and produced suffixed duplicates (HP_x, HP_y, ...).
# Dropping any stat columns left by a previous run first makes the cell idempotent.
# Assumes the stat column names do not collide with the roster columns.
statColumns = [column for column in statsCharacters.columns if column != 'Name']
characters_df = pd.merge(
    characters_df.drop(columns=statColumns, errors='ignore'),
    statsCharacters,
    on='Name',
)
# Fixed seed so the displayed sample is the same on every run.
characters_df.sample(5, random_state=0)

Unnamed: 0,Name,Path,Combat Type,Status,HP,ATK,DEF,SPD
17,Sampo,Nihility,Wind,Playable,1023,617,396,102
25,Yukong,Harmony,Imaginary,Playable,917,599,374,107
18,Seele,The Hunt,Quantum,Playable,931,640,363,115
11,Jing Yuan,Erudition,Lightning,Playable,1164,698,485,99
3,Blade,Destruction,Wind,Playable,1358,543,485,97


In [81]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric stat columns.
characters_df.describe()

Unnamed: 0,HP,ATK,DEF,SPD
count,26.0,26.0,26.0,26.0
mean,1091.038462,612.923077,444.192308,101.961538
std,169.899613,86.45689,74.813913,6.089209
min,846.0,476.0,330.0,90.0
25%,936.25,546.0,396.0,98.0
50%,1047.0,599.0,438.5,101.5
75%,1230.5,652.0,485.0,106.75
max,1397.0,819.0,654.0,115.0


In [82]:
# Column names, non-null counts, dtypes and memory usage of the merged frame.
characters_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Name         26 non-null     object
 1   Path         26 non-null     object
 2   Combat Type  26 non-null     object
 3   Status       26 non-null     object
 4   HP           26 non-null     int64 
 5   ATK          26 non-null     int64 
 6   DEF          26 non-null     int64 
 7   SPD          26 non-null     int64 
dtypes: int64(4), object(4)
memory usage: 1.8+ KB


Se utiliza la librería ydata-profiling para obtener un profile report; al parecer presenta varios bugs al ejecutarse desde Jupyter (esto no sucede desde VS Code)

In [83]:
# Build the ydata-profiling report for the merged character table and export it as HTML.
# The data is scraped from the Honkai: Star Rail wiki, so the report title names that
# game (the previous title referred to Honkai Impact 3rd, a different game).
# NOTE: the output file name is left unchanged so existing references to it keep working.
profileData = ProfileReport(characters_df, title='Honkai: Star Rail Characters Profile Report', explorative=True)
profileData.to_file('HonkaiImpact3rdCharactersProfileReport.html')

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Summarize dataset: 100%|██████████| 33/33 [00:02<00:00, 11.22it/s, Completed]                   
Generate report structure: 100%|██████████| 1/1 [00:04<00:00,  4.37s/it]
Render HTML: 100%|██████████| 1/1 [00:00<00:00,  3.78it/s]
Export report to file: 100%|██████████| 1/1 [00:00<00:00, 499.86it/s]


# Arrancan las visualizaciones de datos para entender mas el dataset

In [84]:
# Bar chart with the number of characters on each Path.
sns.countplot(data=characters_df, x='Path')

<Axes: xlabel='Path', ylabel='count'>

Se observa la distribucion de los personajes segun su Path

In [93]:
# Bar chart with the number of characters of each Combat Type.
# seaborn's axes-level functions draw on the current Axes when none is given; with a
# non-inline backend (TkAgg is selected at the top of the notebook) that Axes still
# holds the previous seaborn chart, so the bars were painted over it. A dedicated
# figure keeps each chart separate.
fig, ax = plt.subplots()
sns.countplot(data=characters_df, x='Combat Type', ax=ax)

<Axes: xlabel='Combat Type', ylabel='count'>

### Se realizan varios scatterplots para ver si existe alguna relacion entre los stats de los personajes

In [85]:
# Scatter plot of base HP against base SPD.
characters_df.plot(kind='scatter', x='HP', y='SPD')

<Axes: xlabel='HP', ylabel='SPD'>

In [86]:
# Scatter plot of base HP against base DEF.
characters_df.plot(kind='scatter', x='HP', y='DEF')

<Axes: xlabel='HP', ylabel='DEF'>

In [87]:
# Scatter plot of base ATK against base SPD.
characters_df.plot(kind='scatter', x='ATK', y='SPD')

<Axes: xlabel='ATK', ylabel='SPD'>

In [88]:
# Scatter plot of base HP against base ATK.
characters_df.plot(kind='scatter', x='HP', y='ATK')

<Axes: xlabel='HP', ylabel='ATK'>

In [89]:
# Scatter plot of base DEF against base SPD.
characters_df.plot(kind='scatter', x='DEF', y='SPD')

<Axes: xlabel='DEF', ylabel='SPD'>

Se puede observar que hay una alta correlación negativa entre SPD y HP: cuanto mayor es la velocidad de un personaje, menor es su cantidad de vida base; y cuanto mayor es su vida, menor es su velocidad.

In [90]:
# Scatter of SPD against HP with a fitted linear regression line.
# Drawn on its own figure: without an explicit Axes, seaborn reuses the current one,
# which under a non-inline backend is the previous scatter plot.
fig, ax = plt.subplots()
sns.regplot(data=characters_df, x='SPD', y='HP', ax=ax)

<Axes: xlabel='SPD', ylabel='HP'>

In [91]:
# Plain scatter of SPD against HP (no regression line).
# Drawn on its own figure: without an explicit Axes, seaborn reuses the current one,
# so under a non-inline backend the points were added on top of the previous chart.
fig, ax = plt.subplots()
sns.scatterplot(data=characters_df, x='SPD', y='HP', ax=ax)

<Axes: xlabel='SPD', ylabel='HP'>

In [92]:
# Added because plots could not be displayed when running from VS Code;
# ignore this line when running the notebook in Jupyter.
plt.show()