In [1]:
import pandas as pd

# Importing Datasets

In [3]:
# Load the first dataset from the shared Data directory, decoding the file as
# Latin-1 (presumably because it is not valid UTF-8 -- TODO confirm).
genshin = pd.read_csv(
    "../Data/genshin.csv",
    encoding='latin1',
)

In [4]:
# Load the second dataset (per-level character stats) with read_csv's
# default encoding.
genstats = pd.read_csv(filepath_or_buffer="../Data/Genshinstats.csv")

In [5]:
# Rename the 'Weapon' column to 'weapon_type' so the stats frame exposes the
# same column name that the later merge joins on. Rebinding to the returned
# frame (instead of inplace=True) keeps the step chainable and avoids mutating
# the loaded frame in place; a column that is absent is simply left alone.
genstats = genstats.rename(columns={'Weapon': 'weapon_type'})

In [6]:
# Display the stats frame to confirm the column now appears as 'weapon_type'.
genstats

Unnamed: 0,Character,Lv,Rarity,Element,weapon_type,Main role,Ascension,Base HP,Base ATK,Base DEF
0,Amber,1,4,Pyro,Bow,Sub DPS,ATK,793,19,50
1,Amber,20,4,Pyro,Bow,Sub DPS,ATK,2038,48,129
2,Amber,20,4,Pyro,Bow,Sub DPS,ATK,2630,62,167
3,Amber,40,4,Pyro,Bow,Sub DPS,ATK,3940,93,250
4,Amber,40,4,Pyro,Bow,Sub DPS,ATK,4361,103,277
...,...,...,...,...,...,...,...,...,...,...
569,Kokomi,70,5,Hydro,Catalyst,Healer,Hydro DMG,10306,179,503
570,Kokomi,70,5,Hydro,Catalyst,Healer,Hydro DMG,10945,190,534
571,Kokomi,80,5,Hydro,Catalyst,Healer,Hydro DMG,11885,207,580
572,Kokomi,80,5,Hydro,Catalyst,Healer,Hydro DMG,12524,218,611


# Merging Datasets and Summary Statistics

In [8]:
# Inner-join the two frames on their shared 'weapon_type' column, keeping only
# rows whose weapon type occurs in both.
# NOTE(review): 'weapon_type' repeats within each frame, so this is a
# many-to-many join -- every row of `genshin` is paired with every row of
# `genstats` that has the same weapon type. If one row per character/level is
# intended, a character-name key would be needed -- confirm.
merged_genshin = genshin.merge(genstats, on='weapon_type', how='inner')

## Explanation:
Using an inner join for this merge ensures that only rows whose `weapon_type` value appears in both datasets are included. This avoids mismatched or incomplete rows in the final dataset.

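An inner join does not itself create any NA values, because only matching entries from both datasets are included in the result. Missing values that already exist in the source datasets are still carried over, however, so the merged dataset is consistent on `weapon_type` but not necessarily free of nulls (see the null count reported below).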

In [10]:
# DataFrame.shape is (row count, column count); unpack both in one step.
num_rows, num_columns = merged_genshin.shape
# Total missing cells: per-column null counts, summed over all columns.
num_nulls = merged_genshin.isna().sum().sum()

# Report the size of the merged frame and how many cells are missing.
print(f"Number of columns: {num_columns}")
print(f"Number of rows: {num_rows}")
print(f"Number of null values: {num_nulls}")


Number of columns: 90
Number of rows: 10010
Number of null values: 16324


In [11]:
# Per-column profile of the merged frame.
data_types = merged_genshin.dtypes  # dtype of each column
unique_values = merged_genshin.nunique()  # distinct non-null values per column
null_values = merged_genshin.isna().sum()  # missing values per column
# include='all' summarises both numeric and non-numeric columns.
summary_stats = merged_genshin.describe(include='all')

# Print each section under its heading, in the same order as before.
for heading, section in (
    ("Statistical Summary:\n", summary_stats),
    ("\nNumber of Null Values:\n", null_values),
    ("\nNumber of Unique Values:\n", unique_values),
    ("\nData Types:\n", data_types),
):
    print(heading, section)


Statistical Summary:
        character_name        rarity     region vision   arkhe weapon_type  \
count           10010  10010.000000       8484  10010    1134       10010   
unique             84           NaN          6      7       3           5   
top            Albedo           NaN  Mondstadt   Cryo  Pneuma       Sword   
freq              140           NaN       2240   1750     742        3780   
mean              NaN      4.576224        NaN    NaN     NaN         NaN   
std               NaN      0.494180        NaN    NaN     NaN         NaN   
min               NaN      4.000000        NaN    NaN     NaN         NaN   
25%               NaN      4.000000        NaN    NaN     NaN         NaN   
50%               NaN      5.000000        NaN    NaN     NaN         NaN   
75%               NaN      5.000000        NaN    NaN     NaN         NaN   
max               NaN      5.000000        NaN    NaN     NaN         NaN   

                model constellation         birthday 