In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display

In [2]:
# Load seaborn's 'titanic' example dataset into `df`, the frame every later cell explores.
# NOTE(review): load_dataset presumably fetches from seaborn's online data repository
# on first use — confirm behaviour when running offline.
df = sns.load_dataset('titanic')

In [3]:
# --- MAIN TITLE ---
print('~'*50)
print('EXPLORACIÓN INICIAL')
print('~'*50, '\n')

# --- First and last records ---
# The f-strings use double outer quotes so the nested '-' literal also parses on
# Python < 3.12 (reusing the outer quote inside an f-string requires PEP 701).
print(f"{'-'*30}\nPrimeros registros:\n{'-'*30}")
display(df.head(5))

print(f"{'-'*30}\nÚltimos registros:\n{'-'*30}")
display(df.tail(5))

print('='*50)

# --- Info, shape, columns and dtypes ---
# Double outer quotes keep the f-strings valid on Python < 3.12 as well.
print(f"{'-'*30}\nInformación:\n{'-'*30}")
# DataFrame.info() prints its report itself and returns None, so it must not be
# wrapped in display(): doing so rendered a stray "None" right after the report.
df.info()

print(f"{'-'*30}\nDimensión:\n{'-'*30}")
display(df.shape)

print(f"{'-'*30}\nColumna y tipo de dato:\n{'-'*30}")
# One row per column with its dtype, as a single-column table.
display(df.dtypes.to_frame('Tipo de dato'))

print('='*50)

# --- Descriptive statistics, mode, range and cardinality ---
# Double outer quotes keep the f-string valid on Python < 3.12 as well.
print(f"{'-'*30}\nDescripción estadística, moda, rango y cardinalidad:\n{'-'*30}")
# One row per column of df, one column per statistic.
resumen = df.describe(include='all').T
# Row 0 of DataFrame.mode() holds the first modal value of every column.
resumen.insert(2, 'mode', df.mode().iloc[0])
# Range is computed for numeric columns only; the remaining rows get NaN through
# index alignment on assignment.
numericas = df.select_dtypes(include=[np.number])
resumen['range'] = numericas.max() - numericas.min()
# Replace describe()'s 'unique' with the distinct-value count of every column.
resumen['unique'] = df.nunique()
# errors='ignore': describe() only produces 'top'/'freq' when the frame has
# non-numeric columns, so an all-numeric frame must not raise KeyError here.
display(resumen.drop(columns=['top', 'freq'], errors='ignore'))

print('='*50)

# --- Distribution ---
# Double outer quotes keep the f-string valid on Python < 3.12 as well.
print(f"{'-'*30}\nDistribución:\n{'-'*30}")
# NOTE(review): only object-dtype columns are listed; category/bool columns
# (e.g. 'class', 'deck') are skipped — confirm that is intended.
for col in df.select_dtypes(include=['object']).columns:
    print(f'\n ° {col}:')
    # Count once and derive the share from the counts instead of running
    # value_counts twice; missing values are excluded, so shares are relative
    # to the non-null rows of the column.
    conteo = df[col].value_counts()
    display(pd.DataFrame({
        'Conteo': conteo,
        'Porcentaje': (conteo / conteo.sum() * 100).round(2)
    }))

print('='*50)

# --- Nulls and duplicates ---
# Double outer quotes keep the f-strings valid on Python < 3.12 as well.
print(f"{'-'*30}\nNulos:\n{'-'*30}")
# Build the null mask once; its column sums are the counts and its column means
# the fractions of missing values.
nulos = df.isnull()
display(pd.DataFrame({
    'Conteo': nulos.sum(),
    'Porcentaje': (nulos.mean() * 100).round(2)
}))

print(f"{'-'*30}\nDuplicados:\n{'-'*30}")
# Boolean mask of rows that repeat an earlier row; computed once and reused.
duplicados = df.duplicated()
display(pd.DataFrame({
    'Conteo': [duplicados.sum()],
    'Porcentaje': [(duplicados.mean() * 100).round(2)]
}))

print('='*50)

# --- Memory usage ---
print('Uso de memoria:')
# Per-column (and index) byte counts; deep=True asks pandas to also measure the
# contents of object columns rather than just the pointers.
bytes_por_columna = df.memory_usage(deep=True)
display(bytes_por_columna.to_frame(name='Bytes'))

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
EXPLORACIÓN INICIAL
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

------------------------------
Primeros registros:
------------------------------


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


------------------------------
Últimos registros:
------------------------------


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
886,0,2,male,27.0,0,0,13.0,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.45,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0,C,First,man,True,C,Cherbourg,yes,True
890,0,3,male,32.0,0,0,7.75,Q,Third,man,True,,Queenstown,no,True


------------------------------
Información:
------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   survived     891 non-null    int64   
 1   pclass       891 non-null    int64   
 2   sex          891 non-null    object  
 3   age          714 non-null    float64 
 4   sibsp        891 non-null    int64   
 5   parch        891 non-null    int64   
 6   fare         891 non-null    float64 
 7   embarked     889 non-null    object  
 8   class        891 non-null    category
 9   who          891 non-null    object  
 10  adult_male   891 non-null    bool    
 11  deck         203 non-null    category
 12  embark_town  889 non-null    object  
 13  alive        891 non-null    object  
 14  alone        891 non-null    bool    
dtypes: bool(2), category(2), float64(2), int64(4), object(5)
memory usage: 80.7+ KB


None

------------------------------
Dimensión:
------------------------------


(891, 15)

------------------------------
Columna y tipo de dato:
------------------------------


Unnamed: 0,Tipo de dato
survived,int64
pclass,int64
sex,object
age,float64
sibsp,int64
parch,int64
fare,float64
embarked,object
class,category
who,object


------------------------------
Descripción estadística, moda, rango y cardinalidad:
------------------------------


Unnamed: 0,count,unique,mode,mean,std,min,25%,50%,75%,max,range
survived,891.0,2,0,0.383838,0.486592,0.0,0.0,0.0,1.0,1.0,1.0
pclass,891.0,3,3,2.308642,0.836071,1.0,2.0,3.0,3.0,3.0,2.0
sex,891.0,2,male,,,,,,,,
age,714.0,88,24.0,29.699118,14.526497,0.42,20.125,28.0,38.0,80.0,79.58
sibsp,891.0,7,0,0.523008,1.102743,0.0,0.0,0.0,1.0,8.0,8.0
parch,891.0,7,0,0.381594,0.806057,0.0,0.0,0.0,0.0,6.0,6.0
fare,891.0,248,8.05,32.204208,49.693429,0.0,7.9104,14.4542,31.0,512.3292,512.3292
embarked,889.0,3,S,,,,,,,,
class,891.0,3,Third,,,,,,,,
who,891.0,3,man,,,,,,,,


------------------------------
Distribución:
------------------------------

 ° sex:


Unnamed: 0_level_0,Conteo,Porcentaje
sex,Unnamed: 1_level_1,Unnamed: 2_level_1
male,577,64.76
female,314,35.24



 ° embarked:


Unnamed: 0_level_0,Conteo,Porcentaje
embarked,Unnamed: 1_level_1,Unnamed: 2_level_1
S,644,72.44
C,168,18.9
Q,77,8.66



 ° who:


Unnamed: 0_level_0,Conteo,Porcentaje
who,Unnamed: 1_level_1,Unnamed: 2_level_1
man,537,60.27
woman,271,30.42
child,83,9.32



 ° embark_town:


Unnamed: 0_level_0,Conteo,Porcentaje
embark_town,Unnamed: 1_level_1,Unnamed: 2_level_1
Southampton,644,72.44
Cherbourg,168,18.9
Queenstown,77,8.66



 ° alive:


Unnamed: 0_level_0,Conteo,Porcentaje
alive,Unnamed: 1_level_1,Unnamed: 2_level_1
no,549,61.62
yes,342,38.38


------------------------------
Nulos:
------------------------------


Unnamed: 0,Conteo,Porcentaje
survived,0,0.0
pclass,0,0.0
sex,0,0.0
age,177,19.87
sibsp,0,0.0
parch,0,0.0
fare,0,0.0
embarked,2,0.22
class,0,0.0
who,0,0.0


------------------------------
Duplicados:
------------------------------


Unnamed: 0,Conteo,Porcentaje
0,107,12.01


Uso de memoria:


Unnamed: 0,Bytes
Index,132
survived,7128
pclass,7128
sex,47851
age,7128
sibsp,7128
parch,7128
fare,7128
embarked,44514
class,1162
