In [1]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd

In [2]:
# Leer el archivo
data = pd.read_csv('BBDD/expanded', header=None)

In [3]:
# Categorias de head
data_categories = ['cap-shape', 'cap-surface', 'cap-color', 'bruises', 'odor', 'gill-attachment', 'gill-spacing', 'gill-size',
                   'gill-color', 'stalk-shape', 'stalk-root', 'stalk-surface-above-ring', 'stalk-surface-below-ring',
                   'stalk-color-above-ring', 'stalk-color-below-ring', 'veil-type', 'veil-color', 'ring-number', 'ring-type',
                   'spore-print-color', 'population', 'habitat', 'extra-column']


# Agregar categorias a los datos 
data.columns = data_categories

In [4]:
# Imprimiendo datos
print(data)

                                              cap-shape cap-surface cap-color  \
0                                                EDIBLE      CONVEX    SMOOTH   
1                                                EDIBLE      CONVEX    SMOOTH   
2                                                EDIBLE      CONVEX    SMOOTH   
3                                                EDIBLE      CONVEX    SMOOTH   
4                                                EDIBLE      CONVEX    SMOOTH   
...                                                 ...         ...       ...   
8412                                             EDIBLE     KNOBBED    SMOOTH   
8413                                             EDIBLE     KNOBBED    SMOOTH   
8414                                             EDIBLE     KNOBBED    SMOOTH   
8415                                             EDIBLE     KNOBBED    SMOOTH   
8416  ----------------------------------------------...         NaN       NaN   

     bruises     odor gill-

In [5]:
# Imprimiendo las ultimas filas de data. 
# Por defecto, muestra las ultimas 5 filas,
# pero se puede especificar un número diferente de filas pasando un argumento dentro de los paréntesis, por ejemplo: tail(10)

print(data.tail())

                                              cap-shape cap-surface cap-color  \
8412                                             EDIBLE     KNOBBED    SMOOTH   
8413                                             EDIBLE     KNOBBED    SMOOTH   
8414                                             EDIBLE     KNOBBED    SMOOTH   
8415                                             EDIBLE     KNOBBED    SMOOTH   
8416  ----------------------------------------------...         NaN       NaN   

     bruises odor gill-attachment gill-spacing gill-size gill-color  \
8412   BROWN   NO            NONE     ATTACHED     CLOSE      BROAD   
8413   BROWN   NO            NONE     ATTACHED     CLOSE      BROAD   
8414   BROWN   NO            NONE     ATTACHED     CLOSE      BROAD   
8415   BROWN   NO            NONE     ATTACHED     CLOSE      BROAD   
8416     NaN  NaN             NaN          NaN       NaN        NaN   

     stalk-shape  ... stalk-color-above-ring stalk-color-below-ring veil-type  \
8412 

In [6]:
# Imprimiendo las primeras filas de data. Por defecto, muestra las primeras 5 filas,
# pero se puede especificar un número diferente de filas pasando un argumento dentro
# de los paréntesis, por ejemplo: head(10) 

print(data.head())

  cap-shape cap-surface cap-color bruises     odor gill-attachment  \
0    EDIBLE      CONVEX    SMOOTH   WHITE  BRUISES          ALMOND   
1    EDIBLE      CONVEX    SMOOTH   WHITE  BRUISES          ALMOND   
2    EDIBLE      CONVEX    SMOOTH   WHITE  BRUISES          ALMOND   
3    EDIBLE      CONVEX    SMOOTH   WHITE  BRUISES          ALMOND   
4    EDIBLE      CONVEX    SMOOTH   WHITE  BRUISES          ALMOND   

  gill-spacing gill-size gill-color stalk-shape  ... stalk-color-above-ring  \
0         FREE   CROWDED     NARROW       WHITE  ...                 SMOOTH   
1         FREE   CROWDED     NARROW       WHITE  ...                 SMOOTH   
2         FREE   CROWDED     NARROW        PINK  ...                 SMOOTH   
3         FREE   CROWDED     NARROW        PINK  ...                 SMOOTH   
4         FREE   CROWDED     NARROW       BROWN  ...                 SMOOTH   

  stalk-color-below-ring veil-type veil-color ring-number ring-type  \
0                  WHITE     WHIT

In [7]:
#El método describe() en Pandas genera estadísticas descriptivas para todas 
#las columnas numéricas del DataFrame, como la cantidad de valores, la media,
#la desviación estándar, los valores mínimo y máximo, y los percentiles (25%, 50%, y 75%).

print(data.describe())

       cap-shape cap-surface cap-color bruises  odor gill-attachment  \
count       8417        8416      8416    8416  8416            8416   
unique         3           6         4      10     2               9   
top       EDIBLE      CONVEX     SCALY   BROWN    NO            NONE   
freq        4488        3796      3268    2320  5040            3808   

       gill-spacing gill-size gill-color stalk-shape  ...  \
count          8416      8416       8416        8416  ...   
unique            2         2          2          12  ...   
top            FREE     CLOSE      BROAD        BUFF  ...   
freq           8200      6824       5880        1728  ...   

       stalk-color-above-ring stalk-color-below-ring veil-type veil-color  \
count                    8416                   8416      8416       8416   
unique                      4                      9         9          1   
top                    SMOOTH                  WHITE     WHITE    PARTIAL   
freq                     

In [8]:
#El método info() en Pandas muestra un resumen conciso del Data, 
#incluyendo información sobre la cantidad de filas y columnas, el tipo
#de datos de cada columna y la cantidad de valores no nulos.

print(data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8417 entries, 0 to 8416
Data columns (total 23 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   cap-shape                 8417 non-null   object
 1   cap-surface               8416 non-null   object
 2   cap-color                 8416 non-null   object
 3   bruises                   8416 non-null   object
 4   odor                      8416 non-null   object
 5   gill-attachment           8416 non-null   object
 6   gill-spacing              8416 non-null   object
 7   gill-size                 8416 non-null   object
 8   gill-color                8416 non-null   object
 9   stalk-shape               8416 non-null   object
 10  stalk-root                8416 non-null   object
 11  stalk-surface-above-ring  8416 non-null   object
 12  stalk-surface-below-ring  8416 non-null   object
 13  stalk-color-above-ring    8416 non-null   object
 14  stalk-color-below-ring  

In [13]:
# Itera sobre cada elemento (columna) en la lista data_categories
for column_name in data_categories:
    # Imprime el conteo de valores únicos en la columna correspondiente del DataFrame data
    print(data[column_name].value_counts(), "\n \n")

EDIBLE                                                                    4488
POISONOUS                                                                 3928
----------------------------------------------------------------------       1
Name: cap-shape, dtype: int64 
 

CONVEX     3796
FLAT       3292
KNOBBED     840
BELL        452
SUNKEN       32
CONICAL       4
Name: cap-surface, dtype: int64 
 

SCALY      3268
SMOOTH     2684
FIBROUS    2460
GROOVES       4
Name: cap-color, dtype: int64 
 

BROWN       2320
GRAY        2096
RED         1500
YELLOW      1072
WHITE       1040
BUFF         168
PINK         144
CINNAMON      44
PURPLE        16
GREEN         16
Name: bruises, dtype: int64 
 

NO         5040
BRUISES    3376
Name: odor, dtype: int64 
 

NONE        3808
FOUL        2160
FISHY        576
SPICY        576
ALMOND       400
ANISE        400
PUNGENT      256
CREOSOTE     192
MUSTY         48
Name: gill-attachment, dtype: int64 
 

FREE        8200
ATTACHED     216
Name: gil

In [None]:
# Rellenar los valores nulos con un valor específico (por ejemplo, 0)
data_filled = data.fillna(0)

# Imprimir el DataFrame después de rellenar los valores nulos
print(data_filled)