## Librerias

In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.options.display.max_columns = False

## **1. Introducción a pandas**

### **1.1 Series**

<ul>
    <li>Son arreglos unidimensionales indexados.</li>
    <li>Nos permiten hacer slicing y buscar por el índice.</li>
    <li>Tambien nos permiten realizar operaciones matemáticas.</li>
    <li>Reciben distintos tipos de datos.</li>
</ul>

In [3]:
a = pd.Series(
    ['El ojo del mundo', 'La gran cacería', 'El dragón renacido', 'El ascenso de la sombra', 'El dragón renacido'],
    index=[1, 2, 3, 4, 5]
)
a

1           El ojo del mundo
2            La gran cacería
3         El dragón renacido
4    El ascenso de la sombra
5         El dragón renacido
dtype: object

### **1.2 DataFrames**

<ul>
    <li>Son la estructura principal de <strong>pandas</strong>.</li>
    <li>Son arreglos de dos dimensiones.</li>
    <li>Vienen indexados por lo que facilitan la búsqueda por columnas o filas.</li>
</ul>

#### **Leyendo archivos**

In [72]:
books = pd.read_csv('Data/bestsellers.csv')

In [5]:
characters = pd.read_json('Data/characters.json')

#### **Filtrando un DataFrame I**

In [6]:
# Hacer slicing con las filas
books[0:4]

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction


In [7]:
# Seleccionar por columnas
books[['Name', 'Author', 'Year']]

Unnamed: 0,Name,Author,Year
0,10-Day Green Smoothie Cleanse,JJ Smith,2016
1,11/22/63: A Novel,Stephen King,2011
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,2018
3,1984 (Signet Classics),George Orwell,2017
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,2019
...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,2019
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,2016
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,2017
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,2018


#### **Utilizando Loc**

In [8]:
# Usando Loc
books.loc[0:4, ['Name', 'Author']]

Unnamed: 0,Name,Author
0,10-Day Green Smoothie Cleanse,JJ Smith
1,11/22/63: A Novel,Stephen King
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson
3,1984 (Signet Classics),George Orwell
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids


In [9]:
# Usando Loc
books.loc[:, ['Reviews']]

Unnamed: 0,Reviews
0,17350
1,2052
2,18979
3,21424
4,7665
...,...
545,9413
546,14331
547,14331
548,14331


#### **Utilizando Iloc**

In [10]:
books.iloc[:5, 0:3]

Unnamed: 0,Name,Author,User Rating
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7
1,11/22/63: A Novel,Stephen King,4.6
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7
3,1984 (Signet Classics),George Orwell,4.7
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8


#### **Agregar datos**

In [37]:
# Agregar columnas
books['Nueva columna'] = np.nan

In [36]:
data = np.arange(0, len(books))
books['Rango'] = data

In [38]:
books.head(3)

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Genre,Nueva columna,Rango
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,Non Fiction,,0
1,11/22/63: A Novel,Stephen King,4.6,2052,22,Fiction,,1
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,Non Fiction,,2


In [41]:
# Agregar filas
books.append(books).head()

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Genre,Nueva columna,Rango
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,Non Fiction,,0
1,11/22/63: A Novel,Stephen King,4.6,2052,22,Fiction,,1
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,Non Fiction,,2
3,1984 (Signet Classics),George Orwell,4.7,21424,6,Fiction,,3
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,Non Fiction,,4


In [47]:
books.append(
    {
        'Name': 'The dragon reborn',
        'Author': 'Robert Jordan',
        'Genre': 'Fiction'
    }, 
    ignore_index=True)

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Genre,Nueva columna,Rango
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350.0,8.0,Non Fiction,,0.0
1,11/22/63: A Novel,Stephen King,4.6,2052.0,22.0,Fiction,,1.0
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979.0,15.0,Non Fiction,,2.0
3,1984 (Signet Classics),George Orwell,4.7,21424.0,6.0,Fiction,,3.0
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665.0,12.0,Non Fiction,,4.0
...,...,...,...,...,...,...,...,...
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331.0,8.0,Non Fiction,,546.0
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331.0,8.0,Non Fiction,,547.0
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331.0,8.0,Non Fiction,,548.0
549,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331.0,8.0,Non Fiction,,549.0


#### **Eliminar datos**

In [14]:
# Eliminar columnas
books.drop('Genre', axis=1).head()

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019


In [24]:
# Eliminar columnas II
del books['Year']

In [23]:
# Eliminar filas
books.drop([2,4], axis=0)

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
5,A Dance with Dragons (A Song of Ice and Fire),George R. R. Martin,4.4,12643,11,2011,Fiction
6,A Game of Thrones / A Clash of Kings / A Storm...,George R. R. Martin,4.7,19735,30,2014,Fiction
...,...,...,...,...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9,9413,8,2019,Fiction
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2016,Non Fiction
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2017,Non Fiction
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction


#### **Trabajando con valores nulos**

In [52]:
df = pd.DataFrame(
    {
        'Name': ['El dragon renacido', 'El señor del Caos', 'Un recuerdo de Luz', np.nan],
        'Número': [3, np.nan, 14, 0],
        'Año': [2002, 2004, None, 2010]
    }
)

In [56]:
# Identificar los valores nulos
df.isna()

Unnamed: 0,Name,Número,Año
0,False,False,False
1,False,True,False
2,False,False,True
3,True,False,False


In [59]:
# Convertirlos en booleanos
df.isna()*1

Unnamed: 0,Name,Número,Año
0,0,0,0
1,0,1,0
2,0,0,1
3,1,0,0


In [60]:
# Reemplazando los valores nulos
df.fillna('Missing')

Unnamed: 0,Name,Número,Año
0,El dragon renacido,3.0,2002.0
1,El señor del Caos,Missing,2004.0
2,Un recuerdo de Luz,14.0,Missing
3,Missing,0.0,2010.0


In [63]:
# Eliminar valores nulos
df.dropna()

Unnamed: 0,Name,Número,Año
0,El dragon renacido,3.0,2002.0


### **1.3 Principales funciones de pandas**

#### **Funciones básicas**

In [75]:
# Conocer información del DataFrame
books.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 550 entries, 0 to 549
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Name         550 non-null    object 
 1   Author       550 non-null    object 
 2   User Rating  550 non-null    float64
 3   Reviews      550 non-null    int64  
 4   Price        550 non-null    int64  
 5   Year         550 non-null    int64  
 6   Genre        550 non-null    object 
dtypes: float64(1), int64(3), object(3)
memory usage: 30.2+ KB


In [79]:
# Información estadística de las variables numericas
books.describe()

Unnamed: 0,User Rating,Reviews,Price,Year
count,550.0,550.0,550.0,550.0
mean,4.618364,11953.281818,13.1,2014.0
std,0.22698,11731.132017,10.842262,3.165156
min,3.3,37.0,0.0,2009.0
25%,4.5,4058.0,7.0,2011.0
50%,4.7,8580.0,11.0,2014.0
75%,4.8,17253.25,16.0,2017.0
max,4.9,87841.0,105.0,2019.0


In [83]:
# Ver el uso de memoria
books.memory_usage(deep=True)

Index            128
Name           59737
Author         39078
User Rating     4400
Reviews         4400
Price           4400
Year            4400
Genre          36440
dtype: int64

In [87]:
# Contar los registros por autor
books['Author'].value_counts()

Jeff Kinney                           12
Gary Chapman                          11
Rick Riordan                          11
Suzanne Collins                       11
American Psychological Association    10
                                      ..
Keith Richards                         1
Chris Cleave                           1
Alice Schertle                         1
Celeste Ng                             1
Adam Gasiewski                         1
Name: Author, Length: 248, dtype: int64

In [93]:
# Eliminar duplicados
books.drop_duplicates(subset='Author')

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction
...,...,...,...,...,...,...,...
537,Wild: From Lost to Found on the Pacific Crest ...,Cheryl Strayed,4.4,17044,18,2012,Non Fiction
538,Winter of the World: Book Two of the Century T...,Ken Follett,4.5,10760,15,2012,Fiction
539,Women Food and God: An Unexpected Path to Almo...,Geneen Roth,4.2,1302,11,2010,Non Fiction
540,Wonder,R. J. Palacio,4.8,21625,9,2013,Fiction


In [100]:
books['Year'].skew()

0.0