# Pandas - DataFrames

In [1]:
import pandas as pd
import numpy as np

In [3]:
# Create a dictionary of data

data = {
    "Name": ["Alice", "Bob", "Charlie", "Diana", "Eve"],
    "Age": [25, 30, 28, 22, 27],
    "City": ["New York", "London", "Paris", "Berlin", "Tokyo"],
    "Occupation": ["Doctor", "Engineer", "Teacher", "Student", "Artist"],
    "Hobbies": ["Reading", "Sports", "Music", "Travel", "Painting"],
    "Favorite_Color": ["Blue", "Green", "Red", "Yellow", "Purple"],
    "ID": [1234, 5678, 9012, 3456, 7890]
}

# Create a dataframe from the dictionary

df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,City,Occupation,Hobbies,Favorite_Color,ID
0,Alice,25,New York,Doctor,Reading,Blue,1234
1,Bob,30,London,Engineer,Sports,Green,5678
2,Charlie,28,Paris,Teacher,Music,Red,9012
3,Diana,22,Berlin,Student,Travel,Yellow,3456
4,Eve,27,Tokyo,Artist,Painting,Purple,7890


In [4]:
df.columns

Index(['Name', 'Age', 'City', 'Occupation', 'Hobbies', 'Favorite_Color', 'ID'], dtype='object')

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Name            5 non-null      object
 1   Age             5 non-null      int64 
 2   City            5 non-null      object
 3   Occupation      5 non-null      object
 4   Hobbies         5 non-null      object
 5   Favorite_Color  5 non-null      object
 6   ID              5 non-null      int64 
dtypes: int64(2), object(5)
memory usage: 412.0+ bytes


In [6]:
df.index

RangeIndex(start=0, stop=5, step=1)

In [11]:
df.describe()

Unnamed: 0,Age,ID
count,5.0,5.0
mean,26.4,5454.0
std,3.04959,3179.781439
min,22.0,1234.0
25%,25.0,3456.0
50%,27.0,5678.0
75%,28.0,7890.0
max,30.0,9012.0


In [9]:
df.size

35

In [10]:
df.shape

(5, 7)

In [17]:
df.loc['Alice']

Name                 Alice
Age                     25
City              New York
Occupation          Doctor
Hobbies            Reading
Favorite_Color        Blue
ID                    1234
Name: Alice, dtype: object

In [16]:
df

Unnamed: 0,Name,Age,City,Occupation,Hobbies,Favorite_Color,ID
Alice,Alice,25,New York,Doctor,Reading,Blue,1234
Bob,Bob,30,London,Engineer,Sports,Green,5678
Charlie,Charlie,28,Paris,Teacher,Music,Red,9012
Diana,Diana,22,Berlin,Student,Travel,Yellow,3456
Eve,Eve,27,Tokyo,Artist,Painting,Purple,7890


In [15]:
df.index = [
    'Alice',
    'Bob',
    'Charlie',
    'Diana',
    'Eve'
]

## Indexing, Selection and Slicing

In [18]:
df.loc['Bob']

Name                   Bob
Age                     30
City                London
Occupation        Engineer
Hobbies             Sports
Favorite_Color       Green
ID                    5678
Name: Bob, dtype: object

In [20]:
df.iloc[-1]

Name                   Eve
Age                     27
City                 Tokyo
Occupation          Artist
Hobbies           Painting
Favorite_Color      Purple
ID                    7890
Name: Eve, dtype: object

In [21]:
df['Bob']

KeyError: 'Bob'

In [22]:
df[1:3]

Unnamed: 0,Name,Age,City,Occupation,Hobbies,Favorite_Color,ID
Bob,Bob,30,London,Engineer,Sports,Green,5678
Charlie,Charlie,28,Paris,Teacher,Music,Red,9012


## Row level selection works better with loc and iloc which are recommended over regular "direct slicing" (df[:]).

loc selects rows matching the given index:

In [23]:
df.loc['Bob']

Name                   Bob
Age                     30
City                London
Occupation        Engineer
Hobbies             Sports
Favorite_Color       Green
ID                    5678
Name: Bob, dtype: object

In [26]:
df.loc['Bob': 'Diana', ['ID','City']]

Unnamed: 0,ID,City
Bob,5678,London
Charlie,9012,Paris
Diana,3456,Berlin


In [28]:
df.iloc[[1,3,4]]

Unnamed: 0,Name,Age,City,Occupation,Hobbies,Favorite_Color,ID
Bob,Bob,30,London,Engineer,Sports,Green,5678
Diana,Diana,22,Berlin,Student,Travel,Yellow,3456
Eve,Eve,27,Tokyo,Artist,Painting,Purple,7890


In [33]:
df.loc[df['Age'] > 25,'ID']

Bob        5678
Charlie    9012
Eve        7890
Name: ID, dtype: int64

In [36]:
df.drop('Bob')

Unnamed: 0,Name,Age,City,Occupation,Hobbies,Favorite_Color,ID
Alice,Alice,25,New York,Doctor,Reading,Blue,1234
Charlie,Charlie,28,Paris,Teacher,Music,Red,9012
Diana,Diana,22,Berlin,Student,Travel,Yellow,3456
Eve,Eve,27,Tokyo,Artist,Painting,Purple,7890


Adding Values

In [42]:
df.append(pd.Series({
    'Name': 'Joshua',
    'Age': 23,
    'City':'Kutus'
}))

AttributeError: 'DataFrame' object has no attribute 'append'