In [1]:
import numpy as np
import pandas as pd

### Creating a DataFrame

In [6]:
# Column-oriented source for a DataFrame: every key is a column label and
# every value is the list of row values for that column (all the same length).
data = dict(
    Name=['Jhon', 'Anna', 'Peter', 'Linda'],
    Age=[28, 34, 29, 42],
    City=['New York', 'Paris', 'Berlin', 'London'],
    Salary=[65000, 70000, 62000, 85000],
)

In [7]:
# Each dictionary key becomes a column label; the row index defaults to 0..n-1.
df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Age,City,Salary
0,Jhon,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [8]:
# Row-oriented source for a DataFrame: one inner list per row.
# No column labels are given, so pandas numbers the columns 0, 1, 2, 3.
data_list = [
    ['Jhon',  28, 'New York', 65000],
    ['Anna',  34, 'Paris',    70000],
    ['Peter', 29, 'Berlin',   62000],
    ['Linda', 42, 'London',   85000],
]
df2 = pd.DataFrame(data=data_list)
df2

Unnamed: 0,0,1,2,3
0,Jhon,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [9]:
# Rebuild the frame with explicit column labels in place of the default 0..3 headers.
column = ['Name', 'Age', 'City', 'Salary']
df2 = pd.DataFrame(data=data_list, columns=column)
df2

Unnamed: 0,Name,Age,City,Salary
0,Jhon,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [13]:
# Select a single column by its label, not by its integer position.
# Only the last expression of a cell is displayed, so this result is not shown.
df2['Name']
# Pass a list of labels (hence the double square brackets) to select several columns.
df2[['Name', 'City']]

Unnamed: 0,Name,City
0,Jhon,New York
1,Anna,Paris
2,Peter,Berlin
3,Linda,London


### Creating a new column

In [27]:
# The list must supply exactly one value per row (df2 has 4 rows), not one per column.
df2["Designation"] = ["Doctor", "Eng.","Doctor", "Eng."]
df2

Unnamed: 0,Name,Age,City,Salary,Designation
0,Jhon,28,New York,65000,Doctor
1,Anna,34,Paris,70000,Eng.
2,Peter,29,Berlin,62000,Doctor
3,Linda,42,London,85000,Eng.


### Remove Columns

In [28]:
# drop() returns a NEW DataFrame and leaves df2 unchanged. axis=1 targets columns;
# the default axis=0 would look for a row labelled 'Designation' instead.
# This call is not the cell's last expression, so its result is neither shown nor kept.
df2.drop('Designation', axis=1)
# Reassign the result to actually remove the column from df2. Reassignment is
# preferred over inplace=True: it keeps the data flow explicit and chainable.
df2 = df2.drop(columns='Designation')
df2

Unnamed: 0,Name,Age,City,Salary
0,Jhon,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


### Removing Rows

In [29]:
# Drop the row whose index label is 0. The result is a new DataFrame; it is not
# assigned back, so df2 itself keeps all of its rows.
df2.drop(index=0)


Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [30]:
# df2 still contains row 0: the previous drop() result was not assigned back to df2.
df2

Unnamed: 0,Name,Age,City,Salary
0,Jhon,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


### Selecting Rows

In [34]:
# .loc selects rows by index label. This result is not displayed because it is
# not the last expression in the cell.
df2.loc[0]
# Pass a list of labels to select several rows at once.
df2.loc[[0,1]]

Unnamed: 0,Name,Age,City,Salary
0,Jhon,28,New York,65000
1,Anna,34,Paris,70000


In [36]:
# .iloc selects by integer position (0-based): position 3 is the fourth row,
# returned as a Series whose index is the column labels.
df2.iloc[3]

Name       Linda
Age           42
City      London
Salary     85000
Name: 3, dtype: object

### Selecting Subsets of Rows and Columns

In [38]:
# Select rows and columns in a single .loc call (row labels first, then column
# labels) instead of chaining two indexing operations, which builds an
# intermediate frame and is unsafe if the expression is ever used for assignment.
df.loc[[0, 1], ["City", "Salary"]]

Unnamed: 0,City,Salary
0,New York,65000
1,Paris,70000


### Conditional Selection

In [39]:
# Display the full frame for reference before filtering it.
df2

Unnamed: 0,Name,Age,City,Salary
0,Jhon,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [41]:
# Keep only the people whose age is above 30.
# The comparison yields a boolean Series; indexing with it keeps the True rows.
over_30 = df2["Age"] > 30
df2[over_30]

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
3,Linda,42,London,85000


In [45]:
# Keep only the people who are older than 30 AND live in Paris.
# Each condition is built as its own boolean mask and the masks are combined
# with `&`. If the comparisons are written inline instead, each one must be
# wrapped in parentheses because `&` binds more tightly than `>` and `==`.
over_30 = df2["Age"] > 30
in_paris = df2["City"] == "Paris"
df2[over_30 & in_paris]

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
