# Creating DataFrames

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Column-oriented input: every key becomes a column, every list its values.
data = dict(
    Name=['John', 'Anna', 'Peter', 'Linda'],
    Age=[28, 34, 29, 42],
    City=['New York', 'Paris', 'Berlin', 'London'],
    Salary=[65000, 70000, 62000, 85000],
)
# Explicit row labels 1-4 instead of the default 0-based integer index.
label = list(range(1, 5))
df = pd.DataFrame(data=data, index=label)

In [None]:
# Display df: the columns come from the dict keys, the row labels 1-4 from `label`.
df

Unnamed: 0,Name,Age,City,Salary
1,John,28,New York,65000
2,Anna,34,Paris,70000
3,Peter,29,Berlin,62000
4,Linda,42,London,85000


In [32]:
# Row-oriented input: each inner list is one record (name, age, city, salary).
data_list = [
    ['John', 28, 'New York', 65000],
    ['Anna', 34, 'Paris', 70000],
    ['Peter', 29, 'Berlin', 62000],
    ['Linda', 42, 'London', 85000],
]
columns = ["Name", "Age", "City", "Salary"]
labels = list(range(1, 5))

# With no columns/index given, pandas falls back to default integer labels on both axes.
df2 = pd.DataFrame(data=data_list)
# Same records, this time with explicit column names and row labels 1-4.
df3 = pd.DataFrame(data=data_list, index=labels, columns=columns)

In [5]:
# df2 was built without columns/index, so both axes show the default integer labels 0-3.
df2

Unnamed: 0,0,1,2,3
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [6]:
# Same records as df2, now with the named columns and the row labels 1-4.
df3

Unnamed: 0,Name,Age,City,Salary
1,John,28,New York,65000
2,Anna,34,Paris,70000
3,Peter,29,Berlin,62000
4,Linda,42,London,85000


In [7]:
# A single column label inside [] returns that column as a Series.
df3['Name']

1     John
2     Anna
3    Peter
4    Linda
Name: Name, dtype: object

In [8]:
# Same single-label selection, this time for the City column.
df3['City']

1    New York
2       Paris
3      Berlin
4      London
Name: City, dtype: object

In [9]:
# A list of labels inside [] returns a DataFrame containing just those columns.
df3[["Name","City"]]

Unnamed: 0,Name,City
1,John,New York
2,Anna,Paris
3,Peter,Berlin
4,Linda,London


# Creating a new column

In [10]:
# Assigning a list to a label that does not exist yet adds a new column to df3 itself;
# the list must supply one value per row.
df3['Designation'] = ['Employee','Employee','Sales person','Doctor']

In [11]:
# df3 now carries the extra Designation column.
df3

Unnamed: 0,Name,Age,City,Salary,Designation
1,John,28,New York,65000,Employee
2,Anna,34,Paris,70000,Employee
3,Peter,29,Berlin,62000,Sales person
4,Linda,42,London,85000,Doctor


In [12]:
# DataFrame.drop leaves df3 untouched and hands back a new frame each time.
# axis=0 (the default) targets row labels; axis=1 targets column labels.
p1 = df3.drop(1, axis=0)
p2 = df3.drop('Designation', axis=1)

print("Deleting row  .....", p1, sep="\n")
print()
print("Deleting column: ....", p2, sep="\n")

Deleting row  .....
    Name  Age    City  Salary   Designation
2   Anna   34   Paris   70000      Employee
3  Peter   29  Berlin   62000  Sales person
4  Linda   42  London   85000        Doctor

Deleting column: ....
    Name  Age      City  Salary
1   John   28  New York   65000
2   Anna   34     Paris   70000
3  Peter   29    Berlin   62000
4  Linda   42    London   85000


In [13]:
# df3 is unchanged: the drop() calls in the previous cell returned new frames.
df3


Unnamed: 0,Name,Age,City,Salary,Designation
1,John,28,New York,65000,Employee
2,Anna,34,Paris,70000,Employee
3,Peter,29,Berlin,62000,Sales person
4,Linda,42,London,85000,Doctor


In [14]:
 # Assigning to an existing column label overwrites that column in df3 itself.
df3['Salary']=[30000,50000,40000,90000]

In [15]:
# Salary now shows the replacement values.
df3

Unnamed: 0,Name,Age,City,Salary,Designation
1,John,28,New York,30000,Employee
2,Anna,34,Paris,50000,Employee
3,Peter,29,Berlin,40000,Sales person
4,Linda,42,London,90000,Doctor


In [16]:
 # drop() returns a modified copy by default; inplace=True removes the column from df3 itself.
 # NOTE: re-running this cell raises KeyError, because 'Salary' is already gone after the first run.
df3.drop('Salary',axis= 1,inplace=True) 

In [17]:
# The Salary column is no longer part of df3.
df3

Unnamed: 0,Name,Age,City,Designation
1,John,28,New York,Employee
2,Anna,34,Paris,Employee
3,Peter,29,Berlin,Sales person
4,Linda,42,London,Doctor


In [18]:
# Same in-place removal for the Designation column; df3 is left with Name, Age and City.
df3.drop('Designation',axis= 1,inplace=True) 

In [19]:
# Only Name, Age and City remain.
df3

Unnamed: 0,Name,Age,City
1,John,28,New York
2,Anna,34,Paris
3,Peter,29,Berlin
4,Linda,42,London


In [20]:
# 'Name' already exists, so this assignment replaces the column's values
# rather than adding a second column with the same name.
df3['Name'] = ['a','r','f','r']

In [21]:
# Name now holds the overwritten values.
df3

Unnamed: 0,Name,Age,City
1,a,28,New York
2,r,34,Paris
3,f,29,Berlin
4,r,42,London


In [22]:
# .loc selects by label: the list [1, 2] names the row labels 1 and 2 of the index,
# not positions. With a single indexer, .loc works on rows.
df3.loc[[1,2]]

Unnamed: 0,Name,Age,City
1,a,28,New York
2,r,34,Paris


In [23]:
# NOTE(review): identical to the previous cell — presumably a leftover; consider removing it.
df3.loc[[1,2]]

Unnamed: 0,Name,Age,City
1,a,28,New York
2,r,34,Paris


In [24]:
# .iloc selects by 0-based integer position and ignores the index labels:
# position 1 is the second row, whose label is 2 (hence "Name: 2" in the output).
df3.iloc[1]

Name        r
Age        34
City    Paris
Name: 2, dtype: object

In [25]:
# The .loc/.iloc lookups above only read from df3, so its contents are unchanged.
df3

Unnamed: 0,Name,Age,City
1,a,28,New York
2,r,34,Paris
3,f,29,Berlin
4,r,42,London


# Selecting Subsets of Rows and Columns

In [26]:
# Starting point for the subset selections below.
# NOTE(review): df3 was displayed in the cell just above; this repeat is redundant.
df3

Unnamed: 0,Name,Age,City
1,a,28,New York
2,r,34,Paris
3,f,29,Berlin
4,r,42,London


In [27]:
# Pick rows 1-2 and the City/Age columns in one .loc call (row labels first,
# then column labels) instead of chaining two separate indexing operations.
df3.loc[[1, 2], ['City', 'Age']]

Unnamed: 0,City,Age
1,New York,28
2,Paris,34


In [28]:
# Pick rows 3-4 and the Name/City columns in one .loc call (row labels first,
# then column labels) instead of chaining two separate indexing operations.
df3.loc[[3, 4], ["Name", 'City']]

Unnamed: 0,Name,City
3,f,Berlin
4,r,London


# Conditional Selection

In [29]:
# Full frame for reference before filtering; the selections above did not modify df3.
df3

Unnamed: 0,Name,Age,City
1,a,28,New York
2,r,34,Paris
3,f,29,Berlin
4,r,42,London


In [30]:
# Boolean indexing: the comparison yields one True/False per row,
# and only the rows marked True (Age greater than 30) are kept.
df3.loc[df3['Age'] > 30]

Unnamed: 0,Name,Age,City
2,r,34,Paris
4,r,42,London


In [31]:
# Keep only the rows where Age is above 30 AND City is Paris.
# Each comparison needs its own parentheses because & binds tighter than > and ==.
df3.loc[(df3['Age'] > 30) & (df3["City"] == "Paris")]

Unnamed: 0,Name,Age,City
2,r,34,Paris
