#### Creating a DataFrame

In [4]:
import numpy as np
import pandas as pd

In [None]:
# Build a DataFrame from a dictionary: each key becomes a column header and
# each list supplies that column's values (one entry per row).
data = dict(
    Name=['Suhas', 'Aniket', 'Ritesh', 'Rohit'],
    Age=[24, 23, 25, 22],
    City=['Pune', 'Satara', 'Mumbai', 'Dhule'],
    Salary=[12000, 23000, 45000, 12590],
)
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,City,Salary
0,Suhas,24,Pune,12000
1,Aniket,23,Satara,23000
2,Ritesh,25,Mumbai,45000
3,Rohit,22,Dhule,12590


In [None]:
# Build a DataFrame from a list of rows. No column names are supplied here,
# so pandas labels the columns with default integers (0, 1, 2), the same way
# it labels the row index.
data_list = [
    ['suhas', 22, 'Pune'],
    ['Aniket', 23, 'kolhapur'],
    ['Ram', 45, 'Mumbai'],
]

df2 = pd.DataFrame(data=data_list)
df2

Unnamed: 0,0,1,2
0,suhas,22,Pune
1,Aniket,23,kolhapur
2,Ram,45,Mumbai


In [13]:
# Build a DataFrame from a list of rows and name the columns explicitly by
# passing a separate list of names through the `columns` argument.
column_names = ['Name', 'Age', 'PlaceOfBirth']
data_list2 = [
    ['suhas', 22, 'Pune'],
    ['Aniket', 23, 'kolhapur'],
    ['Ram', 45, 'Mumbai'],
]
df3 = pd.DataFrame(data=data_list2, columns=column_names)
df3

Unnamed: 0,Name,Age,PlaceOfBirth
0,suhas,22,Pune
1,Aniket,23,kolhapur
2,Ram,45,Mumbai


#### Selection and Indexing of Columns

In [18]:
# Access a single column from a DataFrame; the result is a pandas Series.
# syntax: df['column_name']

df3['Name']

0     suhas
1    Aniket
2       Ram
Name: Name, dtype: object

In [None]:
# Access multiple columns --> pass a list of column names inside the indexing
# brackets (hence the double brackets); the result is a DataFrame.
# syntax: df[['column1', 'column2']]

df3[['Name','PlaceOfBirth']]

Unnamed: 0,Name,PlaceOfBirth
0,suhas,Pune
1,Aniket,kolhapur
2,Ram,Mumbai


#### Creating New Column

In [21]:
# Create a new column in an existing DataFrame by assigning a list of values.
# syntax: df['new_column_name'] = ['val1', 'val2', ..., 'valN']
# The number of values must equal the number of rows in df.
# NOTE(review): 'AL/ML' below is probably meant to be 'AI/ML' -- confirm before changing.

df3['Role'] = ['Python Dev', 'Data Scientist', 'AL/ML']

In [22]:
# Display df3 to confirm that the 'Role' column was added.
df3

Unnamed: 0,Name,Age,PlaceOfBirth,Role
0,suhas,22,Pune,Python Dev
1,Aniket,23,kolhapur,Data Scientist
2,Ram,45,Mumbai,AL/ML


#### Remove an existing column from a DataFrame

In [None]:
# drop() looks for labels along axis=0 (the row index) by default, so asking
# for the column 'Role' without axis=1 raises a KeyError.
# The error is the point of this cell, so catch and print it; that way
# "Restart Kernel -> Run All" does not stop here.
try:
    df3.drop('Role')
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: "['Role'] not found in axis"

In [None]:
# NOTE: axis = 0 refers to the row labels (the index)
#       axis = 1 refers to the column labels

# drop() searches axis=0 by default, so to drop a column we must pass axis=1;
# then the KeyError shown above does not occur.

df3.drop('PlaceOfBirth', axis=1)

Unnamed: 0,Name,Age,Role
0,suhas,22,Python Dev
1,Aniket,23,Data Scientist
2,Ram,45,AL/ML


In [None]:
# Displaying df3 again still shows 'PlaceOfBirth': the previous drop() call
# returned a new DataFrame and left df3 itself unchanged.
df3

Unnamed: 0,Name,Age,PlaceOfBirth,Role
0,suhas,22,Pune,Python Dev
1,Aniket,23,kolhapur,Data Scientist
2,Ram,45,Mumbai,AL/ML


In [None]:
# NOTE:
# inplace=False: creates and returns a new, modified object without changing the original.
# inplace=True: modifies the existing object directly instead of returning a new one.

In [None]:
# To drop a column permanently (i.e. modify df3 itself), add the keyword "inplace=True".
# Re-running this cell raises a KeyError because the column is already gone.
df3.drop('PlaceOfBirth', axis=1, inplace=True)

# To drop multiple columns --> pass the labels as a list
# df3.drop(['PlaceOfBirth', 'Age'], axis=1, inplace=True)

In [31]:
# Display df3: 'PlaceOfBirth' has now been removed from df3 itself.
df3

Unnamed: 0,Name,Age,Role
0,suhas,22,Python Dev
1,Aniket,23,Data Scientist
2,Ram,45,AL/ML


In [None]:
# To drop a row/record --> use axis=0 and pass the row's index label (e.g. 0, 1).
# Without inplace=True this returns a new DataFrame; df3 itself is unchanged.
df3.drop(0, axis=0)  # the result omits the row labelled 0 (suhas)

Unnamed: 0,Name,Age,Role
1,Aniket,23,Data Scientist
2,Ram,45,AL/ML


#### Selecting a Row

In [33]:
# Display df (the first DataFrame) as the reference for the row-selection examples.
df

Unnamed: 0,Name,Age,City,Salary
0,Suhas,24,Pune,12000
1,Aniket,23,Satara,23000
2,Ritesh,25,Mumbai,45000
3,Rohit,22,Dhule,12590


In [40]:
# loc selects rows by index label.
# Only the last expression of a cell is rendered automatically, so the first
# result is shown explicitly with display() instead of being silently discarded.
display(df.loc[0])   # single label 0 -> that row
df.loc[[0, 1]]       # list of labels -> rows 0 and 1 as a DataFrame

Unnamed: 0,Name,Age,City,Salary
0,Suhas,24,Pune,12000
1,Aniket,23,Satara,23000


In [38]:
# iloc selects by integer position rather than by index label.
df.iloc[0]  # first row regardless of label

Name      Suhas
Age          24
City       Pune
Salary    12000
Name: 0, dtype: object

#### Selecting subsets of rows and columns 

In [41]:
# Display df as the reference for the row-and-column subset examples.
df

Unnamed: 0,Name,Age,City,Salary
0,Suhas,24,Pune,12000
1,Aniket,23,Satara,23000
2,Ritesh,25,Mumbai,45000
3,Rohit,22,Dhule,12590


In [None]:
# NOTE: Suppose we want only a subset of df, e.g. just (Pune, 12000) and (Satara, 23000).
# We need specific rows AND specific columns, so pass both to a single `loc` call:
#   df.loc[row_labels, column_labels]
# A single call avoids chained indexing (df.loc[rows][cols]).

df.loc[[0, 1], ['City', 'Salary']]

Unnamed: 0,City,Salary
0,Pune,12000
1,Satara,23000


In [47]:
# Rows labelled 2 and 3, columns 'Name' and 'Age', selected in one `loc` call
# (df.loc[row_labels, column_labels]) rather than with chained indexing.
df.loc[[2, 3], ['Name', 'Age']]

Unnamed: 0,Name,Age
2,Ritesh,25
3,Rohit,22


#### Conditional Selection
Conditional selection means filtering rows in a dataframe based on one or more conditions.

In [48]:
# Display df as the reference for the conditional-selection examples.
df

Unnamed: 0,Name,Age,City,Salary
0,Suhas,24,Pune,12000
1,Aniket,23,Satara,23000
2,Ritesh,25,Mumbai,45000
3,Rohit,22,Dhule,12590


In [None]:
# 1. Compare a single column
# df['Age'] > 23 builds a boolean mask; df[mask] keeps only the rows where it is True.
# Returns the rows where Age > 23.
df[df['Age'] > 23]

Unnamed: 0,Name,Age,City,Salary
0,Suhas,24,Pune,12000
2,Ritesh,25,Mumbai,45000


In [51]:
# 2. Equality check
# Returns only the rows where City is 'Mumbai'.
df[df['City'] == 'Mumbai']

Unnamed: 0,Name,Age,City,Salary
2,Ritesh,25,Mumbai,45000


In [52]:
# 3. Multiple conditions
# 3.1 & (AND)
# Wrap each condition in parentheses: & binds more tightly than > and ==.
# Q. Age > 22 AND City == 'Satara'.
df[(df['Age'] > 22) & (df['City'] == 'Satara')]

Unnamed: 0,Name,Age,City,Salary
1,Aniket,23,Satara,23000


In [54]:
# 3.2 | (OR)
# As with &, each condition needs its own parentheses.
# Q. Age < 24 OR City == 'Mumbai'.
df[(df['Age'] < 24) | (df['City'] == 'Mumbai')]

Unnamed: 0,Name,Age,City,Salary
1,Aniket,23,Satara,23000
2,Ritesh,25,Mumbai,45000
3,Rohit,22,Dhule,12590


In [56]:
# 3.3 ~ (NOT)
# ~ inverts the boolean mask, so True rows become False and vice versa.
# Q. All rows except those where City == 'Pune'.
df[~(df['City'] == 'Pune')]

Unnamed: 0,Name,Age,City,Salary
1,Aniket,23,Satara,23000
2,Ritesh,25,Mumbai,45000
3,Rohit,22,Dhule,12590


In [None]:
# 4. NOTE: Use isin() to match a column against several values at once
# Q. Rows where City is Pune or Dhule.
df[df['City'].isin(['Pune', 'Dhule'])]

Unnamed: 0,Name,Age,City,Salary
0,Suhas,24,Pune,12000
3,Rohit,22,Dhule,12590
