# Data Frames

In [21]:
import numpy as numpy
import pandas as pd

Creating a DataFrame

In [22]:
# Build a DataFrame from a dictionary: every key becomes a column label and
# every list supplies that column's values, one entry per row.
data = {
    "Name": ["Alice", "Bob", "Charlie", "David"],
    "Age": [24, 27, 22, 32],
    "City": ["New York", "Los Angeles", "Chicago", "Houston"],
    "Salary": [70000, 80000, 60000, 90000],
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Age,City,Salary
0,Alice,24,New York,70000
1,Bob,27,Los Angeles,80000
2,Charlie,22,Chicago,60000
3,David,32,Houston,90000


In [23]:
# Another way to create a DataFrame: from a list of rows.
# Each inner list is one row. Column labels are passed separately through
# `columns`; without them pandas would label the columns 0, 1, 2, 3.
data_list = [
    ['Alice', 24, 'New York', 70000],
    ['Bob', 27, 'Los Angeles', 80000],
    ['Charlie', 22, 'Chicago', 60000],
    ['David', 32, 'Houston', 90000],
]
columns = ['Name', 'Age', 'City', 'Salary']
df2 = pd.DataFrame(data_list, columns=columns)
df2

Unnamed: 0,Name,Age,City,Salary
0,Alice,24,New York,70000
1,Bob,27,Los Angeles,80000
2,Charlie,22,Chicago,60000
3,David,32,Houston,90000


Selecting and indexing columns

In [24]:
# Show the whole frame for reference before selecting columns from it.
df2

Unnamed: 0,Name,Age,City,Salary
0,Alice,24,New York,70000
1,Bob,27,Los Angeles,80000
2,Charlie,22,Chicago,60000
3,David,32,Houston,90000


In [25]:
# Single brackets with one column label return that column as a Series.
df2['Name']

0      Alice
1        Bob
2    Charlie
3      David
Name: Name, dtype: object

In [26]:
# Same single-label selection, this time for the 'City' column.
df2['City']

0       New York
1    Los Angeles
2        Chicago
3        Houston
Name: City, dtype: object

In [27]:
# df2['Name', 'City'] would raise a KeyError: pandas treats the tuple
# ('Name', 'City') as one single column label. To select several columns,
# pass a list of labels, hence the double square brackets [[...]].
df2[['Name', 'City']]

Unnamed: 0,Name,City
0,Alice,New York
1,Bob,Los Angeles
2,Charlie,Chicago
3,David,Houston


Creating a new column

In [28]:
# Assigning a list to a new column label adds that column to df2 in place.
# The list must contain exactly one value per row (4 here).
df2["Designation"] = ['Data Scientist', 'Developer', 'Analyst', 'Manager']
df2

Unnamed: 0,Name,Age,City,Salary,Designation
0,Alice,24,New York,70000,Data Scientist
1,Bob,27,Los Angeles,80000,Developer
2,Charlie,22,Chicago,60000,Analyst
3,David,32,Houston,90000,Manager


Removing columns

In [29]:
# drop() returns a new DataFrame without the column; the result is only
# displayed here, not assigned, so df2 itself is left untouched.
df2.drop('Designation', axis=1)  # axis=1 targets columns, axis=0 targets rows

Unnamed: 0,Name,Age,City,Salary
0,Alice,24,New York,70000
1,Bob,27,Los Angeles,80000
2,Charlie,22,Chicago,60000
3,David,32,Houston,90000


In [30]:
# The column is still there: drop() returned a new frame, and we neither
# assigned that result back to df2 nor passed inplace=True.
df2

Unnamed: 0,Name,Age,City,Salary,Designation
0,Alice,24,New York,70000,Data Scientist
1,Bob,27,Los Angeles,80000,Developer
2,Charlie,22,Chicago,60000,Analyst
3,David,32,Houston,90000,Manager


In [31]:
# Actually remove the column by assigning the result of drop() back to df2.
# `columns=` names the axis explicitly (equivalent to axis=1), and
# errors='ignore' keeps this cell safe to re-run: once 'Designation' is gone,
# a second drop would otherwise raise a KeyError.
df2 = df2.drop(columns='Designation', errors='ignore')
df2

Unnamed: 0,Name,Age,City,Salary
0,Alice,24,New York,70000
1,Bob,27,Los Angeles,80000
2,Charlie,22,Chicago,60000
3,David,32,Houston,90000


In [32]:
# Pass a list of labels to drop several columns at once. The result is a new
# frame that is only displayed; df2 keeps all of its columns.
df2.drop(["City", "Salary"], axis=1)

Unnamed: 0,Name,Age
0,Alice,24
1,Bob,27
2,Charlie,22
3,David,32


In [33]:
# df2 is unchanged: the previous drop() result was not assigned back.
df2

Unnamed: 0,Name,Age,City,Salary
0,Alice,24,New York,70000
1,Bob,27,Los Angeles,80000
2,Charlie,22,Chicago,60000
3,David,32,Houston,90000


In [34]:
# With axis=0, drop() removes rows by index label. Label 0 is the first row
# here. As before, this returns a new frame and leaves df2 untouched.
df2.drop(0, axis=0)

Unnamed: 0,Name,Age,City,Salary
1,Bob,27,Los Angeles,80000
2,Charlie,22,Chicago,60000
3,David,32,Houston,90000


Selecting rows

In [35]:
# .loc selects by index label; a single label returns that row as a Series.
df2.loc[0]

Name         Alice
Age             24
City      New York
Salary       70000
Name: 0, dtype: object

In [38]:
# A list of labels selects several rows and returns a DataFrame.
df2.loc[[0, 1]]

Unnamed: 0,Name,Age,City,Salary
0,Alice,24,New York,70000
1,Bob,27,Los Angeles,80000


In [39]:
# .iloc selects by integer position (0-based), not by label: position 3 is
# the fourth row. Labels and positions happen to coincide in this frame.
df2.iloc[3]

Name        David
Age            32
City      Houston
Salary      90000
Name: 3, dtype: object

Selecting a subset of rows and columns

In [42]:
# Select the rows labelled 0 and 1 and the 'City'/'Salary' columns in a
# single .loc[row_labels, column_labels] call. This avoids chained indexing
# (df2.loc[rows][cols]), which builds an intermediate frame first and is
# unreliable if you later assign through it.
df2.loc[[0, 1], ['City', 'Salary']]

Unnamed: 0,City,Salary
0,New York,70000
1,Los Angeles,80000


In [43]:
# .loc[row_labels, column_labels] picks rows and columns in one step:
# here the rows labelled 2 and 3, restricted to 'Name' and 'Age'.
df2.loc[[2, 3], ['Name', 'Age']]

Unnamed: 0,Name,Age
2,Charlie,22
3,David,32


Conditional Selection

In [46]:
# Keep only the people older than 25. The comparison df2['Age'] > 25 yields
# a boolean Series; indexing df2 with it keeps the rows where it is True.
df2[df2['Age'] > 25]

Unnamed: 0,Name,Age,City,Salary
1,Bob,27,Los Angeles,80000
3,David,32,Houston,90000


In [47]:
# Keep only the people who are older than 25 AND live in Houston.
# Combine boolean Series with the element-wise & operator (not `and`), and
# wrap each comparison in parentheses because & binds tighter than > and ==.
df2[(df2['Age'] > 25) & (df2['City'] == 'Houston')]

Unnamed: 0,Name,Age,City,Salary
3,David,32,Houston,90000
