Pandas DataFrame

In [27]:
import numpy as np
import pandas as pd

In [28]:
# Build a DataFrame from a dictionary: every key becomes a column label and
# every list holds that column's values (all lists have the same length).
data = {
    'name':   ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'age':    [25, 30, 35, 40, 45],
    'city':   ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix'],
    'Salary': [50000, 60000, 70000, 80000, 90000],
}

# from_dict with its default orient="columns" treats the dict keys as columns,
# which gives the same result as calling pd.DataFrame(data) directly.
df = pd.DataFrame.from_dict(data)
df

Unnamed: 0,name,age,city,Salary
0,Alice,25,New York,50000
1,Bob,30,Los Angeles,60000
2,Charlie,35,Chicago,70000
3,David,40,Houston,80000
4,Eve,45,Phoenix,90000


In [29]:
# Build a DataFrame from a list of lists: each inner list is one row.
# No column labels are supplied, so pandas numbers the columns 0, 1, 2, 3.
data_list = [
    ['Alice',   25, 'New York',    50000],
    ['Bob',     30, 'Los Angeles', 60000],
    ['Charlie', 35, 'Chicago',     70000],
    ['David',   40, 'Houston',     80000],
    ['Eve',     45, 'Phoenix',     90000],
]

df2 = pd.DataFrame(data=data_list)
df2

Unnamed: 0,0,1,2,3
0,Alice,25,New York,50000
1,Bob,30,Los Angeles,60000
2,Charlie,35,Chicago,70000
3,David,40,Houston,80000
4,Eve,45,Phoenix,90000


In [30]:
# Give the DataFrame meaningful column labels.
# Assigning to .columns replaces all labels at once, so the list must contain
# exactly one label per existing column, in left-to-right order.
column_labels = ['name', 'age', 'city', 'Salary']
df2.columns = column_labels
df2

Unnamed: 0,name,age,city,Salary
0,Alice,25,New York,50000
1,Bob,30,Los Angeles,60000
2,Charlie,35,Chicago,70000
3,David,40,Houston,80000
4,Eve,45,Phoenix,90000


In [31]:
# Access one column of the DataFrame.
# A single column label returns a Series; .loc[:, 'age'] ("all rows, column
# 'age'") is the explicit spelling of the shorthand df2['age'].
df2.loc[:, 'age']

0    25
1    30
2    35
3    40
4    45
Name: age, dtype: int64

In [32]:
# Access several columns at once.
# A list of column labels returns a DataFrame holding just those columns;
# .loc[:, [...]] is the explicit spelling of the shorthand df2[['name', 'city']].
df2.loc[:, ['name', 'city']]

Unnamed: 0,name,city
0,Alice,New York
1,Bob,Los Angeles
2,Charlie,Chicago
3,David,Houston
4,Eve,Phoenix


In [33]:
# Add a new column to the DataFrame.
# Assigning a list to a label that does not exist yet creates the column;
# the list needs one value per row, in row order.
designations = ["Data Scientist", "Data Analyst", "Data Engineer", "ML Engineer", "AI Researcher"]
df2["designation"] = designations
df2

Unnamed: 0,name,age,city,Salary,designation
0,Alice,25,New York,50000,Data Scientist
1,Bob,30,Los Angeles,60000,Data Analyst
2,Charlie,35,Chicago,70000,Data Engineer
3,David,40,Houston,80000,ML Engineer
4,Eve,45,Phoenix,90000,AI Researcher


In [34]:
# Dropping a column from the DataFrame.
# drop() returns a new DataFrame and leaves the original untouched, so the
# result is assigned back to df2. `columns=` names the axis explicitly and is
# equivalent to drop("designation", axis=1).
# Passing inplace=True would modify df2 directly instead of returning a new
# DataFrame, but it prevents method chaining, so rebinding the name is preferred.
df2 = df2.drop(columns="designation")
df2

Unnamed: 0,name,age,city,Salary
0,Alice,25,New York,50000
1,Bob,30,Los Angeles,60000
2,Charlie,35,Chicago,70000
3,David,40,Houston,80000
4,Eve,45,Phoenix,90000


In [35]:
# Dropping a row from the DataFrame.
# `index=0` removes the row whose index *label* is 0 and is equivalent to
# drop(0, axis=0). drop() returns a new DataFrame, so the result is assigned
# back to df2 rather than using inplace=True.
# The remaining rows keep their labels 1-4; the index is not renumbered.
df2 = df2.drop(index=0)
df2

Unnamed: 0,name,age,city,Salary
1,Bob,30,Los Angeles,60000
2,Charlie,35,Chicago,70000
3,David,40,Houston,80000
4,Eve,45,Phoenix,90000


In [36]:
# Selecting rows by index label.
# .loc looks rows up by their index label, not by their position; passing a
# list of labels returns those rows as a DataFrame.
# Here the rows labelled 1 and 2 are selected. Label 1 is now the first row,
# because the row labelled 0 was dropped earlier.
df2.loc[[1, 2]]

Unnamed: 0,name,age,city,Salary
1,Bob,30,Los Angeles,60000
2,Charlie,35,Chicago,70000


In [38]:
# Selecting rows based on a condition (boolean indexing).
# df2['age'] > 32 builds a True/False mask with one entry per row; indexing
# with that mask keeps only the rows where the age is greater than 32.
df2[df2['age'] > 32]


Unnamed: 0,name,age,city,Salary
2,Charlie,35,Chicago,70000
3,David,40,Houston,80000
4,Eve,45,Phoenix,90000


In [39]:
# Select specific columns for specific rows in one .loc call: rows first, columns second.
# .loc slices by label and includes both endpoints, so 0:3 returns the rows
# labelled 0, 1, 2 and 3.
wanted_columns = ['name', 'age']
df.loc[0:3, wanted_columns]

Unnamed: 0,name,age
0,Alice,25
1,Bob,30
2,Charlie,35
3,David,40


In [41]:
# Filter rows on several conditions at once.
# Each comparison yields a boolean mask; & combines the masks element-wise,
# so a row is kept only when both conditions hold for it.
older_than_30 = df['age'] > 30
salary_above_80k = df['Salary'] > 80000
df[older_than_30 & salary_above_80k]

Unnamed: 0,name,age,city,Salary
4,Eve,45,Phoenix,90000
