# Pandas Basics

## Importing Libraries


In [54]:
import pandas as pd
import numpy as np

In [55]:
# Read the CSV file into a DataFrame.
csv_path = "people.csv"
df = pd.read_csv(csv_path)
# Ending the cell with the bare name displays the DataFrame below.
df

Unnamed: 0,NAME,AGE,CITY,SALARY
0,Aisha,22,Bangalore,25000
1,Ronak,24,Mumbai,30000
2,Abhi,28,Bangalore,33000
3,Zoe,21,Hyderabad,21000
4,Neha,30,Bangalore,45000
5,Arjun,24,Pune,50000
6,Aishwarya,26,Mumbai,18000
7,V,29,Pune,38000
8,Harman,18,Bangalore,33000


In [56]:
# Show the first rows of the DataFrame; n=5 is the default, written out here.
df.head(n=5)

Unnamed: 0,NAME,AGE,CITY,SALARY
0,Aisha,22,Bangalore,25000
1,Ronak,24,Mumbai,30000
2,Abhi,28,Bangalore,33000
3,Zoe,21,Hyderabad,21000
4,Neha,30,Bangalore,45000


In [57]:
# Show the last rows of the DataFrame; n=5 is the default, written out here.
df.tail(n=5)

Unnamed: 0,NAME,AGE,CITY,SALARY
4,Neha,30,Bangalore,45000
5,Arjun,24,Pune,50000
6,Aishwarya,26,Mumbai,18000
7,V,29,Pune,38000
8,Harman,18,Bangalore,33000


In [58]:
# Print a summary of the DataFrame: index range, column names,
# non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   NAME    9 non-null      object
 1   AGE     9 non-null      int64 
 2   CITY    9 non-null      object
 3   SALARY  9 non-null      object
dtypes: int64(1), object(3)
memory usage: 420.0+ bytes


In [59]:
# shape is a (number_of_rows, number_of_columns) tuple.
df.shape

(9, 4)

In [60]:
# The column labels of the DataFrame, returned as an Index object.
df.columns

Index(['NAME', 'AGE', 'CITY', 'SALARY'], dtype='object')

In [61]:
# Access a single column by giving its label inside square brackets.
df["AGE"]

Unnamed: 0,AGE
0,22
1,24
2,28
3,21
4,30
5,24
6,26
7,29
8,18


In [62]:
# Pass a list of labels to select several columns at once, in the order listed.
selected_columns = ["AGE", "NAME"]
df[selected_columns]

Unnamed: 0,AGE,NAME
0,22,Aisha
1,24,Ronak
2,28,Abhi
3,21,Zoe
4,30,Neha
5,24,Arjun
6,26,Aishwarya
7,29,V
8,18,Harman


## IMPORTANT STATS

In [63]:
# Arithmetic mean of the AGE column.
df["AGE"].mean()

np.float64(24.666666666666668)

In [64]:
# Largest value in the AGE column.
df["AGE"].max()

30

In [65]:
# Smallest value in the AGE column.
df["AGE"].min()

18

In [66]:
# Sum of all values in the AGE column.
df["AGE"].sum()

np.int64(222)

In [67]:
# Number of non-null values in the AGE column.
df["AGE"].count()

np.int64(9)

## FILTERING

In [68]:
# Boolean mask: True for rows whose AGE is greater than 25.
is_over_25 = df["AGE"] > 25
# Indexing with the mask keeps whole rows (every column), not just the ages.
df[is_over_25]

Unnamed: 0,NAME,AGE,CITY,SALARY
2,Abhi,28,Bangalore,33000
4,Neha,30,Bangalore,45000
6,Aishwarya,26,Mumbai,18000
7,V,29,Pune,38000


In [69]:
# Boolean mask: True for rows whose CITY is exactly "Bangalore".
lives_in_bangalore = df["CITY"] == "Bangalore"
# Keep only the rows selected by the mask.
df[lives_in_bangalore]

Unnamed: 0,NAME,AGE,CITY,SALARY
0,Aisha,22,Bangalore,25000
2,Abhi,28,Bangalore,33000
4,Neha,30,Bangalore,45000
8,Harman,18,Bangalore,33000


## Handling Missing Values


In [70]:
# Same-shaped DataFrame of booleans: True where a value is missing.
# isna() is the method that isnull() is an alias of.
df.isna()

Unnamed: 0,NAME,AGE,CITY,SALARY
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,False,False,False
4,False,False,False,False
5,False,False,False,False
6,False,False,False,False
7,False,False,False,False
8,False,False,False,False


In [71]:
# Return a new DataFrame without the rows that contain any missing value.
# The defaults (axis=0, how="any") are written out; df itself is not modified.
df.dropna(axis=0, how="any")

Unnamed: 0,NAME,AGE,CITY,SALARY
0,Aisha,22,Bangalore,25000
1,Ronak,24,Mumbai,30000
2,Abhi,28,Bangalore,33000
3,Zoe,21,Hyderabad,21000
4,Neha,30,Bangalore,45000
5,Arjun,24,Pune,50000
6,Aishwarya,26,Mumbai,18000
7,V,29,Pune,38000
8,Harman,18,Bangalore,33000


In [72]:
# Return a new DataFrame with missing values replaced by 0; df itself is not modified.
df.fillna(value=0)

Unnamed: 0,NAME,AGE,CITY,SALARY
0,Aisha,22,Bangalore,25000
1,Ronak,24,Mumbai,30000
2,Abhi,28,Bangalore,33000
3,Zoe,21,Hyderabad,21000
4,Neha,30,Bangalore,45000
5,Arjun,24,Pune,50000
6,Aishwarya,26,Mumbai,18000
7,V,29,Pune,38000
8,Harman,18,Bangalore,33000


In [73]:
# Return the rows ordered by SALARY from lowest to highest (ascending is the default).
# NOTE(review): df.info() reports SALARY as dtype object, so values may be compared
# as strings rather than numbers - confirm, and convert the column if needed.
df.sort_values(by="SALARY", ascending=True)

Unnamed: 0,NAME,AGE,CITY,SALARY
6,Aishwarya,26,Mumbai,18000
3,Zoe,21,Hyderabad,21000
0,Aisha,22,Bangalore,25000
1,Ronak,24,Mumbai,30000
2,Abhi,28,Bangalore,33000
8,Harman,18,Bangalore,33000
7,V,29,Pune,38000
4,Neha,30,Bangalore,45000
5,Arjun,24,Pune,50000


In [74]:
# ascending=False reverses the order: rows are returned from highest SALARY to lowest.
df.sort_values(by="SALARY", ascending=False)

Unnamed: 0,NAME,AGE,CITY,SALARY
5,Arjun,24,Pune,50000
4,Neha,30,Bangalore,45000
7,V,29,Pune,38000
2,Abhi,28,Bangalore,33000
8,Harman,18,Bangalore,33000
1,Ronak,24,Mumbai,30000
0,Aisha,22,Bangalore,25000
3,Zoe,21,Hyderabad,21000
6,Aishwarya,26,Mumbai,18000


## Creating a new column

In [75]:
# Compute AGE + 5 for every row, then store it on df as a new column.
age_in_five_years = df["AGE"] + 5
df["AGE_plus_5"] = age_in_five_years

In [76]:
# Display the newly added column to check the result.
df["AGE_plus_5"]

Unnamed: 0,AGE_plus_5
0,27
1,29
2,33
3,26
4,35
5,29
6,31
7,34
8,23


## Creating a Pandas Series

In [77]:

# Build a NumPy array of single-character strings to use as the Series values.
data = np.array(['h', 'e', 'l', 'l', 'o'])
# Wrap the array in a pandas Series; each element gets an integer index label.
s = pd.Series(data)
# Print the label and the Series on separate lines. Passing them as two
# arguments with sep="\n" avoids the space that print() otherwise inserts
# between arguments, which pushed the first row out of alignment.
print("Pandas Series:", s, sep="\n")

Pandas Series:
 0    h
1    e
2    l
3    l
4    o
dtype: object
