In [1]:
import pandas as pd

In [6]:
# Sample records stored column by column: each key is a column name and each
# list holds that column's values, one entry per person.
data = dict(
    Name=['Ram', 'Shyam', 'Sita', 'Hari'],
    Age=[55, 35, 25, 36],
    City=['KTM', 'Butwal', 'Bharatpur', 'Pokhara'],
)

In [7]:
# Build the frame from the column dictionary and show it as plain text.
df = pd.DataFrame(data=data)
print(df)

    Name  Age       City
0    Ram   55        KTM
1  Shyam   35     Butwal
2   Sita   25  Bharatpur
3   Hari   36    Pokhara


In [8]:
# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() only appends a stray "None" line after the report.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    4 non-null      object
 1   Age     4 non-null      int64 
 2   City    4 non-null      object
dtypes: int64(1), object(2)
memory usage: 224.0+ bytes
None


In [9]:
# Summary statistics for the numeric columns (Age is the only one here).
summary_stats = df.describe()
print(summary_stats)

             Age
count   4.000000
mean   37.750000
std    12.526638
min    25.000000
25%    32.500000
50%    35.500000
75%    40.750000
max    55.000000


In [10]:
# Bracket access with a single column label returns that column as a Series.
df['Age']

0    55
1    35
2    25
3    36
Name: Age, dtype: int64

# LOC (Label-based Indexing)

In [12]:
import pandas as pd

# Same sample records, but with explicit string labels as the row index so
# that label-based selection with .loc can be demonstrated.
row_labels = ['a', 'b', 'c', 'd']
df = pd.DataFrame(
    {
        'Name': ['Ram', 'Shyam', 'Sita', 'Hari'],
        'Age': [55, 35, 25, 36],
        'City': ['KTM', 'Butwal', 'Bharatpur', 'Pokhara'],
    },
    index=row_labels,
)
print(df)

    Name  Age       City
a    Ram   55        KTM
b  Shyam   35     Butwal
c   Sita   25  Bharatpur
d   Hari   36    Pokhara


# output as csv file

In [16]:
# Write the frame to a CSV file; index=False leaves the a/b/c/d row labels
# out of the written file.
df.to_csv(path_or_buf='demo1.csv', index=False)

In [17]:
# Look up a single row by its index label; the result is a Series whose
# entries are labelled by the column names.
row_b = df.loc['b']
print(row_b)

Name     Shyam
Age         35
City    Butwal
Name: b, dtype: object


In [20]:
# Row label first, then a list of column labels: one row, two of its fields.
wanted_columns = ['Name', 'Age']
print(df.loc['b', wanted_columns])

Name    Shyam
Age        35
Name: b, dtype: object


In [25]:
# A label slice with .loc keeps both endpoints, so row 'd' is part of the result.
rows_a_to_d = df.loc['a':'d']
print(rows_a_to_d)

    Name  Age       City
a    Ram   55        KTM
b  Shyam   35     Butwal
c   Sita   25  Bharatpur
d   Hari   36    Pokhara


# ILOC (Integer position-based indexing)

In [29]:
# Rebuild the frame without custom labels, so rows carry the default
# 0, 1, 2, ... integer index used by the position-based (.iloc) examples.
data = dict(
    Name=['Ram', 'Shyam', 'Sita', 'Hari'],
    Age=[55, 35, 25, 36],
    City=['KTM', 'Butwal', 'Bharatpur', 'Pokhara'],
)
df = pd.DataFrame(data)

# Position 1 is the second row.
second_row = df.iloc[1]
print(second_row)

Name     Shyam
Age         35
City    Butwal
Name: 1, dtype: object


In [31]:
# A list of positions selects several rows at once: here positions 0 and 3,
# i.e. the first and fourth rows.
row_positions = [0, 3]
print(df.iloc[row_positions])

   Name  Age     City
0   Ram   55      KTM
3  Hari   36  Pokhara


In [34]:
# (row position, column position): both are 1, so this is the second row's
# second column, which is Shyam's Age.
row_pos, col_pos = 1, 1
print(df.iloc[row_pos, col_pos])

35


# Filtering

In [37]:
# Boolean mask: True for rows whose Age is greater than 30. Indexing the
# frame with the mask keeps only those rows.
over_30 = df['Age'] > 30
adults = df[over_30]
print(adults)

    Name  Age     City
0    Ram   55      KTM
1  Shyam   35   Butwal
3   Hari   36  Pokhara


# Data Append and Sorting

In [38]:
# Assigning a scalar to a new column label adds that column to df in place,
# with the same value in every row.
df['Country']='Nepal'

In [39]:
# Show the frame, which now includes the Country column.
print(df)

    Name  Age       City Country
0    Ram   55        KTM   Nepal
1  Shyam   35     Butwal   Nepal
2   Sita   25  Bharatpur   Nepal
3   Hari   36    Pokhara   Nepal


In [43]:
# Derived column: age converted from years to months (years * 12).
df['Age_in_months'] = df['Age'].mul(12)
print(df)

    Name  Age       City Country  Age_in_months
0    Ram   55        KTM   Nepal            660
1  Shyam   35     Butwal   Nepal            420
2   Sita   25  Bharatpur   Nepal            300
3   Hari   36    Pokhara   Nepal            432


In [48]:
# Rename the Name column to Full_Name. rename() returns a new frame, so the
# result is bound back to df.
column_renames = {'Name': 'Full_Name'}
df = df.rename(columns=column_renames)
print(df)

  Full_Name  Age       City Country  Age_in_months
0       Ram   55        KTM   Nepal            660
1     Shyam   35     Butwal   Nepal            420
2      Sita   25  Bharatpur   Nepal            300
3      Hari   36    Pokhara   Nepal            432


In [54]:
# ascending=False orders the rows from the highest Age to the lowest,
# i.e. this is a descending sort.
df_sort = df.sort_values('Age', ascending=False)
print(df_sort)

  Full_Name  Age       City Country  Age_in_months
0       Ram   55        KTM   Nepal            660
3      Hari   36    Pokhara   Nepal            432
1     Shyam   35     Butwal   Nepal            420
2      Sita   25  Bharatpur   Nepal            300


# Grouping & Aggregation

In [56]:
# Small two-column frame for the groupby examples: a category label and a
# numeric value per row.
category_values = {
    'Category': ['A', 'B', 'A', 'B', 'A', 'C'],
    'Value': [10, 20, 15, 25, 30, 4],
}
df = pd.DataFrame(category_values)
df

Unnamed: 0,Category,Value
0,A,10
1,B,20
2,A,15
3,B,25
4,A,30
5,C,4


In [60]:
# Total Value per Category; the category labels become the index of the result.
per_category = df.groupby(by='Category')
grouped = per_category.sum()
print(grouped)

          Value
Category       
A            55
B            45
C             4


In [62]:
# Several aggregations in one pass. The result has a two-level column
# header: the source column (Value) on top, the statistic name below it.
stat_names = ['sum', 'mean', 'count', 'min', 'max']
result = df.groupby('Category').agg(stat_names)
print(result)

         Value                         
           sum       mean count min max
Category                               
A           55  18.333333     3  10  30
B           45  22.500000     2  20  25
C            4   4.000000     1   4   4


# Handling Missing Data

In [2]:
import pandas as pd
import numpy as np

In [5]:
# Frame with deliberate gaps: NaN in the numeric columns A and B, and None
# in the text column C.
columns_with_gaps = {
    'A': [1, 2, np.nan, 4],
    'B': [5, np.nan, np.nan, 8],
    'C': ['a', 'b', 'c', None],
}
df = pd.DataFrame(columns_with_gaps)

In [6]:
# Display the frame that contains the missing values.
df

Unnamed: 0,A,B,C
0,1.0,5.0,a
1,2.0,,b
2,,,c
3,4.0,8.0,


In [9]:
# isna() yields a boolean frame of the same shape: True wherever a value is
# missing (NaN or None).
missing_mask = df.isna()
print(missing_mask)

       A      B      C
0  False  False  False
1  False   True  False
2   True   True  False
3  False  False   True


In [21]:
# The first sum() counts missing values per column; the second adds those
# per-column counts into one total.
total_missing = df.isna().sum().sum()
print(f"Total Missing Values: {total_missing}")

Total Missing Values: 4


In [23]:
# mean() of the boolean mask gives the missing fraction of each column;
# averaging those fractions and scaling by 100 gives a percentage. Every
# column has the same number of rows, so this equals the overall share of
# missing cells.
missing_pct = df.isna().mean().mean() * 100
print(f"Percentage Missing: {missing_pct:.2f}%")

Percentage Missing: 33.33%


In [25]:
# fillna() returns a new frame with every missing value replaced by 0; df
# itself is left as it was. Note that the 0 also lands in the text column C.
df_filled = df.fillna(value=0)
print(df_filled)

     A    B  C
0  1.0  5.0  a
1  2.0  0.0  b
2  0.0  0.0  c
3  4.0  8.0  0


# CSV to Dataframe

In [27]:
# Load the CSV file into a DataFrame. The relative path is resolved against
# the current working directory.
df = pd.read_csv(filepath_or_buffer='DATA.csv')

In [28]:
# Display the loaded frame; pandas abbreviates long frames (note the "..." row).
df

Unnamed: 0,Duration,Date,Pulse,Maxpulse,Calories
0,22,12/22/2024,169,182,1872
1,228,8/18/2024,18,92,1901
2,301,11/14/2024,146,143,363
3,381,1/10/2025,158,64,1665
4,315,6/30/2024,78,131,1
...,...,...,...,...,...
195,2,1/8/2025,73,162,61
196,169,5/23/2024,187,114,1484
197,5,5/11/2024,109,194,1356
198,343,5/12/2024,155,156,864


In [30]:
# First five rows (n=5 is head()'s default).
df.head(n=5)

Unnamed: 0,Duration,Date,Pulse,Maxpulse,Calories
0,22,12/22/2024,169,182,1872
1,228,8/18/2024,18,92,1901
2,301,11/14/2024,146,143,363
3,381,1/10/2025,158,64,1665
4,315,6/30/2024,78,131,1


In [32]:
# Last five rows (n=5 is tail()'s default).
df.tail(n=5)

Unnamed: 0,Duration,Date,Pulse,Maxpulse,Calories
195,2,1/8/2025,73,162,61
196,169,5/23/2024,187,114,1484
197,5,5/11/2024,109,194,1356
198,343,5/12/2024,155,156,864
199,110,10/30/2024,45,196,355


In [34]:
# First ten rows.
df.head(n=10)

Unnamed: 0,Duration,Date,Pulse,Maxpulse,Calories
0,22,12/22/2024,169,182,1872
1,228,8/18/2024,18,92,1901
2,301,11/14/2024,146,143,363
3,381,1/10/2025,158,64,1665
4,315,6/30/2024,78,131,1
5,363,11/17/2024,197,177,239
6,268,11/13/2024,85,24,1528
7,480,12/29/2024,105,78,1949
8,386,3/24/2025,121,24,1677
9,340,1/24/2025,129,180,816
