# pandas

In [1]:
# Pandas is a powerful and easy-to-use data analysis and manipulation library for Python

In [2]:
import pandas as pd
import numpy as np

In [4]:
# Creating data structures
# Series: a one-dimensional labelled array. No index is passed here, so
# pandas assigns the default integer labels 0, 1, 2.

data = list(range(10, 40, 10))  # [10, 20, 30]
s = pd.Series(data)
print(s)


0    10
1    20
2    30
dtype: int64


In [13]:
# DataFrame: a two-dimensional table, built here from a dict that maps
# each column name to the list of values for that column.

data = dict(Name=['Alice', 'Bob'], Age=[25, 30])
df = pd.DataFrame(data)
print(df)


    Name  Age
0  Alice   25
1    Bob   30


In [14]:
# Load the example dataset from 'data.csv' (path is relative to the
# working directory). `pd` comes from the import cell at the top of the
# notebook, so it is not re-imported here.
# Note: this rebinds `df`, replacing the two-row frame built in the
# previous cell.
df = pd.read_csv('data.csv')
print(df)


      Name  Age  Score
0    Alice   24     85
1      Bob   27     90
2  Charlie   22     88


In [15]:
# Viewing data
# Only the last expression of a cell is displayed automatically, so the
# intermediate results are printed explicitly; as bare expressions they
# would be computed and silently discarded.

print(df.head())     # First 5 rows (all rows when the frame is shorter)
print(df.tail())     # Last 5 rows
print(df.shape)      # (rows, columns)
print(df.columns)    # Column labels
df.info()            # Prints its own summary and returns None, so no print()
df.describe()        # Summary statistics of the numeric columns (cell output)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
 2   Score   3 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 204.0+ bytes


Unnamed: 0,Age,Score
count,3.0,3.0
mean,24.333333,87.666667
std,2.516611,2.516611
min,22.0,85.0
25%,23.0,86.5
50%,24.0,88.0
75%,25.5,89.0
max,27.0,90.0


In [12]:

# Accessing rows and values
# iloc selects by integer position, loc selects by index label. With a
# default integer index (0, 1, 2, ...) both address the same first row.
# The first two results are printed because only the last expression of a
# cell is displayed automatically.
print(df.iloc[0])      # Row at position 0
print(df.loc[0])       # Row whose index label is 0
df.loc[0, 'Name']      # Single value: row label 0, column 'Name' (cell output)


'Alice'

In [11]:
# Selecting columns
# The first two results are printed because only the last expression of a
# cell is displayed automatically.
print(df['Name'])        # Single label -> Series
print(df['Age'])         # Single label -> Series
df[['Name', 'Score']]    # List of labels -> DataFrame (cell output)


Unnamed: 0,Name,Score
0,Alice,85
1,Bob,90
2,Charlie,88


In [16]:
# Filtering data with boolean masks
# The first result is printed because only the last expression of a cell is
# displayed automatically.
print(df[df['Age'] > 25])                      # Rows where Age > 25
# Combine conditions with & (and) / | (or). Each comparison needs its own
# parentheses because & and | bind more tightly than > and ==.
df[(df['Age'] > 25) & (df['Name'] == 'Bob')]   # Multiple conditions (cell output)



Unnamed: 0,Name,Age,Score
1,Bob,27,90


In [17]:
# Modifying data

# Column assignment mutates df in place.
df['NewCol'] = df['Age'] + 5    # Add new column
df['Age'] = df['Age'] * 2       # Modify column; re-running this cell doubles Age again

# drop() returns a new DataFrame and leaves df untouched, so a result that
# is neither displayed nor assigned has no visible effect. The first result
# is printed because only the last expression of a cell is displayed.
print(df.drop(columns='NewCol'))   # Copy without the column; df still has NewCol
df.drop(index=0)                   # Copy without the row labelled 0 (cell output)


Unnamed: 0,Name,Age,Score,NewCol
1,Bob,54,90,32
2,Charlie,44,88,27


In [18]:
# Handling missing values
# None of these calls modify df; each returns a new object. The
# intermediate results are printed because only the last expression of a
# cell is displayed automatically.

print(df.isnull())           # Boolean mask: True where a value is missing
print(df.dropna())           # Copy without the rows that contain missing values
print(df.fillna(0))          # Copy with missing values replaced by 0
df['Age'].fillna(df['Age'].mean())  # Age with missing values replaced by the column mean (cell output)


0    48
1    54
2    44
Name: Age, dtype: int64

In [20]:
# Sorting
# sort_values() returns a sorted copy; df keeps its original row order.
# The first result is printed because only the last expression of a cell is
# displayed automatically.

print(df.sort_values('Age'))             # Ascending (the default)
df.sort_values('Age', ascending=False)   # Descending (cell output)



Unnamed: 0,Name,Age,Score,NewCol
1,Bob,54,90,32
0,Alice,48,85,29
2,Charlie,44,88,27
