# what is Pandas 

In [1]:
# Pandas is an open-source Python library used for data analysis and data manipulation. 
# It provides data structures and functions that simplify working with structured data like tables, databases, and Excel spreadsheets.

# Pandas is built on NumPy and integrates well with other libraries like Matplotlib, Seaborn, and SQL.

In [2]:
#  Pandas is an open-source python library used for data manipulation and analysis.
# It provides powerful data structures such as DataFrame and Series that make it easy to work with structured data.


# Pandas is a Python library used for working with data sets.
# It has functions for analyzing, cleaning, exploring, and manipulating data.
# The name "Pandas" refers to both "Panel Data" and "Python Data Analysis"; the library was created by Wes McKinney in 2008.

In [3]:
# Key Features of Pandas

# 1.Data Handling:

# Efficiently manage large datasets.
# Supports handling missing data.

# 2.Data Structures:

# Series: A one-dimensional labeled array.
# DataFrame: A two-dimensional table-like structure (like an Excel sheet or SQL table).

# 3.Data Cleaning & Preprocessing:

# Handling missing values (dropna(), fillna()).
# Removing duplicates (drop_duplicates()).
# Changing data types (astype()).

# 4.Data Analysis & Transformation:

# Filtering and selecting data.
# Applying functions (apply(), map(), groupby()).
# Aggregation (sum(), mean(), count()).

# 5.Integration with Other Libraries:
# Works well with NumPy, Matplotlib, Seaborn, and SQL databases.

In [4]:
# Why Use Pandas?

# Pandas allows us to analyze big data and make conclusions based on statistical theories.
# Pandas can clean messy data sets, and make them readable and relevant.
# Relevant data is very important in data science.

In [5]:
# What Can Pandas Do?
# Pandas gives you answers about the data. Like:

# Is there a correlation between two or more columns?
# What is average value?
# Max value?
# Min value?
# Pandas is also able to delete rows that are not relevant or that contain wrong values, such as empty or NULL values. This is called cleaning the data.

# Series 

In [6]:
# A Pandas Series is a one-dimensional labeled array that can hold data of any type (integer, float, string, or objects).
# It is similar to a column in an Excel sheet or a NumPy array, but with labels (index) for each element.

# A Pandas Series is like a column in a table.
# It is a one-dimensional array holding data of any type.

In [8]:
# Creating a Pandas Series
# You can create a Series using pd.Series() from: 
# 1. A Python list
# 2. A NumPy array
# 3. A Dictionary
# 4. A Scalar value

In [9]:
!pip install pandas 



# pd.Series() Method 

In [11]:
# The pd.Series() method is used to create a Pandas Series,
# a one-dimensional labeled array that can store data of any type (integers, floats, strings, objects, etc.).

# Syntax:
# pd.Series(data=None, index=None, dtype=None, name=None, copy=False, fastpath=False)

# Parameters:
# data	The data for the Series (list, NumPy array, dictionary, scalar, etc.).
# index	The labels for the Series (must be the same length as data).
# dtype	Specifies the data type (e.g., int, float, str).
# name	Assigns a name to the Series.
# copy	If True, forces a copy of data even if it's already a Series.
# fastpath	Internal parameter (not used by users).

# 1.data Parameter

In [15]:
# You can create a Series from: 
# 1. A list
# 2. A NumPy array
# 3. A dictionary
# 4. A scalar value

In [20]:
# Creating Series from the list.
import pandas as pd
l1 = [10,20,30,40]
s1 = pd.Series(l1)
print(s1)
print(type(s1))
# The index is automatically assigned (0,1,2,3).


0    10
1    20
2    30
3    40
dtype: int64
<class 'pandas.core.series.Series'>


In [21]:
# Creating a Series from a NumPy array
import numpy as np
arr1 = np.array([10,20,30,40])
s1 = pd.Series(arr1)
print(s1)
# The dtype is automatically detected.

0    10
1    20
2    30
3    40
dtype: int32


In [27]:
# Creating Series from dictionary
d1 = {'name':'GOGO','role':'Developer'}
s1 = pd.Series(d1)
print(s1)

name         GOGO
role    Developer
dtype: object


In [30]:
# Creating Series from Scalar
s1 = pd.Series(5,index=['A','B','C'])
print(s1)

#  Repeats the scalar for each index.

A    5
B    5
C    5
dtype: int64


# 2.index Parameter (Custom Index Labels)

In [31]:
# You can customize the index instead of using default numbers.

In [33]:
s1 = pd.Series([10,20,30],index=['A','B','C'])
print(s1)
# The index is now A, B, C instead of 0,1,2.

A    10
B    20
C    30
dtype: int64


# 3.dtype Parameter (Specifying Data Type)

In [34]:
# You can define the data type explicitly.

In [36]:
s1 = pd.Series([10,20,30,40],dtype='float32')
print(s1)
print(type(s1))
# Forces the data to be stored as float32 instead of int64.

0    10.0
1    20.0
2    30.0
3    40.0
dtype: float32
<class 'pandas.core.series.Series'>


# 4.name Parameter (Assigning a Name to the Series)

In [37]:
# You can give the Series a name for better readability.

In [40]:
s1 = pd.Series([10,20,30,40],index=['A','B','C','D'],name='Values')
print(s1)
print(type(s1))

# Easier Identification in DataFrames
# When you convert a Series into a DataFrame or use it in a DataFrame, the name becomes the column name.

A    10
B    20
C    30
D    40
Name: Values, dtype: int64
<class 'pandas.core.series.Series'>


# 5.copy Parameter (Forcing a Copy)

In [41]:
# If copy=True, a new copy of the data is created even if data is already a Series.

In [None]:
# First, build a Series from another Series WITHOUT copy=True.
# The new Series may share its underlying data with s1; whether the write to
# s2 below is also visible through s1 depends on the pandas version
# (with Copy-on-Write enabled, s1 stays unchanged).
s1 = pd.Series([10,20,30],index=['A','B','C'])
s2 = pd.Series(s1)
print(s1)
print(s2)
print()

print("Changed Series")
s2['A'] = 1000
print(s1)
print(s2)
print()

# Now the same thing WITH copy=True: s3 owns an independent copy of the data,
# so writing to s3 never changes s1.
s3 = pd.Series(s1, copy=True)
s3['B'] = 2000
print("Copied Series")
print(s1)
print(s3)

# Properties of a Pandas Series

In [None]:
# It has several built-in properties that provide information about its structure and contents

# 1. s.index (Index Labels)

In [49]:
# The .index property returns the index labels of the Series.
s1 = pd.Series([10,20,30],['A','B','C'])
print(s1)
print(s1.index)
print(type(s1.index))

# Returns the index labels as a pandas Index object.

A    10
B    20
C    30
dtype: int64
Index(['A', 'B', 'C'], dtype='object')
<class 'pandas.core.indexes.base.Index'>


# 2. s.values (Array of Values)

In [51]:
# The .values property returns the underlying NumPy array of the Series.
s1 = pd.Series([10,20,30],['A','B','C'])
print(s1)
print(s1.values)
print(type(s1.values))

# Extracts the raw data as a NumPy array.

A    10
B    20
C    30
dtype: int64
[10 20 30]
<class 'numpy.ndarray'>


# 3. s.dtype (Data Type)

In [54]:
# The .dtype property returns the data type of the Series elements.
s1 = pd.Series([10,20,30],['A','B','C'])
print(s1)
print(s1.dtype)
print(type(s1.dtype))

#  Shows that the Series stores integers (int64).

A    10
B    20
C    30
dtype: int64
int64
<class 'numpy.dtype[int64]'>


# 4. s.shape (Shape of the Series)

In [55]:

# The .shape property returns the dimensions of the Series as a tuple.

s1 = pd.Series([10,20,30],['A','B','C'])
print(s1)
print(s1.shape)
print(type(s1.shape))


A    10
B    20
C    30
dtype: int64
(3,)
<class 'tuple'>


# 5. s.size (Number of Element).

In [56]:
# The .size property returns the total number of elements in the Series.
s1 = pd.Series([10,20,30],['A','B','C'])
print(s1)
print(s1.size)
print(type(s1.size))


A    10
B    20
C    30
dtype: int64
3
<class 'int'>


# 6. s.ndim (Number of Dimensions)

In [57]:
# The .ndim property returns the number of dimensions of the Series.

s1 = pd.Series([10,20,30],['A','B','C'])
print(s1)
print(s1.ndim)
print(type(s1.ndim))


A    10
B    20
C    30
dtype: int64
1
<class 'int'>


# 7. s.empty (Check if Series is Empty)

In [58]:
# The .empty property checks whether the Series has any elements.

s1 = pd.Series([10,20,30],['A','B','C'])
print(s1)
print(s1.empty)
print(type(s1.empty))


A    10
B    20
C    30
dtype: int64
False
<class 'bool'>


# 8. s.name (Series Name)

In [60]:
# The .name property returns or sets the name of the Series.

s1 = pd.Series([10,20,30],['A','B','C'])
print(s1)
s1.name = 'Data'
print(s1.name)

A    10
B    20
C    30
dtype: int64
Data


# 9. s.hasnans (Check for Missing Values)

In [61]:
# The .hasnans property returns True if the Series contains any missing (NaN) values.

s1 = pd.Series([10,None,30],['A','B','C'])
print(s1)
print(s1.hasnans)
print(type(s1))

A    10.0
B     NaN
C    30.0
dtype: float64
True
<class 'pandas.core.series.Series'>


# All Pandas Series Methods

In [1]:
# A Pandas Series is a 1D labeled array that provides various built-in methods
# for data manipulation, mathematical operations, and analysis.

# Data Manipulation Methods

# 1.Series.copy(deep=True)

In [2]:
# Creates a copy of the Series.

# Parameter	Description	Default
# deep	If True, creates a deep copy	True

In [5]:
import pandas as pd

s1 =  pd.Series([1,2,3],index=['A','B','C'])
s2 = s1.copy()
print("Original Series")
print(s1)
print()

print("Copied Series")
print(s2)

Original Series
A    1
B    2
C    3
dtype: int64

Copied Series
A    1
B    2
C    3
dtype: int64


# 2.Series.astype(dtype, copy=True, errors='raise')

In [10]:
# Converts the elements of the Series to a different data type.
# Returns a new Series.

s1 = pd.Series([10,20,30])
s2 = s1.astype(float)
print("Original Series")
print(s1)
print()

print("Changed Series")
print(s2)

Original Series
0    10
1    20
2    30
dtype: int64

Changed Series
0    10.0
1    20.0
2    30.0
dtype: float64


# 3.Series.replace(to_replace, value, inplace=False, limit=None)

In [17]:
# Replaces values in the Series.
# By default, returns a new Series with the replaced values.

# Parameter	Description	Default
# to_replace	Value(s) to replace	Required
# value	New value(s)	Required
# inplace	Modify in place	False
# limit	Max size gap to fill when a fill method is used; it does not cap the number of replacements (deprecated since pandas 2.1)	None

In [31]:
# i.Using to_replace and value.

s1 = pd.Series([1,2,3,4,5,6,5,2],index=['A','B','C','D','E','F','G','H'])
s2 = s1.replace(2,99)
print(s1)
print(s2)
# Bydefault replace to_replace element with new element

A    1
B    2
C    3
D    4
E    5
F    6
G    5
H    2
dtype: int64
A     1
B    99
C     3
D     4
E     5
F     6
G     5
H    99
dtype: int64


In [32]:
# ii Using inplace parameter.
s1 = pd.Series([1,2,3,4,5,6,5,2],index=['A','B','C','D','E','F','G','H'])
s1.replace(2,99,inplace=True)
print(s1)

A     1
B    99
C     3
D     4
E     5
F     6
G     5
H    99
dtype: int64


In [35]:
# iii. The limit parameter
# limit does not cap how many matches are replaced: it only bounded the gap
# filled by the method-based (pad/bfill) form of replace, and the keyword is
# deprecated since pandas 2.1. A plain value replacement therefore always
# replaces every occurrence, which is why all the 20s below become 99.
s1 = pd.Series([10,20,30,40,10,10,20,40,20,10,20,20])
s2 = s1.replace(20, 99)
print(s2)

0     10
1     99
2     30
3     40
4     10
5     10
6     99
7     40
8     99
9     10
10    99
11    99
dtype: int64


# 4.Series.drop(labels, axis=0, inplace=False, errors='raise')

In [36]:
# Removes elements by index.

# Parameter	Description	Default
# labels	Index label(s) to drop	Required
# axis	Always 0 for Series	0
# inplace	Modify in place	False
# errors	Handle missing labels ('raise', 'ignore')	'raise'

In [40]:
# i.Using Label 
s1 = pd.Series([10,20,30,40],index=['A','B','C','D'])
s2 = s1.drop('B')
print("Original Array")
print(s1)
print()

print("New Array")
print(s2)

Original Array
A    10
B    20
C    30
D    40
dtype: int64

New Array
A    10
C    30
D    40
dtype: int64


In [43]:
# ii. Using inplace 
s1 = pd.Series([10,20,30,40],index=['A','B','C','D'])
s1.drop('B',inplace=True)
print(s1)

A    10
C    30
D    40
dtype: int64


In [51]:
# iii.Using error

# The errors parameter in Series.drop() specifies what happens if the index label(s) you want to remove do not exist in the Series.
# a.errors='raise' (default) → Raises a KeyError if any label is missing.
# b.errors='ignore' → Ignores missing labels without throwing an error


# a.ignore error
s1 = pd.Series([10,20,30,40],index=['A','B','C','D'])
s1.drop('Z',inplace=True,errors='ignore')
print(s1)
# Here the label 'Z' doesn't exist in the Series, so it is silently ignored because errors='ignore' is used.

A    10
B    20
C    30
D    40
dtype: int64


# Statistical & Aggregation Methods

# 1.Series.sum(axis=0, skipna=True)

In [57]:
# Computes the sum of values.

# Parameter	Description	Default
# axis	Axis (0 for Series)	0
# skipna	Ignore NaN values	True

s1 = pd.Series([10,20,30,40],index=['A','B','C','D'])
print(s1.sum())

100


In [56]:
# When None values are present, the Series is stored as float64 (None becomes NaN), so the sum is a float.
s1 = pd.Series([10,20,30,40,None],index=['A','B','C','D','E'])
print(s1.sum(skipna=True))

100.0


# 2.Series.mean(axis=0, skipna=True)

In [58]:
# Computes the mean.

# Parameter	Description	Default
# axis	Always 0	0
# skipna	Ignore NaN values	True

s1 = pd.Series([10,20,30,40],index=['A','B','C','D'])
print(s1.mean())

25.0


In [59]:
s1 = pd.Series([10,20,30,40,None],index=['A','B','C','D','E'])
print(s1.mean(skipna=True))

25.0


# 3.Series.median(axis=0, skipna=True)

In [64]:
# Computes the median.

# Parameter	Description	Default
# axis	Always 0	0
# skipna	Ignore NaN values	True

In [69]:
s1 = pd.Series([10,20,30],index=['A','B','C'])
print(s1.median())

20.0


In [70]:
s1 = pd.Series([10,20,30,None],index=['A','B','C','D'])
print(s1.median(skipna=True))

20.0


# 4.Series.var(axis=0, ddof=1, skipna=True)

In [71]:
# Computes the variance.

# Parameter	Description	Default
# ddof	Delta degrees of freedom	1
# skipna	Ignore NaN values	True

s1 = pd.Series([10,20,30],index=['A','B','C'])
print(s1.var())

100.0


In [72]:
s1 = pd.Series([10,20,30,None],index=['A','B','C','D'])
print(s1.var(skipna=True))

100.0


# 5.Series.std(axis=0, ddof=1, skipna=True)

In [73]:
# Computes the standard deviation.

# Parameter	Description	Default
# axis	Always 0	0
# ddof	Delta degrees of freedom	1
# skipna	Ignore NaN values	True

s1 = pd.Series([10,20,30],index=['A','B','C'])
print(s1.std())

10.0


In [75]:
s1 = pd.Series([10,20,30,None],index=['A','B','C','D'])
print(s1.std(skipna=True))

10.0


#  Sorting Methods

# 1.Series.sort_values(ascending=True, inplace=False, ignore_index=False)

In [76]:
# Sorts the values.
# Bydefault return the new Series with sorted values.

# Parameter	Description	Default
# ascending	Sort order	True
# inplace	Modify in place	False
# ignore_index	Reset index	False

In [86]:
# i.Using ascending
s1 = pd.Series([40,50,10,20,30],index=['A','B','C','D','E'])
s2 = s1.sort_values(ascending=True)
print("Original Array")
print(s1)
print()

print("Sorted Array in Ascending Order")
print(s2)
print()

print("Original Array")
print(s1)
print()
print("Sorted Array in Descending Order")
s2 = s1.sort_values(ascending=False)
print(s2)


Original Array
A    40
B    50
C    10
D    20
E    30
dtype: int64

Sorted Array in Ascending Order
C    10
D    20
E    30
A    40
B    50
dtype: int64

Original Array
A    40
B    50
C    10
D    20
E    30
dtype: int64

Sorted Array in Descending Order
B    50
A    40
E    30
D    20
C    10
dtype: int64


In [82]:
# ii.Using inplace
s1 = pd.Series([40,50,10,20,30],index=['A','B','C','D','E'])
s1.sort_values(inplace=True)
print(s1)

C    10
D    20
E    30
A    40
B    50
dtype: int64


In [89]:
# iii. Using ignore_index

# The ignore_index parameter in Series.sort_values() controls whether the original index is retained after sorting.
# a.ignore_index=False (default) → Keeps the original index after sorting.
# b.ignore_index=True → Resets the index to a default integer range (0, 1, 2, ...).

# b. Not Keeping the original Array indexing.
s1 = pd.Series([40,50,10,20,30],index=['A','B','C','D','E'])
s2 = s1.sort_values(ignore_index=True)
print(s1)
print(s2)

# When ignore_index=True, the index is reset to a sequential range (0, 1, 2, ...).

A    40
B    50
C    10
D    20
E    30
dtype: int64
0    10
1    20
2    30
3    40
4    50
dtype: int64


# 2.Series.sort_index(ascending=True, inplace=False)

In [90]:
# Sorts by index.
# The Series is sorted by its labels, not by its values.

# Parameter	Description	Default
# ascending	Sort order	True
# inplace	Modify in place	False

In [93]:
# i.Using Ascending True 
s1 = pd.Series([40,50,10,20,30],index=['A','B','C','D','E'])
s2 = s1.sort_index(ascending=True)
print(s2)

# Using Ascending False
s1 = pd.Series([40,50,10,20,30],index=['A','B','C','D','E'])
s2 = s1.sort_index(ascending=False)
print(s2)

A    40
B    50
C    10
D    20
E    30
dtype: int64
E    30
D    20
C    10
B    50
A    40
dtype: int64


In [94]:
# ii. Using inplace
s1 = pd.Series([40,50,10,20,30],index=['A','B','C','D','E'])
s1.sort_index(inplace=True)
print(s1)

A    40
B    50
C    10
D    20
E    30
dtype: int64


# String Operations

# 1.Series.str.upper()

In [95]:
# Converts strings to uppercase.
# Return the new Series with all the alphabets in Upper Case.

In [98]:
s1 = pd.Series(['Hello','World'])
# str.upper() builds a new Series of upper-cased strings.
s2 = s1.str.upper()
print(s2)
# Print the original as well to show that s1 itself is left untouched.
print(s1)

0    HELLO
1    WORLD
dtype: object
0    Hello
1    World
dtype: object


# 2.Series.str.contains(pattern, case=True, na=False)

In [100]:
# Checks if strings contain a pattern.
# Return the Series containing boolean values.

# Parameter	Description	Default
# pattern	String or regex pattern	Required
# case	Case-sensitive match	True
# na	Value for missing data	False

s = pd.Series(["apple", "banana", "cherry"])
print(s.str.contains("an"))

0    False
1     True
2    False
dtype: bool


# Missing Data Handling

# 1.Series.isnull()

In [101]:
# Checks for missing values.
# Returns a new boolean Series that is True where the value is missing (None/NaN) and False otherwise.
s1 = pd.Series([10,None,20])
s2 = s1.isnull()
print(s2)

0    False
1     True
2    False
dtype: bool


# 2.Series.fillna(value, inplace=False)

In [107]:
# Fills NaN values.
# Returns a new Series in which missing values are replaced with the given value.

# Parameter	Description	Default
# value	Replacement value	Required
# inplace	Modify in place	False

s1 = pd.Series([10,20,None,30,40,None])
s2 = s1.fillna(0)
print(s2)

0    10.0
1    20.0
2     0.0
3    30.0
4    40.0
5     0.0
dtype: float64


# 3.Series.dropna(inplace=False)

In [114]:
# Removes NaN values.

s1 = pd.Series([1, None, 3])
s2 = s1.dropna()
print(s2)

# Using inplace
s1 = pd.Series([1,2,3,None,4,5,None])
s1.dropna(inplace=True)
print(s1)

0    1.0
2    3.0
dtype: float64
0    1.0
1    2.0
2    3.0
4    4.0
5    5.0
dtype: float64


# Indexing in Series

In [115]:
# Indexing in a Pandas Series refers to accessing and manipulating elements using labels or positions.

In [117]:
# 1.Default Indexing
# By default, Pandas assigns integer indices (0, 1, 2, ...) to Series elements.

s1 = pd.Series([10,20,30,40])
print(s1[0])

# The default index is 0, 1, 2, 3.
# You can access elements using these indices.

10


In [121]:
# 2. Custom Indexing
# You can define a custom index (labels) instead of the default integers.
s1 = pd.Series([10,20,30,40],index=['A','B','C','D'])
print(s1['A'])
print(s1['B'])
print(s1['C'])
print(s1['D'])

10
20
30
40


In [123]:
# 3 Positional Indexing (Using iloc)
# Pandas provides integer-based indexing using .iloc[].
print(s1.iloc[0])
print(s1.iloc[1])
print(s1.iloc[2])
print(s1.iloc[3])

# .iloc[n] → Retrieves the element at position n (like a list).



10
20
30
40


In [124]:
# 4.Label-Based Indexing (Using loc)
# If your Series has a custom index, you can access values using .loc[].
print(s1.loc['A'])
print(s1.loc['B'])
print(s1.loc['C'])
print(s1.loc['D'])


10
20
30
40


In [129]:
# 5.Boolean Indexing
# You can filter values based on a condition.
print(s1[s1>5])

A    10
B    20
C    30
D    40
dtype: int64


In [131]:
# 6. Negative Indexing
# Plain s1[-1] is a label lookup, not a positional one: with the default integer
# index it raises KeyError, and the positional fallback used for other index
# types is deprecated since pandas 2.1.
# Use .iloc[] for negative (counted-from-the-end) positions.
s1 = pd.Series([10,20,30,40],index=['A','B','C','D'])

print(s1.iloc[-1])
print(s1.iloc[-2])
print(s1.iloc[-3])
print(s1.iloc[-4])

40
30
20
10


# Slicing in Pandas Series

In [136]:
# 1.Slicing Using Default Indexing (Integer-Based)
# If the Series has the default numeric index (0, 1, 2,...), we can use Python-style slicing.
s1 = pd.Series([10,20,30,40])
print(s1[:])
print(type(s1[:]))

0    10
1    20
2    30
3    40
dtype: int64
<class 'pandas.core.series.Series'>


In [137]:
print(s1[:s1.size])
print(type(s1[:s1.size]))

0    10
1    20
2    30
3    40
dtype: int64
<class 'pandas.core.series.Series'>


In [138]:
print(s1[0:])
print(type(s1[0:]))

0    10
1    20
2    30
3    40
dtype: int64
<class 'pandas.core.series.Series'>


In [139]:
print(s1[0:3])
print(type(s1[0:3]))

0    10
1    20
2    30
dtype: int64
<class 'pandas.core.series.Series'>


In [142]:
# 2.Slicing Using Custom Indexing (Label-Based)
# If the Series has custom labels, slicing will include the endpoint.

s1 = pd.Series([10,20,30,40],index=['A','B','C','D'])
print(s1[:])

A    10
B    20
C    30
D    40
dtype: int64


In [144]:
print(s1['A':'D'])
# Unlike position-based slicing, slicing with labels includes the endpoint ('D' is included).

A    10
B    20
C    30
D    40
dtype: int64


In [145]:
# 3. Slicing Using iloc[] (Position-Based Indexing)
# If you want to slice using position instead of labels, use .iloc[].
print(s1.iloc[:])

A    10
B    20
C    30
D    40
dtype: int64


In [148]:
print(s1.iloc[0:3])

A    10
B    20
C    30
dtype: int64


In [149]:
# 4.Slicing Using loc[] (Label-Based Indexing)
# If you slice using .loc[], the end index is included.
print(s1.loc['A':'D'])

A    10
B    20
C    30
D    40
dtype: int64


In [150]:
print(s1['A':])

A    10
B    20
C    30
D    40
dtype: int64


In [151]:
print(s1['A':'B'])

A    10
B    20
dtype: int64


In [152]:
# 5.Slicing with Step (Stepping)
# You can use a step to skip elements.
print(s1[::2])

A    10
C    30
dtype: int64


In [153]:
print(s1[::])

A    10
B    20
C    30
D    40
dtype: int64


In [154]:
print(s1[::-1])

D    40
C    30
B    20
A    10
dtype: int64


In [156]:
# 6.Slicing Using Conditions (Boolean Indexing)
# You can filter the Series based on conditions.

print(s1[s1>=20])
# Only values greater than or equal to 20 are selected.

B    20
C    30
D    40
dtype: int64


# Arithmetic Operations on Pandas Series

In [157]:
# Pandas Series supports element-wise arithmetic operations just like NumPy arrays. 
# Operations are performed index-wise, meaning the indexes are aligned before the operation is applied.

In [158]:
# 1. Basic Arithmetic Operations
# Operator	Description	Example
# +	Addition	s1 + s2
# -	Subtraction	s1 - s2
# *	Multiplication	s1 * s2
# /	Division	s1 / s2
# //	Floor Division	s1 // s2
# %	Modulus	s1 % s2
# **	Power	s1 ** s2

In [160]:
s1 = pd.Series([1,2,3,4])
s2 = pd.Series([1,2,3,4])
print(s1+s2)

0    2
1    4
2    6
3    8
dtype: int64


In [161]:
print(s1-s2)

0    0
1    0
2    0
3    0
dtype: int64


In [162]:
print(s1*s2)

0     1
1     4
2     9
3    16
dtype: int64


In [163]:
print(s1/s2)

0    1.0
1    1.0
2    1.0
3    1.0
dtype: float64


In [164]:
print(s1%s2)

0    0
1    0
2    0
3    0
dtype: int64


In [165]:
print(s1**s2)

0      1
1      4
2     27
3    256
dtype: int64


In [166]:
# 2.Element-Wise Operations Between Two Series
# Operations happen index-wise, meaning matching indexes are used for calculations.

print(s1+s2)

0    2
1    4
2    6
3    8
dtype: int64


In [167]:
# 3. Handling Missing Indexes (Alignment)
# If indexes do not match, missing values (NaN) appear.

s1 = pd.Series([1,3,4],index=['a','c','d'])
s2 = pd.Series([1,2,4],index=['a','b','d'])
print(s1+s2)
# If an index is missing in either Series, the result is NaN.

a    2.0
b    NaN
c    NaN
d    8.0
dtype: float64


In [169]:
# 4.Filling Missing Values with fill_value
# We can use .add(), .sub(), .mul(), .div(), etc., with fill_value to replace NaN.

print(s1.add(s2,fill_value=0))

a    2.0
b    2.0
c    3.0
d    8.0
dtype: float64


In [180]:
# 5. Scalar Arithmetic Operations
# A scalar operation applies to all elements in the Series.
s1 = pd.Series([1,2,3,4])
print(s1)

0    1
1    2
2    3
3    4
dtype: int64


In [181]:
print(s1+1)

0    2
1    3
2    4
3    5
dtype: int64


In [182]:
print(s1-1)

0    0
1    1
2    2
3    3
dtype: int64


In [185]:
# 6.Comparing Two Series
# Comparison is element-wise, returning a boolean Series.

s1 = pd.Series([1,2,3,4])
s2 = pd.Series([2,3,4,5])
print(s1==s2)
print(s1<s2)

0    False
1    False
2    False
3    False
dtype: bool
0    True
1    True
2    True
3    True
dtype: bool


# Mathematical & Statistical Methods

In [None]:
# These methods help analyze Series values numerically.

# Method	Description	Parameters	Example
# sum()	Returns sum of all values	axis=0, skipna=True	s.sum()
# mean()	Returns mean (average)	axis=0, skipna=True	s.mean()
# median()	Returns median	axis=0, skipna=True	s.median()
# min()	Returns min value	axis=0, skipna=True	s.min()
# max()	Returns max value	axis=0, skipna=True	s.max()
# std()	Returns standard deviation	axis=0, skipna=True	s.std()
# var()	Returns variance	axis=0, skipna=True	s.var()
# prod()	Returns product of values	axis=0, skipna=True	s.prod()
# cumsum()	Cumulative sum	axis=0, skipna=True	s.cumsum()
# cumprod()	Cumulative product	axis=0, skipna=True	s.cumprod()


In [186]:
s1 = pd.Series([1,2,3,4])
print(s1)

0    1
1    2
2    3
3    4
dtype: int64


In [187]:
print(s1.sum())

10


In [188]:
print(s1.median())

2.5


In [190]:
print(s1.min())

1


In [192]:
print(s1.max())

4


In [193]:
print(s1.var())

1.6666666666666667


In [194]:
print(s1.std())

1.2909944487358056


In [195]:
print(s1.prod())

24


In [197]:
print(s1.cumsum())
print(type(s1.cumsum()))

0     1
1     3
2     6
3    10
dtype: int64
<class 'pandas.core.series.Series'>


In [198]:
print(s1.cumprod())

0     1
1     2
2     6
3    24
dtype: int64


# 2.Data Transformation & Manipulation

In [199]:
# Method	Description	Parameters	Example
# astype()	Converts type	dtype, copy=True, errors='raise'	s.astype(int)
# copy()	Returns a copy of Series	deep=True	s.copy()
# replace()	Replaces values	to_replace, value, inplace=False, limit=None	s.replace(10, 100)
# clip()	Limits values within range	lower, upper, inplace=False	s.clip(5, 20)
# drop()	Removes index labels	labels, axis=0, inplace=False, errors='raise'	s.drop([0,2])
# sort_values()	Sorts values	ascending=True, inplace=False, ignore_index=False	s.sort_values()
# sort_index()	Sorts by index	ascending=True, inplace=False	s.sort_index()
# reset_index()	Resets index	drop=False, inplace=False	s.reset_index(drop=True)

In [200]:
s1 = pd.Series([1,2,3,4])
print(s1)

0    1
1    2
2    3
3    4
dtype: int64


In [203]:
s2 = s1.astype(float)
print("Original Array")
print(s1)
print()
print("Changed Array")
print(s2)

Original Array
0    1
1    2
2    3
3    4
dtype: int64

Changed Array
0    1.0
1    2.0
2    3.0
3    4.0
dtype: float64


In [207]:
# Shallow copy: deep=False makes a new Series object without duplicating the data.
# NOTE(review): whether the write below is also visible through s1 depends on
# the pandas version / Copy-on-Write setting - confirm against the version in use.
s1 = pd.Series([1, 2, 3, 4])
s2 = s1.copy(deep=False)
s2[0] = 1000
print(s1, s2, sep="\n")

0    1000
1       2
2       3
3       4
dtype: int64
0    1000
1       2
2       3
3       4
dtype: int64


In [209]:
s1 = pd.Series([1,2,3,4,5,1,1,2,4])
s2 = s1.replace(1,100)
print("Original Array")
print(s1)
print()
print("Changed Array")
print(s2)

Original Array
0    1
1    2
2    3
3    4
4    5
5    1
6    1
7    2
8    4
dtype: int64

Changed Array
0    100
1      2
2      3
3      4
4      5
5    100
6    100
7      2
8      4
dtype: int64


In [213]:
s1 = pd.Series([1,2,3,4,5,6,7,8])
s2 = s1.clip(3,7)
print(s2)

0    3
1    3
2    3
3    4
4    5
5    6
6    7
7    7
dtype: int64


In [216]:
s1 = pd.Series([1,2,3,4,5,6,7,8])
s2 = s1.drop(0)
print(s2)
# removed the 0th index.

1    2
2    3
3    4
4    5
5    6
6    7
7    8
dtype: int64


In [224]:
s1 = pd.Series([1,2,3,4],index=['a','b','c','d'])
s2 = s1.reset_index()
print(s2)
print(type(s2))

  index  0
0     a  1
1     b  2
2     c  3
3     d  4
<class 'pandas.core.frame.DataFrame'>


# 3. Boolean & Logical Methods

In [225]:
# Method	Description	Parameters	Example
# all()	Checks if all elements are True	axis=0, skipna=True	s.all()
# any()	Checks if any element is True	axis=0, skipna=True	s.any()
# isna()	Checks for NaN values	-	s.isna()
# notna()	Checks for non-NaN values	-	s.notna()
# between()	Checks if values are within range	left, right, inclusive='both'	s.between(10, 50)
# where()	Replaces values where condition is False	cond, other, inplace=False	s.where(s > 20, 0)

In [229]:
s1 = pd.Series([1,2,3,0,None])
print(s1)

0    1.0
1    2.0
2    3.0
3    0.0
4    NaN
dtype: float64


In [230]:
# Return the single boolean value
s2 = s1.all()
print(s2)

False


In [232]:
# Return the single boolean value
s2 = s1.any()
print(s2)

True


In [234]:
# It checks whether any element in Series is none or not 
# Return the Series containing True for the none values and false for the non-null values.
s2 = s1.isna()
print(s2)

0    False
1    False
2    False
3    False
4     True
dtype: bool


In [235]:
# It checks each element of the Series for a non-missing value.
# Returns a Series containing True for non-missing values and False for missing (None/NaN) values.
s2 = s1.notna()
print(s2)

0     True
1     True
2     True
3     True
4    False
dtype: bool


In [237]:
# It checks whether the values in the Series lie within the specified range (both ends inclusive by default).
# Returns the boolean series.
s2 = s1.between(1,4)
print(s2)

0     True
1     True
2     True
3    False
4    False
dtype: bool


In [246]:
# Keeps values where the condition is True and replaces the rest with the given value.
s1 = pd.Series([1,2,3,4,5,6])
s2 = s1.where(s1>5,100)
print(s2)

0    100
1    100
2    100
3    100
4    100
5      6
dtype: int64


# 4. Indexing & Selection Methods

In [247]:
# Method	Description	Parameters	Example
# iloc[]	Select by position	[start:end]	s.iloc[1:3]
# loc[]	Select by label	[start:end]	s.loc['a':'c']
# at[]	Fast access to scalar value by label	index	s.at['b']
# iat[]	Fast access to scalar value by position	index	s.iat[2]

In [249]:
s1 = pd.Series([1,2,3,4])
print(s1.iloc[0])

1


In [251]:
s1 = pd.Series([1,2,3,4],index=['A','B','C','D'])
print(s1.loc['B'])

2


In [254]:
print(s1.iat[1])

2


# 5. String Methods (For Text Data)

In [255]:
# Method	Description	Example
# str.lower()	Converts to lowercase	s.str.lower()
# str.upper()	Converts to uppercase	s.str.upper()
# str.contains()	Checks for substring	s.str.contains('hello')
# str.replace()	Replaces substrings	s.str.replace('old', 'new')
# str.len()	Gets length of each string	s.str.len()

In [256]:
s1 = pd.Series(['Hello','World'])
print(s1)

0    Hello
1    World
dtype: object


In [257]:
s2 = s1.str.upper()
print(s2)

0    HELLO
1    WORLD
dtype: object


In [258]:
s2 = s1.str.lower()
print(s2)

0    hello
1    world
dtype: object


In [259]:
s2 = s1.str.contains('l')
print(s2)

0    True
1    True
dtype: bool


In [261]:
s2 = s1.str.replace('l','L')
print(s2)

0    HeLLo
1    WorLd
dtype: object


In [262]:
s2 = s1.str.len()
print(s2)

0    5
1    5
dtype: int64


# 6.Aggregation & Summary Methods


In [263]:
# Method	Description	Example
# count()	Number of non-null values	s.count()
# unique()	Returns unique values	s.unique()
# nunique()	Number of unique values	s.nunique()
# value_counts()	Frequency of each unique value	s.value_counts()
# idxmax()	Index of max value	s.idxmax()
# idxmin()	Index of min value	s.idxmin()

In [264]:
s1 = pd.Series([1,2,3,1,3,1,2,4,2,5])
print(s1)

0    1
1    2
2    3
3    1
4    3
5    1
6    2
7    4
8    2
9    5
dtype: int64


In [265]:
# Returns the number of non-missing (non-NaN) elements in the Series.
s2 = s1.count()
print(s2)

10


In [275]:
# Return all the unique element of the Series.
s2 = s1.unique()
print(s2)

[1 2 3 4 5]


In [279]:
# Return the count of unique values.
s1 = pd.Series([1,2,3,4,'a','c',1])
print(s1.nunique())

6


In [281]:
# Return the frequency of the unique values
print(s1.value_counts())

1    2
2    1
3    1
4    1
a    1
c    1
Name: count, dtype: int64


In [285]:
# Return the index which contains maximum value.
s1 = pd.Series([1,23,4,4],index=['a','b','c','d'])
s2 = s1.idxmax()
print(s2)

b


In [287]:
# Return the index which contains minimum value
s1 = pd.Series([1,23,4,4],index=['a','b','c','d'])
s2 = s1.idxmin()
print(s2)

a


# 7. Combining & Merging Series

In [288]:
# Method	Description	Example
# concat()	Concatenates Series end to end	pd.concat([s1, s2])
# combine()	Element-wise combination	s1.combine(s2, max)
# combine_first()	Fills NaN with another Series	s1.combine_first(s2)

In [295]:
# Stack two Series end to end. Each keeps its own labels, so the result
# carries repeated index values (0..4 followed by 0..3).
s1 = pd.Series([1, 2, 3, 4, 5])
s2 = pd.Series([6, 7, 8, 9])
s3 = pd.concat(objs=[s1, s2])
print(s3)

0    1
1    2
2    3
3    4
4    5
0    6
1    7
2    8
3    9
dtype: int64


In [303]:
# 2. combine()
# Applies an element-wise function to combine two Series.
# Used when you want to merge two Series but keep the best value for each position.
# You must provide a function (func) to determine how the values are combined.

# 🔹 Syntax:
# Series.combine(other, func, fill_value=None)

# Compare the values index by index and assign the maximum value.
s3 = s1.combine(s2,max)
print(s3)

0    6
1    7
2    8
3    9
4    5
dtype: int64


In [304]:
# 3.combine_first()
# Fills missing values (NaN) in one Series using another.
# It does not modify non-missing values in the first Series.

In [305]:
s1 = pd.Series([1,None,3,None,5])
s2 = pd.Series([10,20,30,40,50])
s3 = s1.combine_first(s2)
print(s3)

# The None values in s1 are replaced by the corresponding values in s2.
# Other values remain unchanged.

0     1.0
1    20.0
2     3.0
3    40.0
4     5.0
dtype: float64
