# PANDAS - SERIES & DATAFRAMES

In [70]:
import numpy as np      # For numerical operations and arrays
import pandas as pd     # For data manipulation and analysis
import matplotlib.pyplot as plt   # For plotting and data visualization
import glob             # For file pattern matching (e.g., batch file reading)
import re               # For working with regular expressions (pattern matching in text)
import math             # For mathematical functions and constants

In [71]:
import warnings
warnings.filterwarnings("ignore")

# SERIES

## Create Series

In [143]:
# Create series from Nump Array
v = np.array([1,2,3,4,5,6,7])
s1 = pd.Series(v)
s1

0    1
1    2
2    3
3    4
4    5
5    6
6    7
dtype: int32

In [145]:
# Datatypes of Series
s1.dtype

dtype('int32')

In [76]:
# number of bytes allocated to each item
print(s1.dtype.itemsize) 

4


In [77]:
# Number of bytes consumed by Series
s1.nbytes

28

In [78]:
# Shape of the Series
s1.shape

(7,)

In [79]:
# Number of Dimensions
s1.ndim

1

In [80]:
# Length of Series
len(s1)

7

In [81]:
s1.count()

7

In [82]:
s1.size

7

In [83]:
# Create series for List 
s0 = pd.Series([1,2,3], index =['a','b','c'])
s0

a    1
b    2
c    3
dtype: int64

In [84]:
# Modify index in Series 
s1.index = ['a','b','c','d','e','f','g']
s1

a    1
b    2
c    3
d    4
e    5
f    6
g    7
dtype: int32

In [85]:
# Create Series using Random and Range Function
v2 = np.random.random(10)    # generates 10 random float numbers between 0 and 1
ind2 = np.arange(0,10)       # creates an array [0,1,2,...,9]
s = pd.Series(v2, ind2)      # creates a Pandas Series with v2 as values and ind2 as index
v2, ind2, s                  # displays all three

(array([0.85652124, 0.59950467, 0.33345802, 0.07683699, 0.42582631,
        0.67116761, 0.16606728, 0.24029077, 0.61673574, 0.61140212]),
 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 0    0.856521
 1    0.599505
 2    0.333458
 3    0.076837
 4    0.425826
 5    0.671168
 6    0.166067
 7    0.240291
 8    0.616736
 9    0.611402
 dtype: float64)

In [86]:
# Create Series from Dictionary
dict1 = {'a1':10 , 'a2':20 , 'a3':30 , 'a4':40}
s3 = pd.Series(dict1)
s3

a1    10
a2    20
a3    30
a4    40
dtype: int64

In [87]:
# Creates a Series with value 99 repeated for indices 0–5
pd.Series(99, index=[0,1,2,3,4,5])   

0    99
1    99
2    99
3    99
4    99
5    99
dtype: int64

## Slicing Series

In [89]:
s

0    0.856521
1    0.599505
2    0.333458
3    0.076837
4    0.425826
5    0.671168
6    0.166067
7    0.240291
8    0.616736
9    0.611402
dtype: float64

In [90]:
# Return all element of the series
s[:]

0    0.856521
1    0.599505
2    0.333458
3    0.076837
4    0.425826
5    0.671168
6    0.166067
7    0.240291
8    0.616736
9    0.611402
dtype: float64

In [91]:
# First three element of the Series
s[0:3]

0    0.856521
1    0.599505
2    0.333458
dtype: float64

In [92]:
# LAst element of the Series 
s[-1:]

9    0.611402
dtype: float64

In [93]:
# Fetch forst 4 element in a series 
s[:4]

0    0.856521
1    0.599505
2    0.333458
3    0.076837
dtype: float64

In [94]:
# Return all elements of the series except last two element 
s[:-2]

0    0.856521
1    0.599505
2    0.333458
3    0.076837
4    0.425826
5    0.671168
6    0.166067
7    0.240291
dtype: float64

In [95]:
# Return all elements of the series except last element 
s[:-1]

0    0.856521
1    0.599505
2    0.333458
3    0.076837
4    0.425826
5    0.671168
6    0.166067
7    0.240291
8    0.616736
dtype: float64

In [96]:
# Return last two elements of the series
s[-2:]

8    0.616736
9    0.611402
dtype: float64

In [97]:
# Return last element of the Series
s[-1:]

9    0.611402
dtype: float64

In [98]:
s[-3:-1]

7    0.240291
8    0.616736
dtype: float64

## Append Series

In [159]:
s1 = pd.Series([10, 20, 30])   # original Series

In [167]:
s2 = s1.copy()                 # makes a copy of s1
s2

0    10
1    20
2    30
dtype: int64

In [169]:
s3

a1    10
a2    20
a3    30
a4    40
dtype: int64

In [173]:
# Append s2 & s3 Series
s4 = pd.concat([s2, s3])
s4

0     10
1     20
2     30
a1    10
a2    20
a3    30
a4    40
dtype: int64

Append was removed in Pandas Version 2.0. Instead of that "concat" is used 

In [177]:
# When "inplace=False" it will return a new copy of data with the operation performed
s4.drop('a4' , inplace=False)

0     10
1     20
2     30
a1    10
a2    20
a3    30
dtype: int64

In [179]:
s4

0     10
1     20
2     30
a1    10
a2    20
a3    30
a4    40
dtype: int64

In [181]:
# When we use "inplace=True" it will affect the dataframe
s4.drop('a4', inplace=True)
s4

0     10
1     20
2     30
a1    10
a2    20
a3    30
dtype: int64

In [183]:
s4

0     10
1     20
2     30
a1    10
a2    20
a3    30
dtype: int64

In [189]:
s4 = pd.concat([s4, pd.Series({'a4': 7})])
s4

0     10
1     20
2     30
a1    10
a2    20
a3    30
a4     7
a4     7
dtype: int64

## Operation on Series

In [194]:
v1 = np.array([10,20,30])
v2 = np.array([1,2,3])
s1 = pd.Series(v1)
s2 = pd.Series(v2)
s1,s2

(0    10
 1    20
 2    30
 dtype: int32,
 0    1
 1    2
 2    3
 dtype: int32)

In [196]:
# Addition of Two Series
s1.add(s2)

0    11
1    22
2    33
dtype: int32

In [200]:
# Substraction of Two Series
s1.sub(s2)

0     9
1    18
2    27
dtype: int32

In [202]:
# Subtraction of two series
s1.subtract(s2)

0     9
1    18
2    27
dtype: int32

In [204]:
# Increment all numbers in a series by 9
s1.add(9)

0    19
1    29
2    39
dtype: int32

In [210]:
# Multiplication of two series
s1.mul(s2)

0    10
1    40
2    90
dtype: int32

In [208]:
# Multiplication of two series
s1.multiply(s2)

0    10
1    40
2    90
dtype: int32

In [212]:
# Multiply each element by 1000
s1.multiply(1000)

0    10000
1    20000
2    30000
dtype: int32

In [214]:
# Division
s1.divide(s2)

0    10.0
1    10.0
2    10.0
dtype: float64

In [216]:
# Division
s1.div(s2)

0    10.0
1    10.0
2    10.0
dtype: float64

In [218]:
# MAX number in a series
s1.max()

30

In [220]:
# Min number in a series
s1.min()

10

In [222]:
# Average
s1.mean()

20.0

In [226]:
# Median
s1.median()

20.0

In [228]:
# Standard Deviation
s1.std()

10.0

In [230]:
# Series comparison
s1.equals(s2)

False

In [232]:
s4 = s1

In [234]:
# Series comparison
s1.equals(s4)


True

In [236]:
s5 = pd.Series([1,1,2,2,3,3], index=[0, 1, 2, 3, 4, 5])
s5

0    1
1    1
2    2
3    2
4    3
5    3
dtype: int64

In [238]:
s5.value_counts()

1    2
2    2
3    2
Name: count, dtype: int64