In [1]:
# pandas is an open-source Python library that provides high-performance functions for data manipulation and analysis.

# Working with Pandas Series

In [4]:
# Pandas series is a one-dimensional labeled array capable of holding data of any type

In [6]:
import pandas as pd

In [8]:
lst = [1, 2, 3]
pd.Series(lst)

0    1
1    2
2    3
dtype: int64

In [10]:
# Series through numpy

In [12]:
import numpy as np

In [14]:
# NOTE(review): the output below shows int32, while the list-based Series above shows int64 --
# presumably NumPy's platform default integer (e.g. Windows with NumPy < 2); confirm on your machine.
arr = np.array([1,2,3,4])
pd.Series(arr)

0    1
1    2
2    3
3    4
dtype: int32

In [16]:
# Supplying our own index labels instead of the default 0..n-1

In [18]:
pd.Series(data = ['Eshant', 'Pranjal', 'Jayesh', 'Ashish'], index = [1,2,3,4])

1     Eshant
2    Pranjal
3     Jayesh
4     Ashish
dtype: object

In [22]:
pd.Series(index = ['Eshant', 'Pranjal', 'Jayesh', 'Ashish'], data = [1,2,3,4])

Eshant     1
Pranjal    2
Jayesh     3
Ashish     4
dtype: int64

In [24]:
# Series through dictionary

In [26]:
# Dictionary keys become the index labels and the values become the data.
step = {
    'day_1': 4000,
    'day_2': 3000,
    'Day_3': 1200,
}
pd.Series(data=step)

day_1    4000
day_2    3000
Day_3    1200
dtype: int64

### Repeat Function

In [29]:
# pandas.Series.repeat() repeats the elements of a Series. It returns a new Series in which each element of the
# current Series is repeated consecutively a given number of times.

In [33]:
pd.Series(5).repeat(5)

0    5
0    5
0    5
0    5
0    5
dtype: int64

In [37]:
pd.Series(5).repeat(3)

0    5
0    5
0    5
dtype: int64

### Reset Function

In [40]:
# After repeat(), every copy keeps its original index label, so the labels are duplicated (e.g. 0, 0, 0).
# reset_index() replaces them with a fresh 0..n-1 index.

In [42]:
# Without drop=True the old index is kept as a column named "index", so the result is a DataFrame.
pd.Series(5).repeat(5).reset_index()

Unnamed: 0,index,0
0,0,5
1,0,5
2,0,5
3,0,5
4,0,5


In [44]:
pd.Series(5).repeat(5).reset_index(drop=True)

0    5
1    5
2    5
3    5
4    5
dtype: int64

In [46]:
# Passing a list of counts repeats each element its own number of times:
# repeat 10 two times
# repeat 20 three times
# repeat 30 five times
pd.Series([10,20,30]).repeat([2,3,5])

0    10
0    10
1    20
1    20
1    20
2    30
2    30
2    30
2    30
2    30
dtype: int64

In [48]:
pd.Series([10,20,30]).repeat([2,3,5]).reset_index(drop = True)


0    10
1    10
2    20
3    20
4    20
5    30
6    30
7    30
8    30
9    30
dtype: int64

### Accessing the element

In [55]:
# We can access the element based on index

In [53]:
# Five 10s followed by two 20s, re-labelled with a fresh 0..6 index.
ser = (
    pd.Series([10, 20])
    .repeat([5, 2])
    .reset_index(drop=True)
)
ser

0    10
1    10
2    10
3    10
4    10
5    20
6    20
dtype: int64

In [57]:
ser[0]

10

In [59]:
ser[3]

10

In [61]:
ser[5]

20

In [63]:
# Slice from position 2 up to (but not including) position 5.
ser[2:5]

2    10
3    10
4    10
dtype: int64

In [65]:
ser[2:6]

2    10
3    10
4    10
5    20
dtype: int64

In [67]:
ser[4:7]

4    10
5    20
6    20
dtype: int64

In [69]:
ser[:6]

0    10
1    10
2    10
3    10
4    10
5    20
dtype: int64

In [71]:
# A negative stop counts from the end: everything except the first and last element.
ser[1:-1]

1    10
2    10
3    10
4    10
5    20
dtype: int64

### Aggregating Function on Pandas Series

In [74]:
# pandas.Series.agg() (alias: Series.aggregate()) aggregates the Series using one or more operations over the specified axis.

In [78]:
# Sample data 1..7; passing a list of function names makes agg() return a Series labelled by those names.
sr = pd.Series(data=[1, 2, 3, 4, 5, 6, 7])
sr.agg(func=['min'])

min    1
dtype: int64

In [80]:
sr.agg(['min', 'max', 'sum'])

min     1
max     7
sum    28
dtype: int64

### Series Absolute Function

In [83]:
# pandas.Series.abs() returns the absolute numeric value of each element in a Series/DataFrame.

In [85]:
# Mixed-sign sample; abs() flips the negatives and leaves the positives unchanged.
sre = pd.Series(data=[-1, -2, -4, 5, -6, 7])
sre.abs()

0    1
1    2
2    4
3    5
4    6
5    7
dtype: int64

### Appending Series

In [88]:
# Series.append() was used to concatenate two or more Series objects.
# It has been removed from recent versions of pandas (deprecated in 1.4, removed in 2.0), so use pd.concat([arg1, arg2]) instead.

In [90]:
# Two equally long samples used for the concatenation example below.
sr1 = pd.Series(data=[1, -2, 3, -4, 5, -6, 7])
sr2 = pd.Series(data=[1, 2, 3, 4, 5, 6, 7])
(sr1, sr2)

(0    1
 1   -2
 2    3
 3   -4
 4    5
 5   -6
 6    7
 dtype: int64,
 0    1
 1    2
 2    3
 3    4
 4    5
 5    6
 6    7
 dtype: int64)

In [96]:
# Each Series keeps its own index labels, so 0..6 appears twice in the result;
# pass ignore_index=True to pd.concat for a fresh 0..n-1 index.
pd.concat([sr1,sr2])

0    1
1   -2
2    3
3   -4
4    5
5   -6
6    7
0    1
1    2
2    3
3    4
4    5
5    6
6    7
dtype: int64

### Astype Function

In [99]:
# astype() is one of the most commonly used functions; it converts a Series to a different data type.

In [101]:
sr1

0    1
1   -2
2    3
3   -4
4    5
5   -6
6    7
dtype: int64

In [103]:
type(sr1)

pandas.core.series.Series

In [105]:
type(sr1[0])

numpy.int64

In [107]:
sr1.astype('float')

0    1.0
1   -2.0
2    3.0
3   -4.0
4    5.0
5   -6.0
6    7.0
dtype: float64

In [109]:
sr1.astype('str')

0     1
1    -2
2     3
3    -4
4     5
5    -6
6     7
dtype: object

### Between Function

In [112]:
# between(left, right) checks which values lie between the first and second argument (both bounds are inclusive by default).

In [114]:
# The lower bound (6) is greater than the upper bound (5), so no value can satisfy
# 6 <= x <= 5 and every entry comes back False.
ser1 = pd.Series(data=[1, 6, 3, 9, 8, 5, 3, 2])
ser1.between(left=6, right=5)

0    False
1    False
2    False
3    False
4    False
5    False
6    False
7    False
dtype: bool

In [116]:
# Flag the values that fall in the closed interval [5, 6].
ser1 = pd.Series(data=[1, 6, 3, 9, 8, 5, 3, 2])
ser1.between(left=5, right=6)

0    False
1     True
2    False
3    False
4    False
5     True
6    False
7    False
dtype: bool

In [118]:
# Flag the values that fall in the closed interval [2, 8]; only 1 and 9 are outside it.
ser1 = pd.Series(data=[1, 6, 3, 9, 8, 5, 3, 2])
ser1.between(left=2, right=8)

0    False
1     True
2     True
3    False
4     True
5     True
6     True
7     True
dtype: bool

# All string functions (the `.str` accessor) can be used to extract or modify text in a Series

### 1. Upper and Lower Function

In [123]:
sr = pd.Series(['Eshant', 'Pranjal', 'Jayesh', 'Ashish'])
sr

0     Eshant
1    Pranjal
2     Jayesh
3     Ashish
dtype: object

In [125]:
sr.str.upper() # it will convert letter in upper case

0     ESHANT
1    PRANJAL
2     JAYESH
3     ASHISH
dtype: object

In [127]:
sr.str.lower() # It will convert letter in lower case

0     eshant
1    pranjal
2     jayesh
3     ashish
dtype: object

### 2. Length Function

In [130]:
# Use the vectorised .str.len() accessor to compute every string's length in one call,
# then print one length per line.
for length in sr.str.len():
    print(length)

6
7
6
6


### 3. Strip Function 

In [135]:
# strip() removes leading and trailing whitespace from each string

In [139]:
# Names padded with leading and/or trailing spaces; printing each length makes the padding visible.
ser = pd.Series(data=['   Eshant', 'Pranjal    ', '  Jayesh  ', 'Ashish      '])
for name in ser:
    print(name, len(name))

   Eshant 9
Pranjal     11
  Jayesh   10
Ashish       12


In [143]:
series = ser.str.strip()

In [145]:
for i in series:
    print(i, len(i))

Eshant 6
Pranjal 7
Jayesh 6
Ashish 6


### 4. Split Function

In [148]:
# split() breaks each string in the Series into a list of substrings (on whitespace by default)

In [152]:
ser_ = pd.Series(["Eshant Das", "Data Science", "Pranjal Kumar", "Hello World", "Machine Learning"])
ser_

0          Eshant Das
1        Data Science
2       Pranjal Kumar
3         Hello World
4    Machine Learning
dtype: object

In [154]:
# Same sample as above; with no separator given, split() breaks each string on whitespace.
ser_ = pd.Series([
    "Eshant Das",
    "Data Science",
    "Pranjal Kumar",
    "Hello World",
    "Machine Learning",
])
ser_.str.split()

0          [Eshant, Das]
1        [Data, Science]
2       [Pranjal, Kumar]
3         [Hello, World]
4    [Machine, Learning]
dtype: object

In [156]:
ser_.str.split()[0]

['Eshant', 'Das']

In [158]:
ser_.str.split()[4]

['Machine', 'Learning']

In [164]:
num = pd.Series(['10/03/1983', '18/09/2008', '27/12/1961', '05/05/2024'])
num

0    10/03/1983
1    18/09/2008
2    27/12/1961
3    05/05/2024
dtype: object

In [170]:
# The dates contain no whitespace, so the default split() returns each date as a single-element list.
num.str.split()

0    [10/03/1983]
1    [18/09/2008]
2    [27/12/1961]
3    [05/05/2024]
dtype: object

In [172]:
num.str.split('/')

0    [10, 03, 1983]
1    [18, 09, 2008]
2    [27, 12, 1961]
3    [05, 05, 2024]
dtype: object

In [174]:
num.str.split('/')[0]

['10', '03', '1983']

### 5. Contains Function

In [177]:
# Sample with '@' embedded in two entries; contains() flags the strings that include the pattern.
ser_ = pd.Series([
    "Eshant Das",
    "Data @ Science",
    "Pranjal Kumar",
    "Hello @ World",
    "Machine Learning",
])
ser_.str.contains(pat='@')

0    False
1     True
2    False
3     True
4    False
dtype: bool

In [187]:
ser_.str.contains('l')

0    False
1    False
2     True
3     True
4    False
dtype: bool

In [189]:
# Matching is case-sensitive: upper-case 'L' only matches "Machine Learning" here.
ser_.str.contains('L')

0    False
1    False
2    False
3    False
4     True
dtype: bool

In [191]:
ser_.str.contains('ant')

0     True
1    False
2    False
3    False
4    False
dtype: bool

### 6. Replace Function

In [198]:
ser_.str.contains('@')

0    False
1     True
2    False
3     True
4    False
dtype: bool

In [200]:
# Remove every literal '@'. regex=False makes the literal-match intent explicit, so the call
# behaves the same on pandas versions where str.replace defaults to regex matching.
# The spaces around '@' are kept, which leaves a double space in the result.
ser_.str.replace('@', '', regex=False)

0          Eshant Das
1       Data  Science
2       Pranjal Kumar
3        Hello  World
4    Machine Learning
dtype: object

In [202]:
# Swap the literal substring 'Das' for 'Dushyant'; regex=False keeps this a plain-text
# replacement regardless of the pandas version's default.
ser_.str.replace('Das', 'Dushyant', regex=False)

0     Eshant Dushyant
1      Data @ Science
2       Pranjal Kumar
3       Hello @ World
4    Machine Learning
dtype: object

### 7. Count Function

In [205]:
ser_.str.count('a')

0    2
1    2
2    3
3    0
4    2
dtype: int64

### 8. Startswith and Endswith

In [209]:
# These check whether each element starts or ends with the given characters

In [221]:
ser_.str.endswith('s')

0     True
1    False
2    False
3    False
4    False
dtype: bool

In [225]:
ser_.str.endswith('nce')

0    False
1     True
2    False
3    False
4    False
dtype: bool

In [227]:
ser_.str.startswith('Ma')

0    False
1    False
2    False
3    False
4     True
dtype: bool

### 9. Find Function

In [230]:
# find() returns the position where the substring starts, or -1 when it is absent.
ser_.str.find('Learning')

0   -1
1   -1
2   -1
3   -1
4    8
dtype: int64

In [232]:
ser_.str.find('@')

0   -1
1    5
2   -1
3    6
4   -1
dtype: int64

In [234]:
ser_.str.find('H')

0   -1
1   -1
2   -1
3    0
4   -1
dtype: int64

## Converting a Series to List

In [238]:
# The pandas to_list() function (alias: tolist()) converts a Series into a Python list

In [240]:
ser_.to_list()

['Eshant Das',
 'Data @ Science',
 'Pranjal Kumar',
 'Hello @ World',
 'Machine Learning']

In [244]:
type(ser_.to_list())

list