In [1]:
!pip install pandas



In [8]:
import pandas as pd
import numpy as np

# 1. Working with Pandas Series

#### a) Creating Series

A Pandas Series is a one-dimensional labeled array capable of holding any data type (integers, strings, floats, etc.). It is similar to a column in a spreadsheet or a database table, with labels (called the index) associated with each value, allowing for intuitive data alignment and access. Series are foundational in pandas, serving as the building blocks for more complex data structures like DataFrames.

**Series Through list**

In [4]:
# Display the version string of the imported pandas package.
pd.__version__

'2.2.2'

In [7]:
# Build a Series from a plain Python list and print it; the index shown is
# the default one pandas assigns.
lst = list(range(1, 6))
print(pd.Series(data=lst))

0    1
1    2
2    3
3    4
4    5
dtype: int64


**Series through Numpy Array**

In [11]:
# Same values as the list example, but sourced from a NumPy array.
arr = np.arange(1, 6)
print(pd.Series(data=arr))


0    1
1    2
2    3
3    4
4    5
dtype: int64


**Providing a custom index**

In [15]:
# Pair each name with an explicit index label (1-5) instead of the default index.
names = ['Ainadri', 'Ana', 'Soumya', 'Subhankar', 'Ritesh']
labels = [1, 2, 3, 4, 5]
print(pd.Series(names, index=labels))

1      Ainadri
2          Ana
3       Soumya
4    Subhankar
5       Ritesh
dtype: object


**Series Through dictionary values**

In [16]:
# A dict maps directly onto a Series: keys become the index, values the data.
steps = dict(day_1=4000, day_2=4200, day_3=4400, day_4=4700)
daily_steps = pd.Series(steps)
print(daily_steps)

day_1    4000
day_2    4200
day_3    4400
day_4    4700
dtype: int64


**Using `repeat()` function along with creating a series**

In [18]:
# Repeat the single value 5 three times. Every repeated row keeps the
# original index label 0, as the output below shows.
pd.Series(5).repeat(3)

Unnamed: 0,0
0,5
0,5
0,5


We can use the `reset_index()` function to renumber the index sequentially.

In [21]:
# drop=True discards the old (all-zero) index after reset_index(), leaving a
# fresh 0, 1, 2 index.
pd.Series(5).repeat(3).reset_index(drop = True)



Unnamed: 0,0
0,5
1,5
2,5


In [22]:
# repeat() also accepts one count per element: 10 is repeated five times and
# 20 three times, then the index is renumbered from 0.
s = (
    pd.Series([10, 20])
    .repeat([5, 3])
    .reset_index(drop=True)
)
s

Unnamed: 0,0
0,10
1,10
2,10
3,10
4,10
5,20
6,20
7,20


**Accessing elements**

In [24]:
# Fetch the first element by position. `.iloc` makes the positional intent
# explicit; plain `s[0]` is a label lookup that only coincides with position
# because the index was reset to 0..n-1.
print(s.iloc[0])

10


In [29]:
# Slice out the last element by position; the result is still a Series and
# keeps its index label. `.iloc` states the positional intent explicitly.
print(s.iloc[-1:])

7    20
dtype: int64


#### b) Aggregate function on pandas Series

Aggregate functions on a Pandas Series perform summary computations, such as calculating statistics like sum, mean, median, min, max, count, and standard deviation.

In [38]:
# Ask for several summary statistics in a single agg() call; the result is a
# Series indexed by statistic name.
sr = pd.Series(range(1, 8))
summary_stats = ['min', 'max', 'sum', 'mean', 'median', 'count']
print(sr.agg(summary_stats))

min        1.0
max        7.0
sum       28.0
mean       4.0
median     4.0
count      7.0
dtype: float64


#### c) Series absolute function

The absolute function on a Pandas Series, accessed using .abs(), returns a new Series with the absolute (non-negative) values of each element. It is commonly used to eliminate negative signs from numerical data, enabling easier comparison, analysis, or aggregation without regard to direction.



In [40]:
# Mixed-sign input; abs() yields the element-wise magnitudes.
sr = pd.Series(data=[1, -2, -3, -4, -5, 6, 7])
absolute_values = sr.abs()
print(absolute_values)

0    1
1    2
2    3
3    4
4    5
5    6
6    7
dtype: int64


#### d) Concatenating Series

Concatenating Pandas Series is the process of combining two or more Series objects into a single Series, typically done using `pd.concat()`. It preserves the original indices unless they are reset, allowing for flexible data stacking.

Syntax: `pd.concat([series1, series2])`

In [44]:
# Stack the two Series end to end, then renumber the combined index so it
# runs 0..11 instead of restarting at 0 for the second Series.
sr1 = pd.Series(range(1, 8))
sr2 = pd.Series([1, -2, -5, 6, 7])
combined = pd.concat([sr1, sr2])
print(combined.reset_index(drop=True))

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     1
8    -2
9    -5
10    6
11    7
dtype: int64


#### e) Astype function

The `.astype()` function in Pandas is used to convert the data type of a Series to a specified type, such as int, float, str, or even custom types.

Syntax: `series.astype(dtype)`

In [50]:
# Inspect the Python type of a single stored element (the second one).
# `.iloc` selects it explicitly by position rather than relying on the
# integer label happening to match.
print(type(sr1.iloc[1]))

<class 'numpy.int64'>


In [47]:
# Convert the integer values to floating point; sr2 itself is not reassigned,
# the converted Series is only displayed.
sr2 = pd.Series(data=[1, -2, -5, 6, 7])
sr2.astype(dtype='float')

Unnamed: 0,0
0,1.0
1,-2.0
2,-5.0
3,6.0
4,7.0


#### f) Between function
The `.between()` function in Pandas is used to check whether each element in a Series lies between two boundary values, inclusive by default. It returns a Boolean Series.

Syntax: `series.between(left, right, inclusive='both')`

* `left`: lower bound

* `right`: upper bound

* `inclusive`: `'both'` (default), `'left'`, `'right'`, or `'neither'`

In [53]:
# Flag the values that fall within [4, 11]; both bounds are inclusive by default.
sr1 = pd.Series(data=[1, 2, 3, 4, 5, 9])
sr1.between(left=4, right=11)

Unnamed: 0,0
0,False
1,False
2,False
3,True
4,True
5,True


#### g) String functions (via the `.str` accessor) can be used to extract or modify text in a Series

* Upper and Lower Function
* Len Function
* Strip Function
* Split Function
* Contains Function
* Replace Function
* Count Function
* Startswith and Endswith Function
* Find Function

In [66]:
# Sample strings deliberately padded with leading/trailing spaces so the
# string-function demos below have whitespace to work on.
ser = pd.Series(
    [
        '  Eshant Das  ',
        '  Data Science  ',
        '  Hello World  ',
        '  Artificial Intelligence    ',
        '   Machine Learning  ',
    ]
)


Upper and Lower Function

In [67]:
# Show the upper-cased and lower-cased variants, separated by a dashed rule.
separator = '-' * 30
print(ser.str.upper(), separator, ser.str.lower(), sep='\n')

0                     ESHANT DAS  
1                   DATA SCIENCE  
2                    HELLO WORLD  
3      ARTIFICIAL INTELLIGENCE    
4               MACHINE LEARNING  
dtype: object
------------------------------
0                     eshant das  
1                   data science  
2                    hello world  
3      artificial intelligence    
4               machine learning  
dtype: object


`len()`

In [68]:
# Print each string next to its length. The lengths come from the vectorized
# `.str.len()` accessor (the Series string function this section is about)
# rather than calling the builtin len() per element; the printed output is
# the same.
for text, length in zip(ser, ser.str.len()):
    print(text, length)


  Eshant Das   14
  Data Science   16
  Hello World   15
  Artificial Intelligence     29
   Machine Learning   21


`strip()`

In [74]:
# Print the Series as it currently is, for comparison with the stripped
# version in the next cell.
print(ser)

0                     Eshant Das  
1                   Data Science  
2                    Hello World  
3      Artificial Intelligence    
4               Machine Learning  
dtype: object


In [75]:
# Remove leading and trailing whitespace from every string in the Series.
print(ser.str.strip())

0                 Eshant Das
1               Data Science
2                Hello World
3    Artificial Intelligence
4           Machine Learning
dtype: object


`split()`

In [79]:
# Build the sample date strings once and reuse them for all three views
# (previously the same Series literal was constructed three times).
date_strings = pd.Series(['10/3/1983', '10/4/1994', '29/10/1998'])

print(date_strings)

print('-' * 30 )

# With no separator, split() splits on whitespace; these strings contain
# none, so each one comes back as a single-item list.
print(date_strings.str.split())

print('-' * 30 )

# Splitting on '/' separates each date into its three components.
print(date_strings.str.split('/'))



0     10/3/1983
1     10/4/1994
2    29/10/1998
dtype: object
------------------------------
0     [10/3/1983]
1     [10/4/1994]
2    [29/10/1998]
dtype: object
------------------------------
0     [10, 3, 1983]
1     [10, 4, 1994]
2    [29, 10, 1998]
dtype: object


In [71]:
# With no separator given, each string is split on whitespace into a list of
# words; the surrounding padding does not produce empty items (see output).
ser.str.split()

Unnamed: 0,0
0,"[Eshant, Das]"
1,"[Data, Science]"
2,"[Hello, World]"
3,"[Artificial, Intelligence]"
4,"[Machine, Learning]"


`contains()`

In [92]:
# Rebuild the sample Series (only the first entry keeps leading spaces), then
# flag which strings contain a lowercase "i".
ser = pd.Series(
    [
        '   Eshant Das',
        'Data Science',
        'Hello World',
        'Artificial Intelligence',
        'Machine Learning',
    ]
)
ser.str.contains("i")

Unnamed: 0,0
0,False
1,True
2,False
3,True
4,True


`replace()`

In [93]:
# Replace every lowercase 'i' with '*' in each string; strings without an 'i'
# are returned unchanged.
ser.str.replace('i','*')

Unnamed: 0,0
0,Eshant Das
1,Data Sc*ence
2,Hello World
3,Art*f*c*al Intell*gence
4,Mach*ne Learn*ng


`count()`

In [94]:
# Count occurrences of lowercase 'a' in each string (uppercase 'A' is not counted).
ser.str.count('a')

Unnamed: 0,0
0,2
1,2
2,0
3,1
4,2


`startswith()` and `endswith()`

In [96]:
# True for the strings whose final characters are 'ce'.
ser.str.endswith('ce')

Unnamed: 0,0
0,False
1,True
2,False
3,True
4,False


In [97]:
# True for the strings whose first character is an uppercase 'A'.
ser.str.startswith('A')

Unnamed: 0,0
0,False
1,False
2,False
3,True
4,False


`find()`

In [98]:
# Rebuild the sample Series, then report the position of the first lowercase
# 'a' in each string; -1 marks strings that contain none.
ser = pd.Series(
    [
        '   Eshant Das',
        'Data Science',
        'Hello World',
        'Artificial Intelligence',
        'Machine Learning',
    ]
)

ser.str.find('a')

Unnamed: 0,0
0,6
1,1
2,-1
3,8
4,1


#### h) Converting a Series to List

In [99]:
# Convert the Series values into a plain Python list (the index is not included).
ser.to_list()

['   Eshant Das',
 'Data Science',
 'Hello World',
 'Artificial Intelligence',
 'Machine Learning']

# 2. Pandas DataFrame

#### a) Creating Data Frames

Creating a dataframe using a list

In [100]:
# A flat list becomes a single-column DataFrame; the column is labelled 0
# because no column name is supplied.
lst = [
    'Ainadri',
    'Mandal',
    'Subhankar',
    'Saha',
    'Soumya',
    'Panda',
]

pd.DataFrame(data=lst)

Unnamed: 0,0
0,Ainadri
1,Mandal
2,Subhankar
3,Saha
4,Soumya
5,Panda


In [101]:
# Each inner list is one row, so this yields a 3-row, 2-column DataFrame.
lst = [
    ['tom', 10],
    ['jerry', 12],
    ['spike', 14],
]
pd.DataFrame(data=lst)

Unnamed: 0,0,1
0,tom,10
1,jerry,12
2,spike,14


Creating DataFrame from dict of ndarray/lists:

In [102]:
# Dict keys become column names; each list supplies that column's values.
data = dict(
    Name=['Tom', 'Jack', 'Steve', 'Ricky'],
    Age=[28, 34, 29, 42],
)
pd.DataFrame(data=data)

Unnamed: 0,Name,Age
0,Tom,28
1,Jack,34
2,Steve,29
3,Ricky,42


Adding more columns to the DataFrame

In [106]:
# Same people as before, now described by five columns; the resulting frame
# is kept in `df` for the cells that follow.
data = dict(
    Name=['Tom', 'Jack', 'Steve', 'Ricky'],
    Age=[28, 34, 29, 42],
    Gender=['M', 'M', 'M', 'F'],
    City=['Delhi', 'Mumbai', 'Goa', 'Kerala'],
    Qualification=['MSc', 'MA', 'MCA', 'Phd'],
)
df = pd.DataFrame(data=data)

df

Unnamed: 0,Name,Age,Gender,City,Qualification
0,Tom,28,M,Delhi,MSc
1,Jack,34,M,Mumbai,MA
2,Steve,29,M,Goa,MCA
3,Ricky,42,F,Kerala,Phd


In [108]:
# Select a subset of columns by passing a list of column names.
selected_columns = ['Name', 'Age', 'City']
df[selected_columns]

Unnamed: 0,Name,Age,City
0,Tom,28,Delhi
1,Jack,34,Mumbai
2,Steve,29,Goa
3,Ricky,42,Kerala


#### b) Slicing in DataFrames Using `iloc` and `loc`