## Pandas

In [1]:
# def: Pandas is used for data manipulation and analysis.
# 1. Pandas is an open-source library.
# 2. Pandas is built on top of NumPy: its data structures are backed by NumPy arrays, so operations run on NumPy under the hood.
# 3. A Series converts to a 1D NumPy array and a DataFrame converts to a 2D NumPy array (e.g. via .to_numpy()).

### Important list of operations in pandas

In [2]:
# 1. Data structures (Series, DataFrame)
# 2. Data input and output (Excel, CSV, SQL, JSON)
# 3. Understand the dataset (head(), tail(), dtype,...)
# 4. Data filtering and indexing (loc, iloc, where(),...)
# 5. Handling missing values (NAN/NULL)
# 6. Remove the duplicate values
# 7. Handle inconsistent data (Male, M, Ma,...)
# 8. Data transformation (apply(),...)
# 9. Aggregation and grouping functions
# 10. Time series related functions
# 11. Data visualization

In [3]:
import numpy as np
import pandas as pd

In [5]:
# Show the installed pandas version; the outputs recorded in this notebook come from 2.2.3.
pd.__version__

'2.2.3'

## 1. Data Structures (Series, DataFrame)

### Series Data Structure

In [6]:
# A Series is a one-dimensional labeled array: a sequence of values plus an index.
# In machine learning, the "Target" column of a dataset is typically stored as a Series.

In [7]:
# Empty Series: no values and an empty index.
# The dtype is passed explicitly so the result does not depend on the pandas
# version: before pandas 2.0 an empty Series defaulted to float64 (and emitted a
# warning asking for an explicit dtype); from 2.0 on the default is object.
ser = pd.Series(dtype="object")
print(ser)

Series([], dtype: object)


In [9]:
# Build a Series from a Python list.
# No index is given, so pandas assigns the default integer labels 0..n-1.
list1 = [10, 20, 30, 40, 50]
ser = pd.Series(data=list1)
print(ser)

0    10
1    20
2    30
3    40
4    50
dtype: int64


In [10]:
# Build a Series from a Python tuple.
# The elements mix int, str and float, so the Series falls back to dtype object.
tuple1 = (1, 'rithu', 67, 30.7)
ser = pd.Series(data=tuple1)
print(ser)

0        1
1    rithu
2       67
3     30.7
dtype: object


In [13]:
# Create a Series from a NumPy array.
# NumPy (np) is imported in the import cell at the top of the notebook so that
# all dependencies are declared in one place.
arr = np.array([67, 89, 34, 80, 90])
ser = pd.Series(arr)
print(ser)
# Confirm that the result is a pandas Series, not a NumPy array.
print(type(ser))

0    67
1    89
2    34
3    80
4    90
dtype: int64
<class 'pandas.core.series.Series'>


In [14]:
# ser is the Series built from the NumPy array above, with the default 0..4 index.
# [] with an integer key on that index looks up the *label* 3, which holds 80.
print(ser[3])

80


In [15]:
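# print(ser[-3]) is not valid here:
# it raises KeyError: -3, because [] with an integer key on an integer index looks up labels, not positions.
# For negative (positional) indexing use .iloc instead, e.g. ser.iloc[-3].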

In [18]:
# Series with custom string labels: each fruit name is stored under a one-letter label
# supplied through the index argument instead of the default 0..n-1 integers.
data = np.array(["Apple", "Mango", "Orange", "Grape"])
ser = pd.Series(data=data, index=['A', 'M', 'O', 'G'])
print(ser)

A     Apple
M     Mango
O    Orange
G     Grape
dtype: object


In [19]:
# Label-based lookup: returns the value stored under the index label 'G'.
ser['G']

'Grape'

In [21]:
# Build a Series from a dictionary: the keys become the index labels and the
# values become the data, in the dictionary's insertion order.
d1 = {
    101: 'Sruthi',
    105: 'Akshay',
    103: 'prema',
    102: 'roshan',
}
ser = pd.Series(data=d1)
print(ser)

101    Sruthi
105    Akshay
103     prema
102    roshan
dtype: object


In [22]:
# A set cannot be passed to pd.Series directly:
#   pd.Series(set1)  ->  TypeError: 'set' type is unordered
# Convert it to a list first (indirect conversion of a set to a Series).
# Set iteration order is arbitrary, so the row order of the result may differ
# between runs; use sorted(set1) when a reproducible order is needed.
set1 = {'A', 'B', 'C', 'D', 'E'}
ser = pd.Series([*set1])
print(ser)

0    D
1    C
2    B
3    A
4    E
dtype: object


In [25]:
# Convert a scalar into a pandas Series.
s = 0
# Without an index the scalar produces a one-element Series labelled 0.
ser = pd.Series(data=s)
print(ser)
# With an index the scalar is repeated once for every label.
ser = pd.Series(data=s, index=list(range(1, 6)))
print(ser)

0    0
dtype: int64
1    0
2    0
3    0
4    0
5    0
dtype: int64
