# Pandas
* Pandera: a data-validation library used to declare and check the expected schema (column names and data types) of pandas objects

## Pandas core components
* Series: a one-dimensional labeled array, i.e. a single row or a single column of a table
* DataFrame: two-dimensional tabular data (like an Excel sheet), made up of Series (each column is a Series)


In [None]:
# IPython magic (not plain Python): installs pandera with pip from inside the notebook.
%pip install pandera

In [None]:
import pandas as pd # pd is the conventional short alias for the pandas package
import pandera as pa

## Series()
* A single row or a single column of data is called a Series
* `pd.Series()` creates a Series from the data it is given (list, tuple, dictionary, ...)

In [None]:
import pandas as pd

# A plain Python list is a valid data source for a Series.
s1: pd.Series = pd.Series(data=[1, 2, 3, 4, 5])
# The output shows every value next to its index label, followed by the dtype.
s1

In [None]:
import pandas as pd

# A tuple works as a data source exactly like a list does.
s1: pd.Series = pd.Series(data=(1, 2, 3, 4, 5))
s1

In [None]:
import pandas as pd

# A set can NOT be used to create a Series: a set has no element order, so
# pandas refuses it with a TypeError instead of guessing one.
# The error is caught and printed so the notebook can still be run from top
# to bottom, and the usual workaround is shown right after it.
try:
    s1: pd.Series = pd.Series({1, 2, 3, 4, 5})
except TypeError as err:
    print(f"TypeError: {err}")
    # Workaround: turn the set into an ordered sequence first.
    s1 = pd.Series(sorted({1, 2, 3, 4, 5}))
s1

In [None]:
import pandas as pd

# A dictionary can also be used to create a Series:
#   * its keys become the index labels
#   * its values become the Series values
#
# A list only gets the default index (0, 1, 2, ...), whereas a dictionary
# lets you choose custom index labels through its keys.
s1: pd.Series = pd.Series(data=dict(a=10, b=20, c=30, d=40, e=50))
s1

### Creating Series from lists

In [None]:
values: list[int] = [1, 2, 3, 4, 5]
index1: list[str] = list("abcde")  # one label per value: 'a' .. 'e'

# The list of values and the list of index labels must have the same length.
s1: pd.Series = pd.Series(data=values, index=index1)
s1

### Multi index Series
* For multi-level indexing, pass a list of lists as the index: one inner list per index level

In [None]:
values: list[int] = [1, 2, 3, 4, 5]
index1: list[list[str]] = [
    ["a1", "a1", "a1", "b1", "b1"],  # first (outer) index level
    ["a", "b", "c", "d", "e"],       # second (inner) index level
]

# Passing a list of lists as the index produces a two-level (multi) index.
s1: pd.Series = pd.Series(data=values, index=index1)
s1

## DataFrame
* A DataFrame holds multiple rows & columns; it is built from multiple Series

In [None]:
s1: pd.Series = pd.Series(data=[1, 2, 3, 4, 5])
s2: pd.Series = pd.Series(data=[10, 20, 30, 40, 50])
s3: pd.Series = pd.Series(data=["ALi", "Li", "Lisa", "Jack", "Jony"])

# The DataFrame is built from a dictionary:
#   * each key becomes a column name
#   * each value (an iterable, here a Series) supplies that column's data
# All three Series have five values and the same default index, so the rows
# line up one-to-one.
df1: pd.DataFrame = pd.DataFrame(
    data={"student id": s1, "score": s2, "student name": s3}
)
df1

In [None]:
# Each Series now carries its own name.
s1: pd.Series = pd.Series(data=[1, 2, 3, 4, 5], name="student id")
s2: pd.Series = pd.Series(data=[10, 20, 30, 40, 50], name="score")
s3: pd.Series = pd.Series(
    data=["ALi", "Li", "Lisa", "Jack", "Jony"], name="student name"
)

# The data is still passed as a dictionary, whose keys are the column labels.
df1: pd.DataFrame = pd.DataFrame(
    data={"student id": s1, "score": s2, "student name": s3}
)
df1

In [None]:
s1: pd.Series = pd.Series(data=[1, 2, 3, 4, 5], name="student id")
s2: pd.Series = pd.Series(data=[10, 20, 30, 40, 50], name="score")
s3: pd.Series = pd.Series(
    data=["ALi", "Li", "Lisa", "Jack", "Jony"], name="student name"
)

# Concatenating along the column axis places the Series side by side;
# each Series name is used as its column label.
df1: pd.DataFrame = pd.concat(objs=[s1, s2, s3], axis="columns")
df1

### Index & Columns in DataFrame

In [None]:
data: list[list[int]] = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]

# Neither index nor columns is given, so both get the default labels
# (0, 1, 2, ...). Each inner list becomes one row.
df: pd.DataFrame = pd.DataFrame(data=data)
df

In [None]:
data: list[list[int]] = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]

# Row labels are set explicitly to x, y, z; the column labels are not given,
# so they stay at the default 0, 1, 2.
df: pd.DataFrame = pd.DataFrame(data=data, index=list("xyz"))
df

In [35]:
data: list[list[int]] = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]

# Both label sets are given explicitly: rows are x, y, z and columns are A, B, C.
df: pd.DataFrame = pd.DataFrame(
    data=data,
    index=list("xyz"),
    columns=list("ABC"),
)
df

Unnamed: 0,A,B,C
x,1,2,3
y,4,5,6
z,7,8,9


In [None]:
# read and process data from web pages
# We can read different types of data with "pd.read_"
import pandas as pd

# read_html returns a list of DataFrames (see the annotation on dfl).
dfl: list[pd.DataFrame] = pd.read_html(
    io="https://www.w3schools.com/python/python_operators.asp"
)
# Showing dfl displays every table in the list;
# use dfl[0] instead to look at only the table at index 0.
dfl

In [None]:
# IPython help syntax (not plain Python): a trailing "?" shows the documentation of pd.read_csv.
pd.read_csv?

# Slicing & indexing

* series_variable[index]

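* DataFrame (these accessors also work on a Series)
    * loc: select by label (the end label of a slice is included)
    * iloc: select by integer position (the end position of a slice is excluded)
    * at: read or update a single value by label
    * iat: read or update a single value by integer position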

In [None]:
s1: pd.Series = pd.Series(data=[1, 2, 3, 4, 5])
display(s1)

# No index was given, so the labels are the defaults 0..4.
# s1[0] looks up the single value stored under index label 0
# (the first element here, because the default index starts at 0).
display(s1[0])

In [None]:
display(s1[0:3])    # slice start:stop -- the start is included, the stop is excluded

In [None]:
display(s1[0:4:2])  # slice start:stop:step -- from 0 up to (not including) 4, taking every 2nd element

In [None]:
display(s1.iloc[1:3])   # iloc = integer (position-based) location; the stop position is excluded

In [None]:
s1: pd.Series = pd.Series(data=[1, 2, 3, 4, 5], index=list("abcde"))
# display(s1)

# iloc slices by integer position even though the index labels are letters;
# 2:3 covers position 2 only, because the stop position is excluded.
display(s1.iloc[2:3])

In [None]:
s1: pd.Series = pd.Series(data=[1, 2, 3, 4, 5], index=list("abcde"))
# display(s1)

# loc slices by index label; unlike a positional slice, the end label ('d')
# is included in the result.
display(s1.loc['b':'d'])

In [None]:
s1: pd.Series = pd.Series(data=[1, 2, 3, 4, 5], index=list("abcde"))
# display(s1)

# iat addresses exactly one cell by its integer position; it can be used to
# read the value and also to assign a new one.
display(s1.iat[2])

In [None]:
s1: pd.Series = pd.Series(data=[1, 2, 3, 4, 5], index=list("abcde"))
# display(s1)

# at addresses exactly one cell by its index label; it can be used to read
# the value and also to assign a new one.
display(s1.at['b'])