In [2]:
%pip install pandera

Collecting pandera
  Obtaining dependency information for pandera from https://files.pythonhosted.org/packages/04/f8/e00c7a27b5a7c13bc51baa8f75f4c55d8fa2549ff96402699ac56b864d6d/pandera-0.17.2-py3-none-any.whl.metadata
  Downloading pandera-0.17.2-py3-none-any.whl.metadata (15 kB)
Collecting multimethod (from pandera)
  Obtaining dependency information for multimethod from https://files.pythonhosted.org/packages/7f/bd/750245e47e7f307d9f94d4fa84727f4ed9956005dfa671d58be1d531a0f6/multimethod-1.10-py3-none-any.whl.metadata
  Downloading multimethod-1.10-py3-none-any.whl.metadata (8.2 kB)
Collecting pydantic (from pandera)
  Obtaining dependency information for pydantic from https://files.pythonhosted.org/packages/73/66/0a72c9fcde42e5650c8d8d5c5c1873b9a3893018020c77ca8eb62708b923/pydantic-2.4.2-py3-none-any.whl.metadata
  Downloading pydantic-2.4.2-py3-none-any.whl.metadata (158 kB)
     ---------------------------------------- 0.0/158.6 kB ? eta -:--:--
     ------- ----------------------

In [1]:
import pandas as pd
import pandera as pa

# Pandas core components
* Series types
* DataFrame types

In [2]:
# A list is the simplest input: pandas labels the items 0..n-1 by default.
s1: pd.Series = pd.Series(data=[1, 2, 3, 4])
s1

0    1
1    2
2    3
3    4
dtype: int64

In [3]:
# A set has no defined order, so pandas refuses to build a Series from it.
# The failure is the point of this cell, so catch it and show the message
# instead of letting the exception abort a "Restart & Run All".
try:
    s1 : pd.Series = pd.Series({1, 2, 3, 4})
except TypeError as err:
    set_error_message : str = f"TypeError: {err}"
    print(set_error_message)
else:
    # Only reached if pandas ever starts accepting sets.
    display(s1)

TypeError: 'set' type is unordered

In [4]:
# Unlike a set, a tuple is ordered, so it works the same way a list does.
s1: pd.Series = pd.Series(data=(1, 2, 3, 4))
s1

0    1
1    2
2    3
3    4
dtype: int64

In [6]:
# With a dict, the keys become the index labels and the values the data.
s1: pd.Series = pd.Series(
    data={"a": 10, "b": 20, "c": 30, "d": 40, "e": 50, "f": 60},
)

s1

a    10
b    20
c    30
d    40
e    50
f    60
dtype: int64

In [7]:
# Supply the labels explicitly instead of relying on the default 0..n-1 index.
values: list[int] = [1, 2, 3, 4, 5]
index1: list[str] = list("abcde")

s1: pd.Series = pd.Series(data=values, index=index1)
s1

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [8]:
# Two parallel label lists produce a two-level (hierarchical) index:
# the first list is the outer level, the second the inner level.
values: list[int] = [1, 2, 3, 4, 5]
index1: list[list[str]] = [
    ["a1"] * 3 + ["b1"] * 2,
    list("abcde"),
]

s1: pd.Series = pd.Series(data=values, index=index1)
s1

a1  a    1
    b    2
    c    3
b1  d    4
    e    5
dtype: int64

# DataFrame

In [3]:
import pandas as pd
import pandera as pa

# Build a DataFrame from a dict of Series:
#   * each key becomes a column label
#   * each value is an iterable holding that column's data
#   * every Series should have the same length as the others

s1: pd.Series = pd.Series(data=[1, 2, 3, 4, 5], name="Student id")
s2: pd.Series = pd.Series(data=[10, 20, 30, 40, 50], name="Student score")
s3: pd.Series = pd.Series(
    data=["Hamza", "Azan", "Hamid", "Kamran", "Masood"],
    name="Student name",
)

# Key each Series by its own name so labels and data cannot drift apart.
df1: pd.DataFrame = pd.DataFrame({col.name: col for col in (s1, s2, s3)})

df1



Unnamed: 0,Student id,Student score,Student name
0,1,10,Hamza
1,2,20,Azan
2,3,30,Hamid
3,4,40,Kamran
4,5,50,Masood


In [4]:
import pandas as pd
import pandera as pa

s1: pd.Series = pd.Series(data=[1, 2, 3, 4, 5], name="Student id")
s2: pd.Series = pd.Series(data=[10, 20, 30, 40, 50], name="Student score")
s3: pd.Series = pd.Series(
    data=["Hamza", "Azan", "Hamid", "Kamran", "Masood"],
    name="Student name",
)

# A *list* of Series is stacked row-wise: each Series name becomes a row
# label and the positions 0..4 become the column labels.
df1: pd.DataFrame = pd.DataFrame(data=[s1, s2, s3])

df1



Unnamed: 0,0,1,2,3,4
Student id,1,2,3,4,5
Student score,10,20,30,40,50
Student name,Hamza,Azan,Hamid,Kamran,Masood


In [6]:
import pandas as pd
import pandera as pa

s1: pd.Series = pd.Series(data=[1, 2, 3, 4, 5], name="Student id")
s2: pd.Series = pd.Series(data=[10, 20, 30, 40, 50], name="Student score")
s3: pd.Series = pd.Series(
    data=["Hamza", "Azan", "Hamid", "Kamran", "Masood"],
    name="Student name",
)

# Concatenating along the column axis places the Series side by side,
# using each Series name as its column label.
df1: pd.DataFrame = pd.concat([s1, s2, s3], axis="columns")

df1



Unnamed: 0,Student id,Student score,Student name
0,1,10,Hamza
1,2,20,Azan
2,3,30,Hamid
3,4,40,Kamran
4,5,50,Masood


In [7]:
# Each inner list is one row; with no labels given, rows and columns are
# both numbered 0, 1, 2.
data: list[list[int]] = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

df: pd.DataFrame = pd.DataFrame(data=data)
df

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6
2,7,8,9


In [8]:
# Same rows as before, but now the columns get explicit labels.
data: list[list[int]] = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

df: pd.DataFrame = pd.DataFrame(data=data, columns=list("ABC"))
df

Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6
2,7,8,9


In [9]:
# Label both axes: A/B/C across the columns, x/y/z down the rows.
data: list[list[int]] = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

df: pd.DataFrame = pd.DataFrame(
    data=data,
    columns=list("ABC"),
    index=list("xyz"),
)
df

Unnamed: 0,A,B,C
x,1,2,3
y,4,5,6
z,7,8,9


In [10]:
# Column labels of `df` (relies on the frame defined in an earlier cell).
df.columns

Index(['A', 'B', 'C'], dtype='object')

In [11]:
# Row labels of `df` (relies on the frame defined in an earlier cell).
df.index

Index(['x', 'y', 'z'], dtype='object')

In [12]:
# The underlying data as a NumPy array, without the row/column labels.
# `to_numpy()` is the accessor the pandas documentation recommends over
# `.values`; for this all-integer frame it yields the same array.
df.to_numpy()

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]], dtype=int64)

In [19]:
from nptyping import NDArray, Shape
from typing import Any
import numpy as np

# Ten rows of ten consecutive integers (0..99), filled row by row.
# The shape hint reuses the single name "Size" for both dimensions.
data: NDArray[Shape["Size,Size"], Any] = np.arange(100).reshape((10, 10))
data

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
       [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]])

In [2]:
from nptyping import NDArray, Shape
from typing import Any
import numpy as np
import pandas as pd


# Wrap the 10x10 array (values 0..99) in a DataFrame with columns A..J.
data: NDArray[Shape["Size,Size"], Any] = np.arange(100).reshape((10, 10))
df: pd.DataFrame = pd.DataFrame(
    data=data,
    columns=["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"],
)
df

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
0,0,1,2,3,4,5,6,7,8,9
1,10,11,12,13,14,15,16,17,18,19
2,20,21,22,23,24,25,26,27,28,29
3,30,31,32,33,34,35,36,37,38,39
4,40,41,42,43,44,45,46,47,48,49
5,50,51,52,53,54,55,56,57,58,59
6,60,61,62,63,64,65,66,67,68,69
7,70,71,72,73,74,75,76,77,78,79
8,80,81,82,83,84,85,86,87,88,89
9,90,91,92,93,94,95,96,97,98,99


# Slicing and indexing
* series_variable[index]
* DataFrame
    * loc
    * iloc
    * at
    * iat

In [3]:
# Start from a Series that carries the default 0..4 index.
s1: pd.Series = pd.Series(data=[1, 2, 3, 4, 5])
display(s1)

# A single key inside [] picks out one element: here the value labelled 1.
print("Applying slicing")
display(s1[1])

0    1
1    2
2    3
3    4
4    5
dtype: int64

Applying slicing


2

In [4]:
# Start from a Series that carries the default 0..4 index.
s1: pd.Series = pd.Series(data=[1, 2, 3, 4, 5])
display(s1)

# start:stop inside [] keeps rows 1, 2 and 3; the stop position 4 is excluded.
print("Applying slicing")
display(s1[1:4])

0    1
1    2
2    3
3    4
4    5
dtype: int64

Applying slicing


1    2
2    3
3    4
dtype: int64

In [2]:
from nptyping import NDArray, Shape
from typing import Any
import numpy as np
import pandas as pd

In [4]:
# Give the Series string labels so it can be sliced by label.
s1: pd.Series = pd.Series(data=[1, 2, 3, 4, 5], index=list("abcde"))
display(s1)

# Label slices through .loc keep BOTH endpoints, so "d" is part of the result.
print("Applying slicing")
display(s1.loc[slice("a", "d")])

a    1
b    2
c    3
d    4
e    5
dtype: int64

Applying slicing


a    1
b    2
c    3
d    4
dtype: int64