# Pandas

In [1]:
import pandas as pd
import pandera as pa

In [2]:
# pandas Series using list
s1 : pd.Series = pd.Series([1, 2, 3,4,5])
s1

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [3]:
# Series using Dictionary
s1 : pd.Series = pd.Series(
    {
        'name':'Sarmad',
        'email':'sarmad@gmail.com',
        'age':19
     }
    )

s1

name               Sarmad
email    sarmad@gmail.com
age                    19
dtype: object

In [4]:
values : list[int] = [1, 2, 3, 4]
index : list[str] = ['a', 'b', 'c','d']

s1 : pd.Series = pd.Series(values,index=index)
s1

a    1
b    2
c    3
d    4
dtype: int64

In [5]:
values : list[int] = [1, 2, 3, 4]
index : list[list[str]] = [['a', 'b', 'c','d'],['e','f','g','h']]

s1 : pd.Series = pd.Series(values,index=index,name="Student Data")
s1

# The name can be anything.

a  e    1
b  f    2
c  g    3
d  h    4
Name: Student Data, dtype: int64

In [6]:
# Series using python sets
series: pd.Series = pd.Series(('Sarmad','sarmad@gmail.com',19))
series

0              Sarmad
1    sarmad@gmail.com
2                  19
dtype: object

In [7]:
# data to validate
df = pd.DataFrame({
    "column1": [1, 4, 0, 10, 9],
    "column2": [-1.3, -1.4, -2.9, -10.1, -20.4],
    "column3": ["value_1", "value_2", "value_3", "value_2", "value_1"],
})

# define schema
schema = pa.DataFrameSchema({
    "column1": pa.Column(int, checks=pa.Check.le(10)),
    "column2": pa.Column(float, checks=pa.Check.lt(-1.2)),
    "column3": pa.Column(str, checks=[
        pa.Check.str_startswith("value_"),
        # define custom checks as functions that take a series as input and
        # outputs a boolean or boolean Series
        pa.Check(lambda s: s.str.split("_", expand=True).shape[1] == 2)
    ]),
})

validated_df = schema(df)
print(validated_df)

   column1  column2  column3
0        1     -1.3  value_1
1        4     -1.4  value_2
2        0     -2.9  value_3
3       10    -10.1  value_2
4        9    -20.4  value_1


## DataFrame

In [8]:
# create a dictionary with the three series
data = {'Student Id': pd.Series([1, 2, 3, 4, 5], name="Student Id"),
        'Student Score': pd.Series([10, 20, 30, 40, 50], name="Student Score"),
        'Student name': pd.Series(["Sarmad", "Hammad", 'Ali', 'Akmal', 'Jawad'], name="Student name")}

# create a dataframe from the dictionary
df = pd.DataFrame(data)

# display the dataframe in a Jupyter notebook
df


Unnamed: 0,Student Id,Student Score,Student name
0,1,10,Sarmad
1,2,20,Hammad
2,3,30,Ali
3,4,40,Akmal
4,5,50,Jawad


In [19]:
s1 : pd.Series = pd.Series([1, 2, 3,4,5],name="Student Id")
s2 : pd.Series = pd.Series([10, 20, 30,40,50],name="Student Score")
s3 : pd.Series = pd.Series(["Sarmad","Hammad",'Ali','Akmal','Jawad'],name="Student name")

df1 = pd.DataFrame = pd.DataFrame({"Student Id":s1,"score":s2,"student name":s3})

df1


TypeError: 'DataFrame' object is not callable

In [18]:
from nptyping import Shape
from nptyping import NDArray, Int64
from typing import Any
import numpy as np

data : NDArray[Shape['10,10'],Any] = np.arange(10*10).reshape(10,10)

df: pd.DataFrame = pd.DataFrame(data,columns=data)

df

TypeError: 'DataFrame' object is not callable