# Pandas

In [2]:
import pandas as pd
import pandera as pa

In [2]:
# pandas Series using list
s1 : pd.Series = pd.Series([1, 2, 3,4,5])
s1

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [3]:
# Series using Dictionary
s1 : pd.Series = pd.Series(
    {
        'name':'Sarmad',
        'email':'sarmad@gmail.com',
        'age':19
     }
    )

s1

name               Sarmad
email    sarmad@gmail.com
age                    19
dtype: object

In [4]:
values : list[int] = [1, 2, 3, 4]
index : list[str] = ['a', 'b', 'c','d']

s1 : pd.Series = pd.Series(values,index=index)
s1

a    1
b    2
c    3
d    4
dtype: int64

In [5]:
values : list[int] = [1, 2, 3, 4]
index : list[list[str]] = [['a', 'b', 'c','d'],['e','f','g','h']]

s1 : pd.Series = pd.Series(values,index=index,name="Student Data")
s1

# The name can be anything.

a  e    1
b  f    2
c  g    3
d  h    4
Name: Student Data, dtype: int64

In [6]:
# Series using python sets
series: pd.Series = pd.Series(('Sarmad','sarmad@gmail.com',19))
series

0              Sarmad
1    sarmad@gmail.com
2                  19
dtype: object

## DataFrame

In [6]:
# create a dictionary with the three series
data = {'Student Id': pd.Series([1, 2, 3, 4, 5], name="Student Id"),
        'Student Score': pd.Series([10, 20, 30, 40, 50], name="Student Score"),
        'Student name': pd.Series(["Sarmad", "Hammad", 'Ali', 'Akmal', 'Jawad'], name="Student name")}

# create a dataframe from the dictionary
df :pd.DataFrame = pd.DataFrame(data)

# display the dataframe in a Jupyter notebook
df


Unnamed: 0,Student Id,Student Score,Student name
0,1,10,Sarmad
1,2,20,Hammad
2,3,30,Ali
3,4,40,Akmal
4,5,50,Jawad


In [1]:
s1 : pd.Series = pd.Series([1, 2, 3,4,5],name="Student Id")
s2 : pd.Series = pd.Series([10, 20, 30,40,50],name="Student Score")
s3 : pd.Series = pd.Series(["Sarmad","Hammad",'Ali','Akmal','Jawad'],name="Student name")

df1 : pd.DataFrame = pd.DataFrame({"Student Id":s1,"Score":s2,"Student Name":s3})

df1

Unnamed: 0,Student Id,Score,Student Name
0,1,10,Sarmad
1,2,20,Hammad
2,3,30,Ali
3,4,40,Akmal
4,5,50,Jawad


In [9]:
df : pd.DataFrame = pd.read_json('https://www.w3schools.com/python/pandas/data.js')
df

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
0,60,110,130,409.1
1,60,117,145,479.0
2,60,103,135,340.0
3,45,109,175,282.4
4,45,117,148,406.0
...,...,...,...,...
164,60,105,140,290.8
165,60,110,145,300.4
166,60,115,145,310.2
167,75,120,150,320.4


In [11]:
# data to validate
df = pd.DataFrame({
    "column1": [1, 4, 0, 10, 9],
    "column2": [-1.3, -1.4, -2.9, -10.1, -20.4],
    "column3": ["value_1", "value_2", "value_3", "value_2", "value_1"],
})

# define schema
schema = pa.DataFrameSchema({
    "column1": pa.Column(int, checks=pa.Check.le(10)),
    "column2": pa.Column(float, checks=pa.Check.lt(-1.2)),
    "column3": pa.Column(str, checks=[
        pa.Check.str_startswith("value_"),
        # define custom checks as functions that take a series as input and
        # outputs a boolean or boolean Series
        pa.Check(lambda s: s.str.split("_", expand=True).shape[1] == 2)
    ]),
})

validated_df = schema(df)
print(validated_df)

   column1  column2  column3
0        1     -1.3  value_5
1        4     -1.4  value_2
2        0     -2.9  value_3
3       10    -10.1  value_2
4        9    -20.4  value_1


In [7]:
data : list[list[int]] = [
    [1,2,3],
    [4,5,6],
    [7,8,9]
    ]

table: pd.DataFrame = pd.DataFrame(data,columns=['A','B','C'],index=[1,2,3])
table

Unnamed: 0,A,B,C
1,1,2,3
2,4,5,6
3,7,8,9


In [9]:
table.keys

<bound method NDFrame.keys of    A  B  C
1  1  2  3
2  4  5  6
3  7  8  9>

In [10]:
table.columns

Index(['A', 'B', 'C'], dtype='object')

In [12]:
table.index

Index([1, 2, 3], dtype='int64')

In [13]:
table.values

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]], dtype=int64)

In [11]:
import numpy as np
from nptyping import NDArray, Shape, Int64

arr : NDArray[Shape['10'],Int64] = np.arange(8*10).reshape(8,10)

table: pd.DataFrame = pd.DataFrame(arr)

table

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,1,2,3,4,5,6,7,8,9
1,10,11,12,13,14,15,16,17,18,19
2,20,21,22,23,24,25,26,27,28,29
3,30,31,32,33,34,35,36,37,38,39
4,40,41,42,43,44,45,46,47,48,49
5,50,51,52,53,54,55,56,57,58,59
6,60,61,62,63,64,65,66,67,68,69
7,70,71,72,73,74,75,76,77,78,79


## Regex

In [1]:
x : str = """
21:00:43 From PIAIC80919 Muhammad Asad to Everyone:
	Assalamu Aliakum
21:01:33 From Faizan Hassan to Everyone:
	s1.name kar k kar saktay hon ge
21:01:43 From PIAIC80919 Muhammad Asad to Everyone:
	I joined late on zoom meet so kindly share links that sir share until now
21:02:26 From Taif Ullah to Everyone:
	Replying to "I joined late on zoo..."
	
	every thing will be on github
21:04:19 From PIAIC80919 Muhammad Asad to Everyone:
	Numpy aur Pandas kay liya sir nay koi book share ki hai kay nahi
21:04:42 From Amanat Wattoo to Everyone:
	Replying to "Numpy aur Pandas kay..."
	
	no
21:05:21 From PIAIC80919 Muhammad Asad to Everyone:
	aur python kay liya koi alag WhatsApp group hai to please uska b link share kardien
21:06:00 From Amanat Wattoo to Everyone:
	Replying to "aur python kay liya ..."
	
	no koi group nhi bnia h
21:07:17 From Abdullah to Everyone:
	upper array wala code knsa ha??
21:07:51 From Hamza to Everyone:
	"Shape", "Shape"
21:08:10 From Hamza to Everyone:
	"Size", "Size"
21:08:26 From SheikhMAqib to Everyone:
	Double coat
21:08:33 From Azfar Suhail to Everyone:
	Shape turtle se import kara hai
21:08:51 From Azfar Suhail to Everyone:
	shape nptyping se import nhi howa
21:09:34 From Muhammad Uzair to Everyone:
	shape galat import ha
21:09:35 From Khadija Zahid to Everyone:
	shape ko kindly ek br explain kr den dbra
21:09:38 From PIAIC80919 Muhammad Asad to Qasim(CGAIO)(Direct Message):
	Assalamu Aliakum Sir Kindly sir mujhe bta dien k Data Science kay liya Math aur Statistic kay kon si books and courses hum karien
21:09:50 From Yasir to Everyone:
	shape import
21:09:53 From Yasir to Everyone:
	missing
21:09:56 From Muhammad Uzair to Everyone:
	from import typing shape
21:10:11 From Saboor Hussain to Everyone:
	sir
21:10:19 From Saboor Hussain to Everyone:
	aap nptyping se shape ko import karen
21:10:21 From Yasir to Everyone:
	import nhi kia shape
21:10:24 From farhan to Everyone:
	Shape import nahi thi
21:10:25 From Azfar Suhail to Everyone:
	turtle se import kara hai
21:14:10 From sadia to Everyone:
	can we get values 0 1 2 3 4 5 6 7 8 in vertical, abhi data horizontal arha hai
21:18:10 From Faiz M to Everyone:
	Sir chezy hard sy hard hoti ja rahe hy. aaj tho sir k oper oper ja raha hy.
21:18:56 From Qasim(CGAIO) to Everyone:
	https://www.w3schools.com/python/pandas/data.js
21:19:20 From Khadija Zahid to Everyone:
	html wala b ek br code dekha k bta de plz
21:19:22 From Abdullah to Everyone:
	kindly class k bd groups me sessions ka link send kr dia kryen
21:21:27 From Abdullah to Everyone:
	Replying to "kindly class k bd gr..."
	
	@Ikhlas Bhojani
21:22:11 From Ahmed Siddiqui to Everyone:
	what if data size in millions, what kind of preprocessing is required before handing over to pandas?
21:22:41 From Ikhlas Bhojani to Everyone:
	Replying to "kindly class k bd gr..."
	
	me ap logo ke group me nhi hn
21:22:59 From Abdullah to Everyone:
	Replying to "kindly class k bd gr..."
	
	bro sir sy kah dyen 
	ya sir Imran sy request kr dyen
21:24:13 From Ikhlas Bhojani to Everyone:
	Replying to "html wala b ek br co..."
	
	pd.read_html("url")
21:26:02 From Kaleem to Everyone:
	how to make identical data ?
21:26:19 From Afifa Dar to Everyone:
	colmn3 k chexk me ==2 se kya horaha ?
21:29:40 From PIAIC80919 Muhammad Asad to Qasim(CGAIO)(Direct Message):
	sir assignment b day dein practice kay liya
21:32:29 From Ali Zar FSD to Everyone:
	sliding
21:32:41 From Ali Zar FSD to Everyone:
	likha gya sir
21:33:00 From fahad rasheed to Everyone:
	sir thora data large kryn plx
21:33:18 From fahad rasheed to Everyone:
	for slicing thora data bardhae
21:39:55 From raheela to Everyone:
	Name is tort ???
21:40:46 From Naveed Delattre to Everyone:
	it’s toad
21:41:45 From Altaf Hussain to Everyone:
	PIAIC-173738
21:41:51 From Hamza to Everyone:
	PIAIC-201785
21:41:52 From jhon wick to Everyone:
	piaic 223880
21:41:54 From Hina Zargham to Everyone:
	PIAIC101499
21:41:54 From Hatif Humayun to Everyone:
	PIAIC-52822
21:41:54 From Ahmed Siddiqui to Everyone:
	PIAIC123456
21:41:56 From Arif Najmi to Everyone:
	125657
21:42:00 From Rehan Baig - PIAIC73919 to Everyone:
	PIAIC73919
21:42:00 From STONE to Everyone:
	ZAM - 786
21:42:01 From M. Waheed Iqbal (PIAIC_126369) to Everyone:
	PIAIC_126369
21:42:03 From . to Everyone:
	PIAIC210905
21:42:06 From ABDUL KHALIQ to Everyone:
	PIAIC-604031
21:42:11 From Arshad Siddiqui to Everyone:
	PIAIC120702
21:42:13 From Ali Zar FSD to Everyone:
	PIaic 223972
21:42:13 From Azfar Suhail to Everyone:
	PIAIC218333
21:42:14 From Kamran Ahmed to Everyone:
	PIAIC139495
21:42:18 From Ahmed to Everyone:
	216511
21:42:20 From Ayesha Arshad to Everyone:
	PIAIC-225620
21:42:25 From Kamal Hassan to Everyone:
	PIAIC58320
21:42:29 From Ahmed to Everyone:
	PIAIC-2165111
	Jahan msg likhte hen uske neeche three dot he
21:46:30 From Amanat Wattoo to Everyone:
	Replying to "@Ikhlas Bhojani bhai..."
	
	ok find thanks
21:47:27 From Amanat Wattoo to Everyone:
	Replying to "@Ikhlas Bhojani bhai..."
	
	found
"""

import re 

patterns : str = r'''
(\d{2}:\d{2}:\d{2}) From (.*) to Everyone:
	(PIAIC-? ?\d{5,6})
'''

data : list[list[str]] = re.findall(patterns, x)

data

[('21:41:45', 'Altaf Hussain', 'PIAIC-173738'),
 ('21:41:54', 'Hina Zargham', 'PIAIC101499'),
 ('21:41:54', 'Ahmed Siddiqui', 'PIAIC123456'),
 ('21:42:00', 'Rehan Baig - PIAIC73919', 'PIAIC73919'),
 ('21:42:03', '.', 'PIAIC210905'),
 ('21:42:11', 'Arshad Siddiqui', 'PIAIC120702'),
 ('21:42:13', 'Azfar Suhail', 'PIAIC218333'),
 ('21:42:20', 'Ayesha Arshad', 'PIAIC-225620')]