# Data Wrangling
 * .merge
 * .join
 * pd.concat (a top-level pandas function; DataFrames have no .concat method)
   * pd.concat([df1, df2])
# Joining
 * left join
 * right join
 * inner join
 * outer join
 * cross join

Reference for the SQL join types listed above: https://www.w3schools.com/sql/sql_join.asp

In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import pandera as pa
from pandera.typing import DataFrame, Series
import random
from typing import Dict, Union, Type

In [4]:
import pandas as pd

# Create Students DataFrame
students: pd.DataFrame = pd.DataFrame({
    'student_id': [1, 2, 3, 4, 5],
    'student_name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    'teacher_id': [101, 102, 103, 101, 102],
})

# Create Fees DataFrame (student 5 has no fee row; student 6 has no student row)
fees: pd.DataFrame = pd.DataFrame({
    'student_id': [1, 2, 3, 4, 6],
    'fees_paid': [1000, 1500, 2000, 2500, 3000],
})

# Create Teachers DataFrame (teacher 104 has no students)
teachers: pd.DataFrame = pd.DataFrame({
    'teacher_id': [101, 102, 103, 104],
    'teacher_name': ['Mr. Smith', 'Ms. Johnson', 'Mrs. Brown', 'Mr. White'],
})

# Create Courses DataFrame (shares no key column with any other frame)
courses: pd.DataFrame = pd.DataFrame({
    'course_id': [201, 202, 203, 204],
    'course_name': ['Mathematics', 'Science', 'History', 'Art'],
})

# Inner Join on Students and Fees: keeps only student_ids present in both frames
inner_join = pd.merge(students, fees, on='student_id', how='inner')
print("Inner Join:\n", inner_join)

# Left Join on Students and Fees: keeps every student, NaN where no fee row exists
left_join = pd.merge(students, fees, on='student_id', how='left')
print("\nLeft Join:\n", left_join)

# Right Join on Students and Fees: keeps every fee row, NaN where no student exists
right_join = pd.merge(students, fees, on='student_id', how='right')
print("\nRight Join:\n", right_join)

# Outer Join on Students and Fees: keeps the union of student_ids from both frames
outer_join = pd.merge(students, fees, on='student_id', how='outer')
print("\nOuter Join:\n", outer_join)


# Joining Students and Courses
# `students` has no 'course_id' column, so merging on that key raises KeyError.
# With no shared key, the only meaningful combination is a cross join: every
# student paired with every course (how='cross' needs pandas >= 1.2).
students_courses_join = pd.merge(students, courses, how='cross')
print("\nStudents and Courses Join:\n", students_courses_join)

# Full Join on Students, Fees, Teachers, and Courses
# Outer-join the frames that share keys, then cross-join `courses`, which has
# no key in common with the accumulated result.
full_join = (
    students
    .merge(fees, on='student_id', how='outer')
    .merge(teachers, on='teacher_id', how='outer')
    .merge(courses, how='cross')
)

# Joining Students and Teachers
students_teachers_join = pd.merge(students, teachers, on='teacher_id', how='inner')
print("\nStudents and Teachers Join:\n", students_teachers_join)


Inner Join:
    student_id student_name  teacher_id  fees_paid
0           1        Alice         101       1000
1           2          Bob         102       1500
2           3      Charlie         103       2000
3           4        David         101       2500

Left Join:
    student_id student_name  teacher_id  fees_paid
0           1        Alice         101     1000.0
1           2          Bob         102     1500.0
2           3      Charlie         103     2000.0
3           4        David         101     2500.0
4           5          Eva         102        NaN

Right Join:
    student_id student_name  teacher_id  fees_paid
0           1        Alice       101.0       1000
1           2          Bob       102.0       1500
2           3      Charlie       103.0       2000
3           4        David       101.0       2500
4           6          NaN         NaN       3000

Outer Join:
    student_id student_name  teacher_id  fees_paid
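0           1        Alice       101.0     1000.0
1           2          Bob       102.0     1500.0
2           3      Charlie       103.0     2000.0
3           4        David       101.0     2500.0
4           5          Eva       102.0        NaN
5           6          NaN         NaN     3000.0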

In [5]:
students

Unnamed: 0,student_id,student_name,teacher_id
0,1,Alice,101
1,2,Bob,102
2,3,Charlie,103
3,4,David,101
4,5,Eva,102


In [6]:
teachers

Unnamed: 0,teacher_id,teacher_name
0,101,Mr. Smith
1,102,Ms. Johnson
2,103,Mrs. Brown
3,104,Mr. White


In [7]:
fees

Unnamed: 0,student_id,fees_paid
0,1,1000
1,2,1500
2,3,2000
3,4,2500
4,6,3000


In [None]:
import pandas as pd
from datetime import datetime
from typing import List
import pandera as pa
from pandera.typing import DataFrame, Series

# Define the data structure with static type annotations
class Student:
    """Plain record holding the details of one student.

    Every constructor argument is stored unchanged on the instance under
    the attribute of the same name.
    """

    def __init__(
        self,
        roll_no: int,
        name: str,
        fathername: str,
        date_of_admission: datetime,
        fee: int,
        course: str,
    ):
        # Who the student is.
        self.roll_no, self.name, self.fathername = roll_no, name, fathername
        # Enrollment details.
        self.date_of_admission, self.fee, self.course = date_of_admission, fee, course

# Build the Student objects from raw records; each tuple is in constructor
# order: (roll_no, name, fathername, date_of_admission, fee, course).
students: List[Student] = [
    Student(*record)
    for record in (
        (1, "Alice", "John", datetime(2022, 1, 10), 1500, "Mathematics"),
        (2, "Bob", "Robert", datetime(2022, 2, 14), 2000, "Physics"),
        (3, "Charlie", "Michael", datetime(2022, 3, 18), 1500, "Chemistry"),
        (4, "David", "William", datetime(2022, 4, 22), 2500, "Mathematics"),
        (5, "Eva", "Richard", datetime(2022, 5, 26), 2000, "Computer Science"),
        (6, "Frank", "Joseph", datetime(2022, 6, 30), 1500, "Mathematics"),
        (7, "Grace", "Charles", datetime(2022, 7, 4), 2500, "Physics"),
        (8, "Hannah", "Thomas", datetime(2022, 8, 8), 2000, "Mathematics"),
        (9, "Ivan", "Christopher", datetime(2022, 9, 12), 1500, "Biology"),
        (10, "Jack", "Daniel", datetime(2022, 10, 16), 2500, "Computer Science"),
    )
]

# Turn the objects into a DataFrame, one row per student. The column tuple
# fixes the column order (it differs from the constructor order).
student_df = pd.DataFrame(
    [
        {
            column: getattr(pupil, column)
            for column in (
                "roll_no",
                "name",
                "fathername",
                "course",
                "date_of_admission",
                "fee",
            )
        }
        for pupil in students
    ]
)

# Define the schema using pandera
# One annotated field per column of student_df; the Series[...] parameter is
# the dtype expected for that column.
# NOTE(review): newer pandera releases name this base class `DataFrameModel`
# and deprecate `SchemaModel` — confirm against the installed version.

class StudentSchema(pa.SchemaModel):
    # ge=1: roll numbers must be greater than or equal to 1.
    roll_no: Series[int] = pa.Field(ge=1)
    # Text columns carry no extra checks beyond their dtype.
    name: Series[str] = pa.Field()
    fathername: Series[str] = pa.Field()
    course: Series[str] = pa.Field()
    date_of_admission: Series[datetime] = pa.Field()
    # ge=0: a fee may be zero but not negative.
    fee: Series[int] = pa.Field(ge=0)

    class Config:
        # Per the pandera docs, coerce=True casts each column to its declared
        # dtype during validation instead of failing on a dtype mismatch.
        coerce = True

# Validate the DataFrame against StudentSchema and keep the returned frame
# under its own name, annotated with the schema it was checked against.
# NOTE(review): presumably raises a pandera schema error if a check fails —
# see the pandera validation docs.
validated_student_df: DataFrame[StudentSchema] = StudentSchema.validate(student_df)

# Display the validated DataFrame
print(validated_student_df)
