In [2]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Keep printed frames compact: show at most 10 rows and 10 columns,
# and wrap the text representation at 100 characters.
pd.options.display.max_rows = 10
pd.options.display.max_columns = 10
pd.options.display.width = 100

In [5]:
print("## 1. DataFrame Creation: \n")

# From a dictionary mapping each column name to a list of values
df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
print("DataFrame from dictionary of arrays:")
print(df1)
print()

# From a list of dictionaries: each dict is one row; a key missing from a
# row (here 'C' in the first record) is filled with NaN
df2 = pd.DataFrame([{'A': 1, 'B': 2}, {'A': 3, 'B': 4, 'C': 5}])
print("DataFrame from list of dictionaries:")
print(df2)
print()

# From numpy array.
# Draw the demo values from a locally seeded Generator so that a fresh
# "Restart & Run All" prints the same numbers every time, without touching
# NumPy's global random state.
rng = np.random.default_rng(42)
df3 = pd.DataFrame(rng.random((3, 2)), columns=['A', 'B'])
print("DataFrame from numpy array:")
print(df3)
print()

# With custom index: row labels 'a', 'b', 'c' replace the default 0..n-1
df4 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=['a', 'b', 'c'])
print("DataFrame with custom index:")
print(df4)
print()

# Load a frame from a CSV file; the path is relative to the working directory
location = "data/file.csv"
df5 = pd.read_csv(filepath_or_buffer=location)
print("DataFrame with file:", df5, "", sep="\n")


# Load a frame from an Excel workbook.
# Note: this rebinds both `location` and `df5`, so the CSV frame loaded
# just above is no longer reachable after this point.
location = "data/file.xlsx"
df5 = pd.read_excel(io=location)
print("DataFrame with Excel:", df5, "", sep="\n")

## 1. DataFrame Creation: 

DataFrame from dictionary of arrays:
   A  B
0  1  4
1  2  5
2  3  6

DataFrame from list of dictionaries:
   A  B    C
0  1  2  NaN
1  3  4  5.0

DataFrame from numpy array:
          A         B
0  0.098297  0.334662
1  0.359405  0.570363
2  0.746739  0.044010

DataFrame with custom index:
   A  B
a  1  4
b  2  5
c  3  6

DataFrame with file:
  name  age
0    a   12
1    b   21

DataFrame with Excel:
    name  age  rollno
0  gemma    5       1
1  betta    5       2
2  alpha    5       3



In [13]:
print("## 2. DataFrame Attributes")

# Small demo frame with one column per dtype family (int, float, string,
# bool) so the attribute output below shows a mix of types
df = pd.DataFrame(
    data={
        'A': [1, 2, 3],
        'B': [4.0, 5.0, 6.0],
        'C': ['p', 'q', 'r'],
        'D': [True, False, True],
    },
    index=list('abc'),
)

# Each print below emits a heading, the value, and a trailing blank line
print("DataFrame:", df, "", sep="\n")

# .values gives the underlying data as a numpy array
print("Values:", df.values, "", sep="\n")

# Row labels, column labels, and the per-column dtypes
print(
    f"Index: {df.index}",
    f"Columns: {df.columns}",
    "Data types:",
    df.dtypes,
    "",
    sep="\n",
)

# Dimensional summary: (rows, cols), total cell count, rank, emptiness flag
print(
    f"Shape: {df.shape}",
    f"Size: {df.size}",
    f"Number of dimensions: {df.ndim}",
    f"Is empty: {df.empty}",
    "",
    sep="\n",
)

# .T swaps rows and columns
print("Transpose:", df.T, "", sep="\n")

# .axes is a list holding the row index followed by the column index
print(f"Axes: {df.axes}", "", sep="\n")


## 2. DataFrame Attributes
DataFrame:
   A    B  C      D
a  1  4.0  p   True
b  2  5.0  q  False
c  3  6.0  r   True

Values:
[[1 4.0 'p' True]
 [2 5.0 'q' False]
 [3 6.0 'r' True]]

Index: Index(['a', 'b', 'c'], dtype='object')
Columns: Index(['A', 'B', 'C', 'D'], dtype='object')
Data types:
A      int64
B    float64
C     object
D       bool
dtype: object

Shape: (3, 4)
Size: 12
Number of dimensions: 2
Is empty: False

Transpose:
      a      b     c
A     1      2     3
B   4.0    5.0   6.0
C     p      q     r
D  True  False  True

Axes: [Index(['a', 'b', 'c'], dtype='object'), Index(['A', 'B', 'C', 'D'], dtype='object')]



In [None]:
print("## 3. Indexing and Selection")

# Demo frame: five labelled rows ('a'..'e') and three integer columns
df = pd.DataFrame(
    data={
        'A': [1, 2, 3, 4, 5],
        'B': [10, 20, 30, 40, 50],
        'C': [100, 200, 300, 400, 500],
    },
    index=list('abcde'),
)

# Each print below emits a heading, the selection result, and a blank line
print("DataFrame:", df, "", sep="\n")

# A single column label returns a Series
print("Select column 'A':", df['A'], "", sep="\n")

# A list of column labels returns a DataFrame
print("Select columns 'A' and 'C':", df[['A', 'C']], "", sep="\n")

# .iloc indexes rows by integer position
print("Select first row by position:", df.iloc[0], "", sep="\n")

# .loc indexes rows by label
print("Select row 'c' by label:", df.loc['c'], "", sep="\n")

# .iloc with two slices: rows 0-1 and columns 0-1 (end position excluded)
print(
    "Select first two rows and first two columns by position:",
    df.iloc[0:2, 0:2],
    "",
    sep="\n",
)

# .loc label slices include the end label, so 'a':'c' yields three rows
print(
    "Select rows 'a' to 'c' and columns 'A' and 'B' by label:",
    df.loc['a':'c', ['A', 'B']],
    "",
    sep="\n",
)

# .at / .iat fetch one scalar by label / by position
print(
    f"Value at row 'a', column 'B' (using .at): {df.at['a', 'B']}",
    f"Value at row 0, column 1 (using .iat): {df.iat[0, 1]}",
    "",
    sep="\n",
)

# .xs takes the cross-section for a single row label
print("Cross-section of row 'a':", df.xs('a'), "", sep="\n")

# .filter(items=...) keeps only the listed column names
print("Filter columns containing only 'A':", df.filter(items=['A']), "", sep="\n")

# .filter(regex=...) keeps the columns whose name matches the pattern
print("Filter columns starting with 'A' or 'B':", df.filter(regex='^[AB]'), "", sep="\n")


## 3. Indexing and Selection
DataFrame:
   A   B    C
a  1  10  100
b  2  20  200
c  3  30  300
d  4  40  400
e  5  50  500

Select column 'A':
a    1
b    2
c    3
d    4
e    5
Name: A, dtype: int64

Select columns 'A' and 'C':
   A    C
a  1  100
b  2  200
c  3  300
d  4  400
e  5  500

Select first row by position:
A      1
B     10
C    100
Name: a, dtype: int64

Select row 'c' by label:
A      3
B     30
C    300
Name: c, dtype: int64

Select first two rows and first two columns by position:
   A   B
a  1  10
b  2  20

Select rows 'a' to 'c' and columns 'A' and 'B' by label:
   A   B
a  1  10
b  2  20
c  3  30

Value at row 'a', column 'B' (using .at): 10
Value at row 0, column 1 (using .iat): 10

Cross-section of row 'a':
A      1
B     10
C    100
Name: a, dtype: int64

Filter columns containing only 'A':
   A
a  1
b  2
c  3
d  4
e  5

Filter columns starting with 'A' or 'B':
   A   B
a  1  10
b  2  20
c  3  30
d  4  40
e  5  50

