In [1]:
import pandas as pd

# Creating DataFrames (SQL: CREATE TABLE)
data = {'name': ['Alice', 'Bob', 'Charlie'], 'age': [25, 30, 35], 'city': ['NY', 'Paris', 'London']}
df = pd.DataFrame(data)

In [2]:
df

Unnamed: 0,name,age,city
0,Alice,25,NY
1,Bob,30,Paris
2,Charlie,35,London


In [3]:
# Checking the type of df
print(type(df))  # Output: <class 'pandas.core.frame.DataFrame'>

<class 'pandas.core.frame.DataFrame'>


In [6]:
# Check if df is an instance/object created for the class dataframe from module pandas
print(isinstance(df, pd.DataFrame))  # Output: True

True


In [9]:
'''
Key Takeaways
✅ pd.DataFrame is a class in pandas.
✅ df = pd.DataFrame(data) creates an instance (object) of the DataFrame class.
✅ The data is passed to the __init__ method inside the class.
✅ You can use methods and attributes of DataFrame just like OOP objects (df.head(), df.shape, etc.).
'''

'\nKey Takeaways\n✅ pd.DataFrame is a class in pandas.\n✅ df = pd.DataFrame(data) creates an instance (object) of the DataFrame class.\n✅ The data is passed to the __init__ method inside the class.\n✅ You can use methods and attributes of DataFrame just like OOP objects (df.head(), df.shape, etc.).\n'

In [10]:
df.index 

RangeIndex(start=0, stop=3, step=1)

In [11]:
df.shape # (Rows, Columns) # len(self.index), len(self.columns) 

(3, 3)

In [18]:
print(df.index) # index meens rows
print(df.columns) # list(self._data.keys())
print(df.shape) # return len(self.index), len(self.columns)

RangeIndex(start=0, stop=3, step=1)
Index(['name', 'age', 'city'], dtype='object')
(3, 3)


In [21]:
df.axes # return [self.index, self.columns] Return a list representing the axes of the DataFrame.

[RangeIndex(start=0, stop=3, step=1),
 Index(['name', 'age', 'city'], dtype='object')]

In [28]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   name    3 non-null      object
 1   age     3 non-null      int64 
 2   city    3 non-null      object
dtypes: int64(1), object(2)
memory usage: 200.0+ bytes


In [27]:
df.describe()

Unnamed: 0,age
count,3.0
mean,30.0
std,5.0
min,25.0
25%,27.5
50%,30.0
75%,32.5
max,35.0


In [31]:
df.memory_usage()

Index    128
name      24
age       24
city      24
dtype: int64

In [36]:
df.select_dtypes(include='int64', exclude='object') 

Unnamed: 0,age
0,25
1,30
2,35


In [35]:
df.dtypes # Return the dtypes in the DataFrame.

name    object
age      int64
city    object
dtype: object

In [38]:
df.values # Return a Numpy representation of the DataFrame.

array([['Alice', 25, 'NY'],
       ['Bob', 30, 'Paris'],
       ['Charlie', 35, 'London']], dtype=object)

In [40]:
df.empty

False

In [41]:
df.size

9

In [42]:
df.memory_usage()

Index    128
name      24
age       24
city      24
dtype: int64

In [43]:
df.head

<bound method NDFrame.head of       name  age    city
0    Alice   25      NY
1      Bob   30   Paris
2  Charlie   35  London>

In [45]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   name    3 non-null      object
 1   age     3 non-null      int64 
 2   city    3 non-null      object
dtypes: int64(1), object(2)
memory usage: 200.0+ bytes


In [46]:
df.loc

<pandas.core.indexing._LocIndexer at 0x7fceb867cea8>

In [47]:
df.iloc

<pandas.core.indexing._iLocIndexer at 0x7fceb867c598>

In [48]:
df.at

<pandas.core.indexing._AtIndexer at 0x7fceb868b048>

In [49]:
df.loc()

<pandas.core.indexing._LocIndexer at 0x7fceb868b098>

In [51]:
df = pd.Dataframe

>>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
...                   index=['cobra', 'viper', 'sidewinder'],
...                   columns=['max_speed', 'shield'])
>>> df
            max_speed  shield
cobra               1       2
viper               4       5
sidewinder          7       8

AttributeError: module 'pandas' has no attribute 'Dataframe'

In [52]:
import pandas as pd
import numpy as np

data = np.arange(1, 13).reshape(3, 4)
df = pd.DataFrame(data, columns=["A", "B", "C", "D"])

# Select multiple rows and multiple columns using lists of integers
print("Selection using lists of integers:")
print(df.iloc[[0, 1], [0, 2]])

# Select multiple rows and multiple columns using slices
print("\nSelection using slices:")
print(df.iloc[0:3, 1:3])

# Select multiple rows and multiple columns using boolean masks
print("\nSelection using boolean masks:")
row_mask = [True, False, True]
col_mask = [False, True, False, True]
print(df.iloc[row_mask, col_mask])

# Select multiple rows and multiple columns using callable functions
print("\nSelection using callable functions:")
print(df.iloc[lambda x: [0, 2], lambda x: [1, 3]])

Selection using lists of integers:
   A  C
0  1  3
1  5  7

Selection using slices:
    B   C
0   2   3
1   6   7
2  10  11

Selection using boolean masks:
    B   D
0   2   4
2  10  12

Selection using callable functions:
    B   D
0   2   4
2  10  12


In [54]:
df.iloc[2]

A     9
B    10
C    11
D    12
Name: 2, dtype: int64

In [55]:
df.iloc[0:2]

Unnamed: 0,A,B,C,D
0,1,2,3,4
1,5,6,7,8


In [58]:
df['A']

0    1
1    5
2    9
Name: A, dtype: int64

In [59]:
df[1]

KeyError: 1

In [60]:
df.items()

<generator object DataFrame.items at 0x7fcef8201480>

In [61]:
for col in df:
    print(col)

A
B
C
D


In [62]:
for col in df.columns:
    print(col)

A
B
C
D
