In [2]:
import pandas as pd
import numpy as np

# ---Series: create with values + custom index + name ----
s = pd.Series([100, 120, 90], index = ["Jan", "Feb", "Mar"], name = "Sales")
print(s)
print("Index: ", s.index.to_list(), "| dtype: ", s.dtype, "| name: ", s.name)

Jan    100
Feb    120
Mar     90
Name: Sales, dtype: int64
Index:  ['Jan', 'Feb', 'Mar'] | dtype:  int64 | name:  Sales


In [3]:
# Access by label vs by position
print("Feb = ", s.loc["Feb"])  # label-based
print("pos 0 = ", s.iloc[0])   # position-based

Feb =  120
pos 0 =  100


In [None]:
#----DataFrame: dict of column ----
df = pd.DataFrame({
    "Month": ["Jan", "Feb", "Mar", "Apr"],
    "Sales": pd.Series([100, 120, 90, 140], dtype="Int64"),
    "Cost": pd.Series([60, 70, 55, 88], dtype="Int64")
})
print(df)
print("dtypes: \n", df.dtypes)

  Month  Sales  Cost
0   Jan    100    60
1   Feb    120    70
2   Mar     90    55
3   Apr    140    88
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Month   4 non-null      object
 1   Sales   4 non-null      Int64 
 2   Cost    4 non-null      Int64 
dtypes: Int64(2), object(1)
memory usage: 236.0+ bytes
None
dtypes: 
 Month    object
Sales     Int64
Cost      Int64
dtype: object


In [16]:
print(df.info())
print("-----------------------------------------------------")
print(df.head())
print("-----------------------------------------------------")
print(df.tail())
print("-----------------------------------------------------")
print(df.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Month   4 non-null      object
 1   Sales   4 non-null      Int64 
 2   Cost    4 non-null      Int64 
 3   Profit  4 non-null      Int64 
dtypes: Int64(3), object(1)
memory usage: 272.0+ bytes
None
-----------------------------------------------------
  Month  Sales  Cost  Profit
0   Jan    100    60      40
1   Feb    120    70      50
2   Mar     90    55      35
3   Apr    140    88      52
-----------------------------------------------------
  Month  Sales  Cost  Profit
0   Jan    100    60      40
1   Feb    120    70      50
2   Mar     90    55      35
3   Apr    140    88      52
-----------------------------------------------------
           Sales       Cost    Profit
count        4.0        4.0       4.0
mean       112.5      68.25     44.25
std    22.173558  14.568802  8.098354
min         90.0    

In [24]:
# Column as Series; assignments create new columns
df["Profit"] = df["Sales"]-df["Cost"]
print(df[["Month", "Profit"]])
print("-----------------------------------------------------")
# Indexing: .loc (labels), .iloc(positions)
print(df.loc[0, "Sales"], df.iloc[0,1])

  Month  Profit
0   Jan      40
1   Feb      50
2   Mar      35
3   Apr      52
-----------------------------------------------------
100 100


In [None]:
import pandas as pd
df = pd.DataFrame({"A":[1,-1,2,0], "B":[10,20,30,40]}, index=["r0","r1","r2","r3"])

#  chained indexing (risky)
df[df["A"]>0]["B"] = 999     # may modify only a temporary copy

#  one-step loc (safe)
df.loc[df["A"]>0, "B"] = 999
print(df)


    A    B
r0  1  999
r1 -1   20
r2  2  999
r3  0   40


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[df["A"]>0]["B"] = 999     # may modify only a temporary copy


In [5]:
import pandas as pd

a = pd.Series([10, 20], index=["x", "y"])
b = pd.Series([1,  2],  index=["y", "x"])
c = a + b
print(c)

a = pd.Series([10,20], index=["x","y"])
b = pd.Series([1,2],   index=["x","y"])
a + b
# x: NaN (no 'x' in b), y: 22, z: NaN (no 'z' in a)


x    12
y    21
dtype: int64


x    11
y    22
dtype: int64

In [6]:
import numpy as np
import pandas as pd

# 1) From dict of lists/Series
df1 = pd.DataFrame({
    "id": pd.Series([101, 102, 103], dtype="Int64"),
    "name": pd.Series(["A", "B", "C"], dtype="string"),
    "score": [88.5, 92.0, 79.5]
})

# 2) From array + columns
arr = np.array([[1, 10.0], [2, 11.5], [3, 13.2]])
df2 = pd.DataFrame(arr, columns=["step", "value"])

# 3) From range / date_range
df3 = pd.DataFrame({
    "t": range(6),  # 0..5
    "signal": np.sin(np.linspace(0, 2*np.pi, 6))
})

dates = pd.date_range("2025-01-01", periods=4, freq="D")
df4 = pd.DataFrame({"date": dates, "y": [5, 7, 6, 9]})

# 4) From List of dicts (records)
records = [{"city": "Dhaka", "pop": 22.5}, {"city": "Chattogram", "pop": 2.6}]
df5 = pd.DataFrame.from_records(records)

# Display info of all DataFrames
for i, d in enumerate([df1, df2, df3, df4, df5], start=1):
    print(f"\nDF{i} shape={d.shape}\n", d.head())



DF1 shape=(3, 3)
     id name  score
0  101    A   88.5
1  102    B   92.0
2  103    C   79.5

DF2 shape=(3, 2)
    step  value
0   1.0   10.0
1   2.0   11.5
2   3.0   13.2

DF3 shape=(6, 2)
    t    signal
0  0  0.000000
1  1  0.951057
2  2  0.587785
3  3 -0.587785
4  4 -0.951057

DF4 shape=(4, 2)
         date  y
0 2025-01-01  5
1 2025-01-02  7
2 2025-01-03  6
3 2025-01-04  9

DF5 shape=(2, 2)
          city   pop
0       Dhaka  22.5
1  Chattogram   2.6


In [7]:
import pandas as pd

pd.options.display.max_rows = 10
pd.options.display.width = 100

df = pd.DataFrame({
    "A": pd.Series([1, 2, pd.NA, 4], dtype="Int64"),
    "B": [10.0, 12.5, 9.5, 11.0],
    "C": pd.Series(["x", "y", "y", "z"], dtype="string"),
    "D": pd.to_datetime(["2025-01-01", "2025-01-02", None, "2025-01-04"])
})

print("shape:", df.shape)
print("columns:", df.columns.tolist())
print("dtypes:\n", df.dtypes)

print("\nHEAD:\n", df.head(3))
print("\nINFO:")
df.info(memory_usage="deep")

print("\nDESCRIBE (numeric):\n", df.describe())
print("\nDESCRIBE (all):\n", df.describe(include="all"))

print("\nMissing values per column:\n", df.isna().sum())
print("\nUnique counts:\n", df.nunique(dropna=False))


shape: (4, 4)
columns: ['A', 'B', 'C', 'D']
dtypes:
 A             Int64
B           float64
C    string[python]
D    datetime64[ns]
dtype: object

HEAD:
       A     B  C          D
0     1  10.0  x 2025-01-01
1     2  12.5  y 2025-01-02
2  <NA>   9.5  y        NaT

INFO:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   A       3 non-null      Int64         
 1   B       4 non-null      float64       
 2   C       4 non-null      string        
 3   D       3 non-null      datetime64[ns]
dtypes: Int64(1), datetime64[ns](1), float64(1), string(1)
memory usage: 432.0 bytes

DESCRIBE (numeric):
               A          B                    D
count       3.0   4.000000                    3
mean   2.333333  10.750000  2025-01-02 08:00:00
min         1.0   9.500000  2025-01-01 00:00:00
25%         1.5   9.875000  2025-01-01 12:00:00
50%         2.0