# __Data Frame Functions__

In [None]:
import pandas as pd
import numpy as np

In [None]:
data = {
    'EmpID': [101, 102, 102, 104, 105, 106, 107, 108, 109, 110],
    'Name': ['Ali', 'Sara', None, 'Zara', 'Usman', 'Hina', 'Bilal', 'Nida', 'Omer', 'Ayesha'],
    'Dept': ['IT', 'hr', 'IT ', 'Finance', 'Sales', 'it', 'HR', 'Marketing', 'Finance', 'IT'],
    'Salary': ['45,000', '52000', '48k', '60,000', None, '47,000', '55,000', '62,000', '58000', '50k'],
    'Experience': [1, 5, np.nan, 7, 3, 1, 4, np.nan, 8, 2],
    'JoinDate': ['2021-01-10', '2018/05/20', None, '2016-03-15', '2019-07-01', 
                 '2022-02-11', '2017/08/30', '2015-06-05', '2014-11-25', '2020-09-01']
}




In [None]:
df = pd.DataFrame(data)

df['Dept'] = df['Dept'].str.upper().str.strip()
# Validation using .isin()
valid_Depts = ['IT', 'HR', 'FINANCE', 'MARKETING']
df.loc[~df['Dept'].isin(valid_Depts),'Dept']=None

# Using Replace Function
df=df.replace({'Salary':'k'},'000',regex=True)
df=df.replace({'Salary':','},'',regex=True)

# Using str.replace
df['Salary'] = df['Salary'].str.replace('k','000').str.replace(',','')
df['Salary']=pd.to_numeric(df['Salary'])
df['Salary']=df['Salary'].fillna(df['Salary'].median())
df['Salary']=df['Salary'].astype(int)

df['Salary']

globalExpMean=df['Experience'].mean()

s=df.groupby('Dept')['Experience'].transform('mean')
df['Experience']=df['Experience'].fillna(s)
df['Experience']=df['Experience'].fillna(globalExpMean)
df['Experience']=df['Experience'].round(2)
df

df['JoinDate']=df['JoinDate'].str.replace('/','-')
df['JoinDate']=pd.to_datetime(df['JoinDate'])

df.dropna(subset=['Name'])
df.reset_index(drop=True)

 ## __1 Creation & Basic Info (Understand Your Data)__


###  pd.DataFrame() Create DataFrame From dict, list, ndarray 

In [None]:

df = pd.DataFrame(data)

### df.head(n) First n rows Quick preview

In [None]:
df.head(4)

### df.tail(n) Last n rows Check end

In [None]:
df.tail(5)

### df.sample(n) Random rows Spot-check data 

In [None]:
df.sample(5)

### df.shape : (rows, cols) Dataset size

In [None]:
df.shape

### df.size :Total cells Memory idea 


In [None]:
df.size

### df.ndim Dimensions Usually 2

In [None]:
df.ndim

### df.info() Columns, dtypes, nulls MOST IMPORTANT 

In [None]:
df.info()

### df.describe() Stats summary Numeric overview 

In [None]:
df.describe()

### df.columns Column names Rename, inspect

In [None]:
df.columns

### df.index Index object Reset/align


In [None]:
df.index

## __2 Column & Index Operations__

### df['col'] Select column (Series) 


In [None]:
df['Name']

### df[['c1','c2']] Multiple columns 


In [None]:
df[['EmpID','Name']]


### df.rename(columns/index={Name:NewName}) : Rename columns/index 

In [None]:
df.rename(columns={"JoinDate":"JoiningDate"})
df.rename(index={2:4})

### df.set_index() Set column as index 


In [None]:
df.set_index("EmpID")

### df.reset_index(drop=True) Reset index 

In [None]:
df.reset_index(drop=True)

### df.insert(index, "Name", value) Insert column at position 

In [None]:
age=pd.Series(np.random.randint(20,30,10))
df.insert(3,"Age",age)


### df.pop() Remove & return column

In [None]:
age=df.pop("Age")

### df.drop() Drop rows or columns

In [None]:
df.drop("Dept",axis=1)
df.drop(4,axis=0)

## __3 Selection & Filtering__
    Label / Position Based 

### df.loc[index]['Col'] Label-based (SAFE) 

In [None]:
df.loc[0]['Name']
df.loc[0]


### df.iloc[index]['Col'] Position-based 


In [None]:
df.iloc[0]['Name']
df.iloc[0]

### df.at[row-index, col-index] Fast single value 

In [None]:
df.at[0,'Name']

### df.iat[row-index,col-Index] Fast positional value

In [None]:
df.iat[0,1]


### Boolean Filtering

| Pattern | Meaning |
| --- | --- |
| `df[df['col'] > x]` | Filter rows based on value |
| `df.loc[cond, cols]` | **SAFE** filter + modify (Selects specific rows/cols) |
| `(cond1) & (cond2)` | Bitwise **AND** (Both conditions must be true) |
| `(cond1) | (cond2)` |
| `~cond` | Bitwise **NOT** (Inverses the condition) |



In [None]:
df[df['Experience']>2]

In [None]:
df.loc[df['Experience']>2,'Salary']=50000

In [None]:
df[(df['Experience']>2) & (df['JoinDate']<'2018-01-01')]

In [59]:
df[~(df['Dept']=='IT')]


Unnamed: 0,EmpID,Name,Dept,Salary,Experience,JoinDate
1,102,Sara,HR,50000,5.0,2018-05-20
3,104,Zara,FINANCE,50000,7.0,2016-03-15
4,105,Usman,,50000,3.0,2019-07-01
6,107,Bilal,HR,50000,4.0,2017-08-30
7,108,Nida,MARKETING,50000,3.88,2015-06-05
8,109,Omer,FINANCE,50000,8.0,2014-11-25


---
## __4.Modification / Assignment__

| Method | Purpose |
| --- | --- |
| `df.loc[cond, col] = val` | **Conditional update** (Modifies specific rows/cols in place) |
| `df[col] = ...` | **Create/Overwrite** an entire column |
| `df.assign()` | **Create new column safely** (Returns a new DataFrame; original is untouched) |
| `df.update()` | **Update** values using data from another DataFrame |


---

#### ⚠️ Best Practice: Avoiding "Chained Assignment"

Pandas cannot always guarantee whether you are modifying a **view** or a **copy** when you chain slice operations. Always use `.loc` for conditional updates.

**Avoid (WRONG)**

> `df[df['Stock'] < 50]['Price'] = 999`
> *Risk: May raise `SettingWithCopyWarning` and fail to update the original DataFrame.*

**Correct (SAFE)**

> `df.loc[df['Stock'] < 50, 'Price'] = 99`
> *Result: Explicitly targets the rows and column in a single step.*

---

In [None]:
df.loc[df['Dept']=='IT','Salary']*=1.10

Unnamed: 0,EmpID,Name,Dept,Salary,Experience,JoinDate
0,101,Ali,IT,54450.0,1.0,2021-01-10
1,102,Sara,HR,50000.0,5.0,2018-05-20
2,102,,IT,58080.0,1.33,NaT
3,104,Zara,FINANCE,50000.0,7.0,2016-03-15
4,105,Usman,,50000.0,3.0,2019-07-01
5,106,Hina,IT,56870.0,1.0,2022-02-11
6,107,Bilal,HR,50000.0,4.0,2017-08-30
7,108,Nida,MARKETING,50000.0,3.88,2015-06-05
8,109,Omer,FINANCE,50000.0,8.0,2014-11-25
9,110,Ayesha,IT,60500.0,2.0,2020-09-01
