# __Data Frame Functions__

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Sample employee records; the values contain the data-quality problems noted per column.
data = {
    # ID 102 appears twice.
    'EmpID': [101, 102, 102, 104, 105, 106, 107, 108, 109, 110],
    # One name is missing (None).
    'Name': ['Ali', 'Sara', None, 'Zara', 'Usman', 'Hina', 'Bilal', 'Nida', 'Omer', 'Ayesha'],
    # Mixed upper/lower case and one value with a trailing space ('IT ').
    'Dept': ['IT', 'hr', 'IT ', 'Finance', 'Sales', 'it', 'HR', 'Marketing', 'Finance', 'IT'],
    # Stored as text: thousands separators, a 'k' suffix, and one missing value.
    'Salary': ['45,000', '52000', '48k', '60,000', None, '47,000', '55,000', '62,000', '58000', '50k'],
    # Two missing values (np.nan).
    'Experience': [1, 5, np.nan, 7, 3, 1, 4, np.nan, 8, 2],
    # Dates as text with two separators ('-' and '/') and one missing value.
    'JoinDate': ['2021-01-10', '2018/05/20', None, '2016-03-15', '2019-07-01', 
                 '2022-02-11', '2017/08/30', '2015-06-05', '2014-11-25', '2020-09-01']
}




In [None]:
# Build the working frame from the raw records, then clean it one column at a time.
df = pd.DataFrame(data)

# Dept: unify case and trim stray whitespace so 'it', 'IT ' and 'IT' compare equal.
df['Dept'] = df['Dept'].str.upper().str.strip()
# Validation using .isin(): anything outside the allow-list becomes missing.
# NOTE(review): 'SALES' is not on the list, so the 'Sales' row is blanked as well -
# confirm this is intended.
valid_Depts = ['IT', 'HR', 'FINANCE', 'MARKETING']
df.loc[~df['Dept'].isin(valid_Depts), 'Dept'] = None

# Salary, approach 1: DataFrame.replace with a regex restricted to the Salary column.
df = df.replace({'Salary': 'k'}, '000', regex=True)
df = df.replace({'Salary': ','}, '', regex=True)

# Salary, approach 2: the same clean-up through the .str accessor. After approach 1
# there is nothing left to replace; it is kept to show the alternative spelling.
df['Salary'] = (
    df['Salary']
    .str.replace('k', '000', regex=False)
    .str.replace(',', '', regex=False)
)
df['Salary'] = pd.to_numeric(df['Salary'])
# Fill the missing salary with the median before casting: int cannot hold NaN.
df['Salary'] = df['Salary'].fillna(df['Salary'].median())
df['Salary'] = df['Salary'].astype(int)

# Experience: fill gaps with the department mean first, then fall back to the
# overall mean for rows whose department has no known experience (or no Dept).
globalExpMean = df['Experience'].mean()
s = df.groupby('Dept')['Experience'].transform('mean')
df['Experience'] = df['Experience'].fillna(s)
df['Experience'] = df['Experience'].fillna(globalExpMean)
df['Experience'] = df['Experience'].round(2)

# JoinDate: bring both separators to '-' so every value parses with one format.
df['JoinDate'] = df['JoinDate'].str.replace('/', '-', regex=False)
df['JoinDate'] = pd.to_datetime(df['JoinDate'])

# dropna() and reset_index() return new frames, so the result must be assigned
# back; otherwise the row without a Name stays in df.
df = df.dropna(subset=['Name']).reset_index(drop=True)
df

 ## __1 Creation & Basic Info (Understand Your Data)__


###  pd.DataFrame() Create DataFrame From dict, list, ndarray 

In [None]:

df = pd.DataFrame(data)

### df.head(n) First n rows Quick preview

In [None]:
df.head(4)

### df.tail(n) Last n rows Check end

In [None]:
df.tail(5)

### df.sample(n) Random rows Spot-check data 

In [None]:
df.sample(5)

### df.shape : (rows, cols) Dataset size

In [None]:
df.shape

### df.size :Total cells Memory idea 


In [None]:
df.size

### df.ndim Dimensions Usually 2

In [None]:
df.ndim

### df.info() Columns, dtypes, nulls MOST IMPORTANT 

In [None]:
df.info()

### df.describe() Stats summary Numeric overview 

In [None]:
df.describe()

### df.columns Column names Rename, inspect

In [None]:
df.columns

### df.index Index object Reset/align


In [None]:
df.index

## __2 Column & Index Operations__

### df['col'] Select column (Series) 


In [None]:
df['Name']

### df[['c1','c2']] Multiple columns 


In [None]:
df[['EmpID','Name']]


### df.rename(columns/index={Name:NewName}) : Rename columns/index 

In [None]:
# Both calls return a renamed copy; nothing is assigned back, so df is unchanged.
# Rename the column JoinDate -> JoiningDate.
df.rename(columns={"JoinDate":"JoiningDate"})
# Relabel the row whose index label is 2 as 4.
df.rename(index={2:4})

### df.set_index() Set column as index 


In [None]:
df.set_index("EmpID")

### df.reset_index(drop=True) Reset index 

In [None]:
df.reset_index(drop=True)

### df.insert(index, "Name", value) Insert column at position 

In [None]:
# Random ages in [20, 30), one per row. Sizing and indexing the Series from df
# (instead of a hard-coded 10 and a default 0..9 index) keeps every row filled
# even when df has a different length or a non-default index.
age = pd.Series(np.random.randint(20, 30, len(df)), index=df.index)
# Place the new column at position 3 (fourth column).
df.insert(3, "Age", age)


### df.pop() Remove & return column

In [None]:
age=df.pop("Age")

### df.drop() Drop rows or columns

In [None]:
# Neither result is assigned back, so df keeps every row and column.
# axis=1: drop the column named 'Dept'.
df.drop("Dept",axis=1)
# axis=0: drop the row whose index label is 4.
df.drop(4,axis=0)

## __3 Selection & Filtering__
    Label / Position Based 

### df.loc[index]['Col'] Label-based (SAFE) 

In [None]:
df.loc[0]['Name']
df.loc[0]


### df.iloc[index]['Col'] Position-based 


In [None]:
df.iloc[0]['Name']
df.iloc[0]

### df.at[row-index, col-index] Fast single value 

In [None]:
df.at[0,'Name']

### df.iat[row-index,col-Index] Fast positional value

In [None]:
df.iat[0,1]


### Boolean Filtering

| Pattern | Meaning |
| --- | --- |
| `df[df['col'] > x]` | Filter rows based on value |
| `df.loc[cond, cols]` | **SAFE** filter + modify (Selects specific rows/cols) |
| `(cond1) & (cond2)` | Bitwise **AND** (Both conditions must be true) |
| `(cond1) \| (cond2)` | Bitwise **OR** (At least one condition must be true) |
| `~cond` | Bitwise **NOT** (Inverses the condition) |



In [None]:
df[df['Experience']>2]

In [None]:
df.loc[df['Experience']>2,'Salary']=50000

In [None]:
# JoinDate may still be raw text in two formats ('2018/05/20', '2021-01-10').
# Comparing such strings with '2018-01-01' is lexicographic, so parse them into
# real datetimes first and compare those instead.
join_dates = pd.to_datetime(
    df['JoinDate'].astype(str).str.replace('/', '-', regex=False),
    errors='coerce',  # missing or unparseable dates become NaT and never match
)
df[(df['Experience'] > 2) & (join_dates < '2018-01-01')]

In [None]:
df[~(df['Dept']=='IT')]


---
## __4 Modification / Assignment__

| Method | Purpose |
| --- | --- |
| `df.loc[cond, col] = val` | **Conditional update** (Modifies specific rows/cols in place) |
| `df[col] = ...` | **Create/Overwrite** an entire column |
| `df.assign()` | **Create new column safely** (Returns a new DataFrame with added Columns; original is untouched) |
| `df.update()` | **Update** values in place using the non-NaN values of another DataFrame (matched on row index and column labels) |


---

#### ⚠️ Best Practice: Avoiding "Chained Assignment"

Pandas cannot always guarantee whether you are modifying a **view** or a **copy** when you chain slice operations. Always use `.loc` for conditional updates.

**Avoid (WRONG)**

> `df[df['Stock'] < 50]['Price'] = 999`
> *Risk: May raise `SettingWithCopyWarning` and fail to update the original DataFrame.*

**Correct (SAFE)**

> `df.loc[df['Stock'] < 50, 'Price'] = 999`
> *Result: Explicitly targets the rows and column in a single step.*

---

In [None]:
# Give every row whose Dept is 'IT' a 10% raise, modifying df in place.
# NOTE(review): the multiplication needs a numeric Salary column. The earlier
# `df = pd.DataFrame(data)` rebuilds Salary as raw text ('45,000', '50k'), and
# multiplying a str by 1.10 raises TypeError - confirm Salary has been converted
# (e.g. with pd.to_numeric) before this cell runs.
df.loc[df['Dept']=='IT','Salary']*=1.10

In [None]:
# Size the random columns from df itself; a hard-coded 10 raises ValueError as
# soon as the frame has a different number of rows.
row_count = len(df)
# New column: random ages in [20, 30).
df['Age'] = np.random.randint(20, 30, row_count)
# Overwrites the existing Dept values with random departments.
df['Dept'] = np.random.choice(['IT', 'HR', 'FINANCE'], size=row_count)

In [None]:
df.assign(
    Bonus_Eligible = lambda x: x['Experience'] > 3.0
)

In [None]:

# Patch frame keyed by row label: Dept for rows 1 and 5, Name for row 2.
# Cells not listed here are NaN in `corrections`.
corrections = pd.DataFrame({
    'Dept': {1: 'HR', 5: 'IT'},
    'Name': {2: 'Ahmed'}
})
# update() works in place and copies only the non-NaN cells, matched on index and
# column labels, so just the three listed cells of df change.
df.update(corrections)

## __5 Missing Data Handling__

### df.isna() / df.isnull() : Detect NaNs 

In [None]:
df.isna()
df.isnull()

### df.notna() : Opposite (Only checks NOT NaN)

In [None]:
df.notna()

### df.dropna() :   Remove Rows/Columns containing missing values



### `dropna()` Parameters

| Parameter | Meaning |
| --- | --- |
| `axis=0` / `1` | Drop **rows** (0) or **columns** (1) |
| `how='any'` | Drop if **any** value is `NaN` (Default) |
| `how='all'` | Drop if **all** values are `NaN` |
| `thresh=n` | Keep rows/cols with **at least n** non-NaN values |
| `subset=[cols]` | Consider only these specific columns for `NaN` checks |

---



In [None]:
# Each call returns a new frame; none is assigned back, so df is unchanged.
# Default: drop rows containing any NaN.
df.dropna()
# axis=1: drop columns containing any NaN.
df.dropna(axis=1)
# how='all': drop only rows in which every value is NaN.
df.dropna(how='all')
# how='any': same as the default, spelled out.
df.dropna(how='any')
# thresh=2: keep rows that have at least two non-NaN values.
df.dropna(thresh=2)
# subset: only a NaN in JoinDate causes a row to be dropped.
df.dropna(subset=['JoinDate'])

### df.fillna() : Fill missing 

In [None]:
df.fillna(0)

### df.interpolate() : Estimate values 


In [None]:
# Interpolation is only defined for numeric data. The text columns (Name, Dept,
# ...) make a whole-frame df.interpolate() warn or raise on current pandas, so
# estimate the gaps in the numeric columns only. Work on a copy so df itself is
# left unchanged, as with the original call.
numeric_cols = df.select_dtypes(include='number').columns
interpolated = df.copy()
interpolated[numeric_cols] = interpolated[numeric_cols].interpolate()
interpolated

### df.replace() : Replace specific values

In [None]:
df.replace(to_replace='HR',value='CS')

## __6 String Operations (.str)__ 


### df['col'].str.lower() :  lowercase 

In [74]:
df['Name'].str.lower()

0       ali
1      sara
2     ahmed
3      zara
4     usman
5      hina
6     bilal
7      nida
8      omer
9    ayesha
Name: Name, dtype: object

### str.upper() :  uppercase 

In [76]:
df['Name'].str.upper()

0       ALI
1      SARA
2     AHMED
3      ZARA
4     USMAN
5      HINA
6     BILAL
7      NIDA
8      OMER
9    AYESHA
Name: Name, dtype: object

### str.strip() : remove spaces 

In [77]:
df['Name'].str.strip()

0       Ali
1      Sara
2     Ahmed
3      Zara
4     Usman
5      Hina
6     Bilal
7      Nida
8      Omer
9    Ayesha
Name: Name, dtype: object

### str.replace() : replace text (literal by default; pass regex=True for patterns)

In [82]:
df['Salary'].str.replace(',','').str.replace('k','000')

0    45000
1      NaN
2    48000
3      NaN
4      NaN
5    47000
6      NaN
7    62000
8      NaN
9    50000
Name: Salary, dtype: object

### str.contains() :  filter text 

In [85]:
df['Name'].str.contains('Ali')

0     True
1    False
2    False
3    False
4    False
5    False
6    False
7    False
8    False
9    False
Name: Name, dtype: bool

### str.startswith() : prefix 

In [96]:
df['Name'].str.startswith('A')

0     True
1    False
2     True
3    False
4    False
5    False
6    False
7    False
8    False
9     True
Name: Name, dtype: bool


### str.endswith() :  suffix 


In [99]:
df['Name'].str.endswith('a')

0    False
1     True
2    False
3     True
4    False
5     True
6    False
7     True
8    False
9     True
Name: Name, dtype: bool

### str.split() :  split string 

In [113]:
df['Name'].str.split("")

0             [, A, l, i, ]
1          [, S, a, r, a, ]
2       [, A, h, m, e, d, ]
3          [, Z, a, r, a, ]
4       [, U, s, m, a, n, ]
5          [, H, i, n, a, ]
6       [, B, i, l, a, l, ]
7          [, N, i, d, a, ]
8          [, O, m, e, r, ]
9    [, A, y, e, s, h, a, ]
Name: Name, dtype: object