----------------
#### **Author Name :** Muhammad Muneer Hussain
#### **Linkedin :** [Click Here](https://www.linkedin.com/in/muneer-hussain-ai/)
#### **Github :** [Click Here](https://github.com/Muhammad-Muneer-Hussain)
#### **Gmail :** muhammadmuneerhussain85@gmail.com

-----------------------------

# Pandas_Numpy_Extended
- Mostly use functions

### `np.array()`

Create an array from a Python list or nested lists

In [2]:
import numpy as np
arr = np.array([1, 2, 3])
arr

array([1, 2, 3])

### `np.arange()`

Create evenly spaced values within a range.

In [3]:
import numpy as np
np.arange(0, 10, 2)

array([0, 2, 4, 6, 8])

### `np.linspace()`

Create evenly spaced numbers over a specified interval.

In [4]:
import numpy as np
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

### `np.reshape()`

Change the shape of an array without changing data.

In [5]:
import numpy as np
arr = np.arange(6).reshape(2, 3)
arr

array([[0, 1, 2],
       [3, 4, 5]])

### `np.ravel()` / `flatten()`

Flatten a multi-dimensional array to 1D.

In [6]:
import numpy as np
arr = np.array([[1,2,3],[4,5,6]])
arr.ravel()
# or
arr.flatten()

array([1, 2, 3, 4, 5, 6])

### `np.transpose()` / `.T`

Transpose an array (swap axes).

In [9]:
import numpy as np
arr = np.array([[1,2,3],[4,5,6]])
arr.T

array([[1, 4],
       [2, 5],
       [3, 6]])

### `np.concatenate()`

Join arrays along an existing axis.

In [13]:
import numpy as np
a = np.array([1,2,3])
b = np.array([4,5,6])
np.concatenate([a, b])

array([1, 2, 3, 4, 5, 6])

### `np.vstack()` / `np.hstack()`

Stack arrays vertically / horizontally.

In [14]:
import numpy as np
a = np.array([1,2,3])
b = np.array([4,5,6])
np.vstack([a, b])
np.hstack([a, b])

array([1, 2, 3, 4, 5, 6])

### `np.expand_dims()` / `np.squeeze()`

Add or remove dimensions.

In [17]:
import numpy as np
a = np.array([1,2,3])
np.expand_dims(a, axis=0)
# remove single-dimensional entries
np.squeeze(np.array([[1,2,3]]))

array([1, 2, 3])

### `np.sum(axis=)`

Sum elements along an axis (axis=0 column-wise, axis=1 row-wise).

In [18]:
import numpy as np
arr = np.array([[1,2,3],[4,5,6]])
np.sum(arr, axis=0)
np.sum(arr, axis=1)

array([ 6, 15])

### `np.mean()` / `np.median()` / `np.std()`

Compute mean, median, std deviation (supports axis parameter).

In [19]:
import numpy as np
arr = np.array([[1,2,3],[4,5,6]])
np.mean(arr, axis=1)
np.median(arr)
np.std(arr)

np.float64(1.707825127659933)

### `np.min()` / `np.max()`

Find min or max value along an axis.

In [23]:
import numpy as np


arr = np.array([[1,2,3],[4,5,6]])
np.min(arr, axis=0)
np.max(arr, axis=1)

array([3, 6])

### `np.argmin()` / `np.argmax()`

Indices of min or max values.

In [24]:
import numpy as np
arr = np.array([3,1,2])
np.argmin(arr)
np.argmax(arr)

np.int64(0)

### `np.unique()`

Return unique values (optionally return counts).

In [25]:
import numpy as np
arr = np.array([1,2,2,3,1])
np.unique(arr)
np.unique(arr, return_counts=True)

(array([1, 2, 3]), array([2, 2, 1]))

### `np.where()`

Return indices or elements meeting a condition.

In [29]:
import numpy as np
arr = np.array([1,5,2,8,3])
np.where(arr > 3)
# use to select elements
arr[np.where(arr > 3)]

array([5, 8])

### `np.sort()`

Return a sorted copy of an array.

In [30]:
import numpy as np
arr = np.array([3,1,2])
np.sort(arr)

array([1, 2, 3])

### `np.dot()` / `np.matmul()`

Matrix multiplication.

In [32]:
import numpy as np
A = np.array([[1,2],[3,4]])
B = np.array([[5,6],[7,8]])
np.dot(A, B)
np.matmul(A, B)

array([[19, 22],
       [43, 50]])

### `np.eye()` / `np.identity()`

Create identity matrix.

In [33]:
import numpy as np
np.eye(3)
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

### `np.zeros()` / `np.ones()`

Create arrays of zeros / ones.

In [34]:
import numpy as np
np.zeros((2,3))
np.ones((2,2))

array([[1., 1.],
       [1., 1.]])

### `np.random.rand()` / `np.random.randn()`

Generate random arrays (uniform / normal).

In [35]:
import numpy as np
np.random.rand(2,3)
np.random.randn(2,3)

array([[-2.54504458, -1.3327953 , -0.26379813],
       [ 1.51348979, -2.26301239,  0.03441873]])

### `np.save()` / `np.load()`

Save/load arrays to .npy format.

In [36]:
import numpy as np
arr = np.arange(5)
np.save('tmp_array.npy', arr)
np.load('tmp_array.npy')

array([0, 1, 2, 3, 4])

### `np.genfromtxt()` / `np.savetxt()`

Read/write text data files (CSV, etc.).

In [39]:
import numpy as np
arr = np.array([[1,2,3],[4,5,6]])
np.savetxt('tmp.csv', arr, delimiter=',')
np.genfromtxt('tmp.csv', delimiter=',')

array([[1., 2., 3.],
       [4., 5., 6.]])

## Pandas â€” one function per cell
Each cell below demonstrates a single Pandas function or closely related pair.

### `pd.Series()`

Create a one-dimensional labeled array.

In [40]:
import pandas as pd
s = pd.Series([10, 20, 30], name='grades')
s

0    10
1    20
2    30
Name: grades, dtype: int64

### `pd.DataFrame()`

Create a two-dimensional labeled structure.

In [41]:
import pandas as pd
df = pd.DataFrame({'A':[1,2], 'B':[3,4]})
df

Unnamed: 0,A,B
0,1,3
1,2,4


### `pd.read_csv()` / `to_csv()`

Import/export CSV files.

In [42]:
import pandas as pd
df = pd.DataFrame({'A':[1,2],'B':[3,4]})
df.to_csv('sample.csv', index=False)
pd.read_csv('sample.csv')

Unnamed: 0,A,B
0,1,3
1,2,4


### `pd.read_excel()` / `to_excel()`

Import/export Excel files (requires openpyxl/xlrd depending on format).

In [43]:
import pandas as pd
df = pd.DataFrame({'A':[1,2],'B':[3,4]})
df.to_excel('sample.xlsx', index=False)
# pd.read_excel('sample.xlsx')  # uncomment if openpyxl is available

ModuleNotFoundError: No module named 'openpyxl'

### `pd.read_json()` / `to_json()`

Import/export JSON data.

In [44]:
import pandas as pd
df = pd.DataFrame({'A':[1,2],'B':[3,4]})
df.to_json('sample.json')
pd.read_json('sample.json')

Unnamed: 0,A,B
0,1,3
1,2,4


### `head()` / `tail()`

View first/last n rows.

In [45]:
import pandas as pd
df = pd.DataFrame({'A':range(10),'B':range(10,20)})
df.head(3)
df.tail(2)

Unnamed: 0,A,B
8,8,18
9,9,19


### `info()`

Display summary of DataFrame.

In [46]:

import pandas as pd
df = pd.DataFrame({'A':[1,2,3],'B':['x','y','z']})
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   A       3 non-null      int64 
 1   B       3 non-null      object
dtypes: int64(1), object(1)
memory usage: 180.0+ bytes


### `describe()`

Summary statistics for numeric columns.

In [47]:
import pandas as pd
df = pd.DataFrame({'num':[1,2,3,4],'cat':['a','b','a','b']})
df.describe()

Unnamed: 0,num
count,4.0
mean,2.5
std,1.290994
min,1.0
25%,1.75
50%,2.5
75%,3.25
max,4.0


### `dtypes`

Show data types of columns.

In [48]:
import pandas as pd
df = pd.DataFrame({'A':[1,2.5],'B':['x','y']})
df.dtypes

A    float64
B     object
dtype: object

### `shape` / `size`

Shape and total elements of DataFrame.

In [49]:
import pandas as pd
df = pd.DataFrame({'A':[1,2,3]})
df.shape, df.size

((3, 1), 3)

### `empty`

Check if DataFrame is empty.

In [50]:
import pandas as pd
df = pd.DataFrame()
df.empty

True

### `columns` / `index`

Access or rename labels.

In [51]:
import pandas as pd
df = pd.DataFrame({'A':[1], 'B':[2]})
cols = df.columns
idx = df.index
# rename
df.columns = ['X','Y']
df

Unnamed: 0,X,Y
0,1,2


### `select_dtypes()`

Select columns by data type.

In [52]:
import pandas as pd
df = pd.DataFrame({'num':[1,2], 'txt':['a','b']})
df.select_dtypes(include='number')

Unnamed: 0,num
0,1
1,2


### `astype()`

Change column data type.

In [53]:
import pandas as pd
df = pd.DataFrame({'A':['1','2']})
df['A'] = df['A'].astype(int)
df.dtypes

A    int64
dtype: object

### `to_datetime()`

Convert column to datetime.

In [54]:
import pandas as pd
df = pd.DataFrame({'date':['2020-01-01','2020-02-01']})
df['date'] = pd.to_datetime(df['date'])
df.dtypes

date    datetime64[ns]
dtype: object

### `isna()` / `notna()`

Detect missing values.

In [55]:
import pandas as pd
df = pd.DataFrame({'A':[1,None,3]})
df.isna().sum()
df.notna()

Unnamed: 0,A
0,True
1,False
2,True


### `dropna()`

Drop rows/columns with missing values (axis=0 rows, axis=1 columns).

In [56]:
import pandas as pd
df = pd.DataFrame({'A':[1,None,3],'B':[None,2,3]})
df.dropna()
# drop columns with any NA
# df.dropna(axis=1)

Unnamed: 0,A,B
2,3.0,3.0


### `fillna()`

Fill missing values with constant/method.

In [57]:
import pandas as pd
df = pd.DataFrame({'A':[1,None,3],'B':[4,5,None]})
df.fillna(0)
# or use method df.fillna(method='ffill')

Unnamed: 0,A,B
0,1.0,4.0
1,0.0,5.0
2,3.0,0.0


### `replace()`

Replace specific values.

In [58]:
import pandas as pd
df = pd.DataFrame({'A':[1, -999, 3]})
df.replace({-999: None})

Unnamed: 0,A
0,1.0
1,
2,3.0


### `drop_duplicates()`

Remove duplicate rows.

In [59]:
import pandas as pd
df = pd.DataFrame({'A':[1,1,2],'B':[3,3,4]})
df.drop_duplicates()

Unnamed: 0,A,B
0,1,3
2,2,4


### `duplicated()`

Find duplicated rows (boolean mask).

In [60]:
import pandas as pd
df = pd.DataFrame({'A':[1,1,2]})
df.duplicated()

0    False
1     True
2    False
dtype: bool

### `sort_values()`

Sort DataFrame by one or more column values.

In [61]:
import pandas as pd
df = pd.DataFrame({'Name':['b','a','c'],'Age':[30,20,25]})
df.sort_values(by='Age')

Unnamed: 0,Name,Age
1,a,20
2,c,25
0,b,30


### `sort_index()`

Sort by index.

In [62]:
import pandas as pd
df = pd.DataFrame({'A':[1,2]}, index=[2,1])
df.sort_index()

Unnamed: 0,A
1,2
2,1


### `set_index()` / `reset_index()`

Set or reset DataFrame index.

In [63]:
import pandas as pd
df = pd.DataFrame({'id':[101,102],'val':[1,2]})
df2 = df.set_index('id')
df2.reset_index()

Unnamed: 0,id,val
0,101,1
1,102,2


### `loc` / `iloc`

Select data by label (`loc`) or position (`iloc`).

In [64]:
import pandas as pd
df = pd.DataFrame({'A':[10,20,30],'B':[1,2,3]}, index=['x','y','z'])
df.loc['y','A']
df.iloc[1,0]

np.int64(20)

### `at` / `iat`

Fast access by label or integer position.

In [65]:
import pandas as pd
df = pd.DataFrame({'A':[5,6]})
df.at[0,'A']
df.iat[0,0]

np.int64(5)

### `query()`

Query rows using an expression string.

In [66]:
import pandas as pd
df = pd.DataFrame({'Age':[25,35,45]})
df.query('Age > 30')

Unnamed: 0,Age
1,35
2,45


### `assign()`

Create new columns (returns new DataFrame).

In [67]:
import pandas as pd
df = pd.DataFrame({'A':[1,2]})
df2 = df.assign(B = df['A']*10)
df2

Unnamed: 0,A,B
0,1,10
1,2,20


### `apply()` / `applymap()`

Apply functions row/column-wise or element-wise.

In [68]:
import pandas as pd
df = pd.DataFrame({'A':[1,2],'B':[3,4]})
df.apply(np.mean, axis=0)
df.applymap(lambda x: x*2)

  df.applymap(lambda x: x*2)


Unnamed: 0,A,B
0,2,6
1,4,8


### `map()`

Apply function or dict to Series values.

In [69]:
import pandas as pd
s = pd.Series(['A','B','A'])
s.map({'A':1,'B':2})

0    1
1    2
2    1
dtype: int64

### `groupby()`

Group data for aggregation.

In [70]:
import pandas as pd
df = pd.DataFrame({'Dept':['HR','HR','IT'],'Salary':[50,60,70]})
df.groupby('Dept')['Salary'].mean()

Dept
HR    55.0
IT    70.0
Name: Salary, dtype: float64

### `agg()` / `aggregate()`

Aggregate multiple functions on columns.

In [71]:
import pandas as pd
df = pd.DataFrame({'A':[1,2,3]})
df.agg({'A':['min','max','mean']})

Unnamed: 0,A
min,1.0
max,3.0
mean,2.0


### `transform()`

Transform group data to match original shape.

In [72]:
import pandas as pd
df = pd.DataFrame({'Dept':['HR','HR','IT'],'Salary':[50,60,70]})
df['Dept_mean'] = df.groupby('Dept')['Salary'].transform('mean')
df

Unnamed: 0,Dept,Salary,Dept_mean
0,HR,50,55.0
1,HR,60,55.0
2,IT,70,70.0


### `pivot_table()`

Create pivot table with aggregation.

In [73]:
import pandas as pd
df = pd.DataFrame({'Dept':['HR','IT','HR'],'Sales':[10,20,30]})
pd.pivot_table(df, index='Dept', values='Sales', aggfunc='sum')

Unnamed: 0_level_0,Sales
Dept,Unnamed: 1_level_1
HR,40
IT,20


### `melt()`

Unpivot wide DataFrame to long format.

In [74]:
import pandas as pd
df = pd.DataFrame({'id':[1,2],'Jan':[10,20],'Feb':[15,25]})
pd.melt(df, id_vars=['id'], var_name='Month', value_name='Sales')

Unnamed: 0,id,Month,Sales
0,1,Jan,10
1,2,Jan,20
2,1,Feb,15
3,2,Feb,25


### `concat()`

Concatenate DataFrames along axis.

In [75]:
import pandas as pd
df1 = pd.DataFrame({'A':[1,2]})
df2 = pd.DataFrame({'A':[3,4]})
pd.concat([df1, df2], ignore_index=True)

Unnamed: 0,A
0,1
1,2
2,3
3,4


### `merge()`

SQL-style join of DataFrames.

In [76]:
import pandas as pd
df1 = pd.DataFrame({'id':[1,2],'v1':[10,20]})
df2 = pd.DataFrame({'id':[2,1],'v2':[30,40]})
pd.merge(df1, df2, on='id')

Unnamed: 0,id,v1,v2
0,1,10,40
1,2,20,30


### `join()`

Join on index or key column.

In [77]:
import pandas as pd
df1 = pd.DataFrame({'v1':[1,2]}, index=['a','b'])
df2 = pd.DataFrame({'v2':[3,4]}, index=['a','b'])
df1.join(df2)

Unnamed: 0,v1,v2
a,1,3
b,2,4


### `value_counts()`

Frequency of unique values in a Series.

In [78]:
import pandas as pd
s = pd.Series(['x','y','x','z'])
s.value_counts()

x    2
y    1
z    1
Name: count, dtype: int64

### `unique()`

Return unique values of a Series.

In [79]:
import pandas as pd
s = pd.Series([1,2,2,3])
s.unique()

array([1, 2, 3])

### `nunique()`

Count unique values.

In [80]:
import pandas as pd
s = pd.Series([1,2,2,3])
s.nunique()

3

### `between()`

Select rows within a range.

In [81]:
import pandas as pd
df = pd.DataFrame({'Score':[50,80,90]})
df[df['Score'].between(60,95)]

Unnamed: 0,Score
1,80
2,90


### `clip()`

Limit values to a range.

In [82]:
import pandas as pd
df = pd.DataFrame({'Score':[120,80,-5]})
df['Score'].clip(0,100)

0    100
1     80
2      0
Name: Score, dtype: int64

### `where()`

Replace values conditionally.

In [83]:
import pandas as pd
df = pd.DataFrame({'Score':[50,80,30]})
df['Pass'] = df['Score'].where(df['Score']>50, 'Fail')
df

Unnamed: 0,Score,Pass
0,50,Fail
1,80,80
2,30,Fail


### `sample()`

Return random sample of rows.

In [84]:
import pandas as pd
df = pd.DataFrame({'A':range(10)})
df.sample(n=3)

Unnamed: 0,A
8,8
1,1
5,5


### `corr()` / `cov()`

Correlation / covariance matrix.

In [85]:
import pandas as pd
df = pd.DataFrame({'x':[1,2,3],'y':[1,4,9]})
df.corr()
df.cov()

Unnamed: 0,x,y
x,1.0,4.0
y,4.0,16.333333


### `sum()` / `mean()` / `median()`

Compute summary statistics.

In [86]:
import pandas as pd
df = pd.DataFrame({'Sales':[10,20,30]})
df['Sales'].sum()
df['Sales'].mean()
df['Sales'].median()

20.0

### `std()` / `var()`

Standard deviation and variance.

In [87]:
import pandas as pd
df = pd.DataFrame({'Sales':[10,20,30]})
df['Sales'].std()
df['Sales'].var()

100.0

### `mode()`

Most frequent value.

In [88]:
import pandas as pd
df = pd.DataFrame({'City':['A','B','A']})
df['City'].mode()

0    A
Name: City, dtype: object

### `idxmax()` / `idxmin()`

Index of max / min value.

In [89]:
import pandas as pd
df = pd.DataFrame({'Sales':[5,9,3]})
df['Sales'].idxmax()
df['Sales'].idxmin()

2

### `pipe()`

Chain operations in function pipelines.

In [90]:
import pandas as pd
def add_col(df):
    return df.assign(Total = df['A'] + df['B'])
df = pd.DataFrame({'A':[1,2],'B':[3,4]})
df.pipe(add_col)

Unnamed: 0,A,B,Total
0,1,3,4
1,2,4,6


### `eval()`

Evaluate expressions in DataFrame.

In [91]:
import pandas as pd
df = pd.DataFrame({'A':[1,2],'B':[3,4]})
df.eval('Total = A + B')
df

Unnamed: 0,A,B
0,1,3
1,2,4
