# NumPy

Basic Functions

In [1]:
import numpy as np

# Build a 2x3 matrix and a length-3 vector from plain Python lists.
a = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
    ]
)
b = np.array([1, 2, 3])

# One print call, one array per line (same text as two separate prints).
print(a, b, sep="\n")

[[1 2 3]
 [4 5 6]]
[1 2 3]


In [3]:
# Number of axes, then the length along each axis.
print(a.ndim, a.shape, sep="\n")

2
(2, 3)


In [4]:
# Total element count, then the element type NumPy chose for the array.
print(a.size, a.dtype, sep="\n")

6
int64


In [5]:
# Float inputs make NumPy pick a floating-point dtype for the array.
c = np.array([1.6] * 3)
print(c.dtype)

float64


Mathematical Functions

In [6]:
# Smallest and largest element over the whole array (no axis given).
print(a.min(), a.max(), sep="\n")

1
6


In [7]:
# Sum over every element of `a`; with no axis given the result is a single number.
print(a.sum())


21


In [8]:
# Element-wise square; the result has the same shape as `a`.
print(np.square(a))

[[ 1  4  9]
 [16 25 36]]


In [10]:
# Rebuild the 2x3 matrix used by the statistics cells that follow.
a = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
    ]
)

In [11]:
# Mean of all elements of the matrix `a`, then the median of the vector `b`
# (note: two different arrays).
print(np.mean(a), np.median(b), sep="\n")

3.5
2.0


In [12]:
# Standard deviation and variance over all elements. NumPy's default is the
# population form (ddof=0), i.e. it divides by N rather than N - 1.
print(np.std(a), np.var(a), sep="\n")

1.707825127659933
2.9166666666666665


In [14]:
# x is exactly twice y, i.e. a perfect linear relationship, so every entry
# of the correlation matrix is 1.
y = np.array([1, 2, 3, 4, 5])
x = 2 * y
np.corrcoef(x, y)

array([[1., 1.],
       [1., 1.]])

In [15]:
# Element-wise e**x, natural logarithm and square root, printed one after another.
print(np.exp(a), np.log(a), np.sqrt(a), sep="\n")

[[  2.71828183   7.3890561   20.08553692]
 [ 54.59815003 148.4131591  403.42879349]]
[[0.         0.69314718 1.09861229]
 [1.38629436 1.60943791 1.79175947]]
[[1.         1.41421356 1.73205081]
 [2.         2.23606798 2.44948974]]


Different ways of creating numpy arrays

In [18]:
# 3-D array of zeros with shape (2, 3, 2): 2 blocks, each 3 rows x 2 columns.
# The values are floats by default (printed as "0.").
print(np.zeros((2, 3, 2)))

[[[0. 0.]
  [0. 0.]
  [0. 0.]]

 [[0. 0.]
  [0. 0.]
  [0. 0.]]]


In [19]:
# 2x3 array filled with ones (float by default).
print(np.ones((2, 3)))

[[1. 1. 1.]
 [1. 1. 1.]]


In [24]:
# Integers from 3 up to (but not including) 10, in steps of 3.
print(np.arange(3, 10, 3))

[3 6 9]


In [28]:
# 7 evenly spaced values from 0 to 4, with both endpoints included.
print(np.linspace(0, 4, 7))

[0.         0.66666667 1.33333333 2.         2.66666667 3.33333333
 4.        ]


In [29]:
# 2x2 array of uniform random floats in [0, 1).
# No seed is set, so the numbers change on every run and a re-run will not
# reproduce previously printed values; call np.random.seed(<int>) first if
# repeatable output is needed.
print(np.random.rand(2, 2))

[[0.78180583 0.88786179]
 [0.10070166 0.5521779 ]]


In [30]:
# 2x3 array of random integers from 5 (inclusive) to 15 (exclusive).
# Unseeded, so the values differ from run to run.
print(np.random.randint(5, 15, size=(2, 3)))

[[10 10  9]
 [ 5 13 13]]


Matrix related functions

In [31]:
# A 2x3 matrix and a 3x1 column vector for the matrix product below.
a = np.array([[1, 2, 3],
              [4, 5, 6]])
b = np.array([[1],
              [2],
              [3]])

print(a, b, sep="\n")


[[1 2 3]
 [4 5 6]]
[[1]
 [2]
 [3]]


In [32]:
# Matrix product: (2, 3) times (3, 1) gives a (2, 1) result.
print(np.dot(a, b))

[[14]
 [32]]


In [38]:
arr = np.array(
    [
        [3, 1, 2],
        [6, 4, 5],
    ]
)
# np.sort returns a sorted copy (each row is sorted on its own); `arr` keeps
# its original order.
print(np.sort(arr))

[[1 2 3]
 [4 5 6]]


In [35]:
# axis=1 joins the arrays side by side: same number of rows, twice the columns.
print(np.concatenate([arr, arr], axis=1))

[[3 1 2 3 1 2]
 [6 4 5 6 4 5]]


In [36]:
# The same 6 elements, read row by row, laid out as 3 rows x 2 columns.
# `arr` itself keeps its (2, 3) shape.
print(arr.reshape(3, 2))

[[3 1]
 [2 6]
 [4 5]]


In [39]:
# Swap rows and columns: shape (2, 3) becomes (3, 2).
print(arr.transpose())

[[3 6]
 [1 4]
 [2 5]]


In [42]:
# 4x4 identity matrix: ones on the main diagonal, zeros elsewhere (float values).
print(np.eye(4))

[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]


Slicing and Indexing

In [45]:
# 2x3 matrix used by the indexing examples below.
a = np.array([[1, 2, 3],
              [4, 5, 6]])


In [46]:
# Four common ways to index a 2-D array, printed one result per line.
print(
    a[0, 1],    # single element: row 0, column 1
    a[:, 1],    # every row, column 1 -> the second column
    a[1, :2],   # row 1, first two columns
    a[a < 5],   # boolean mask: all elements below 5, as a 1-D array
    sep="\n",
)

2
[2 5]
[4 5]
[1 2 3 4]


Arithmetic Operations

In [48]:
# Two equal-length vectors; the operators below act on them element by element.
array_a, array_b = np.array([1, 2, 3]), np.array([4, 5, 6])

In [49]:
# `+` on two same-shape arrays adds them element by element.
addition = array_a + array_b
print(addition)

[5 7 9]


In [50]:
# Element-wise difference, product and quotient of the two vectors.
subtraction = array_a - array_b
multiplication = array_a * array_b
division = array_a / array_b  # `/` is true division, so the result is float

print(subtraction, multiplication, division, sep="\n")

[-3 -3 -3]
[ 4 10 18]
[0.25 0.4  0.5 ]


Copy Functions - Deep Copy

In [51]:
# copy() gives `b` its own data, independent of `a`.
b = a.copy()
b[0, 0] = 99
# Prints 1: the write to `b` did not touch `a`.
print(a[0, 0])

1


Broadcasting

In [55]:
# Broadcasting: shapes (2,) and (2, 1) are stretched to a common (2, 2) shape.
data = np.array([1.0, 2.0])
a = np.array([[1],[2]])
# result[i, j] = a[i, 0] * data[j]
print(data * a)

[[1. 2.]
 [2. 4.]]


Some more useful functions

In [56]:
# An array that contains repeated values.
c = np.array([1, 2, 2, 3, 3, 3])
# np.unique returns the distinct values in sorted order.
print(np.unique(c))

[1 2 3]


In [57]:
d = np.array([[1, 2],
              [3, 4]])

print(
    np.flip(d),          # no axis: reverse along every axis
    np.flip(d, axis=0),  # reverse the order of the rows
    np.flip(d, axis=1),  # reverse the order of the columns
    sep="\n",
)

[[4 3]
 [2 1]]
[[3 4]
 [1 2]]
[[2 1]
 [4 3]]


In [60]:
# 3x2 array used by the flatten example below.
d = np.array(
    [
        [1, 2],
        [3, 4],
        [4, 5],
    ]
)
print(d)

[[1 2]
 [3 4]
 [4 5]]


In [61]:
# Collapse the 2-D array into a 1-D copy, reading it row by row.
print(d.flatten())

[1 2 3 4 4 5]


In [63]:
# 10, 20, ..., 60
x = np.arange(10, 70, 10)
# Split into 6 equal parts -> a list of six one-element arrays.
print(np.split(x, 6))

[array([10]), array([20]), array([30]), array([40]), array([50]), array([60])]


### Pandas

In [64]:
import pandas as pd
import numpy as np

In [66]:
# A Series is a 1-D labelled array; with no index given the labels are 0..n-1.
s = pd.Series(range(10, 60, 10))
print(s)


0    10
1    20
2    30
3    40
4    50
dtype: int64


In [67]:
# Column name -> column values. The NaN makes column A float and serves as
# the missing value for the examples below.
data = dict(
    A=[1, 2, np.nan, 4],
    B=[5, 6, 7, 8],
    C=list("xyzw"),
)
df = pd.DataFrame(data)
print(df)


     A  B  C
0  1.0  5  x
1  2.0  6  y
2  NaN  7  z
3  4.0  8  w


In [69]:
# Select one column by label; the result is a Series that keeps the row index.
df["B"]


Unnamed: 0,B
0,5
1,6
2,7
3,8


In [71]:
# .loc is label-based: it looks the row up by its index label, not its position.
df.loc[1]       # row with index label 1


Unnamed: 0,1
A,2.0
B,6
C,y


In [72]:
# .iloc is position-based: 0-based position 2 is the third row, whatever its label.
df.iloc[2]      # row at position 2


Unnamed: 0,2
A,
B,7
C,z


In [73]:
# .at is the scalar accessor for one (row label, column label) pair.
df.at[0, "B"]   # single cell value at index 0 and column 'B'


np.int64(5)

In [75]:
# A plain slice on a DataFrame selects rows; the end (3) is excluded.
df[0:3]   # similar to Python list slicing


Unnamed: 0,A,B,C
0,1.0,5,x
1,2.0,6,y
2,,7,z


In [76]:
# Boolean mask: keep only the rows where the comparison is True.
# A comparison against NaN is False, so the row with a missing A is left out.
df[df["A"] > 2]  # rows where column A > 2


Unnamed: 0,A,B,C
3,4.0,8,w


In [77]:
# Element-wise missing-value test: returns a same-shaped frame of booleans.
df.isna()       # checks where values are NaN


Unnamed: 0,A,B,C
0,False,False,False
1,False,False,False
2,True,False,False
3,False,False,False


In [79]:
# NOTE: inplace=True mutates `df` itself, so every later cell sees the frame
# without the NaN row (index label 2 is gone; labels 0, 1 and 3 remain).
df.dropna(inplace=True)     # drops rows with any NaN
df.isna()       # checks where values are NaN


Unnamed: 0,A,B,C
0,False,False,False
1,False,False,False
3,False,False,False


In [None]:
# fillna() returns a new DataFrame and does not modify `df`.
# NOTE: the rows containing NaN were already removed from `df` by the earlier
# in-place dropna(), so at this point there is nothing left to fill.
df.fillna(0)    # fills NaN with 0


In [80]:
# Arithmetic on a column is vectorised: every value in B is doubled.
# The result is a new Series; `df` is not modified.
df["B"] * 2


Unnamed: 0,B
0,10
1,12
3,16


In [81]:
# Take the two numeric columns and compute the square root of every value.
# The result is a new DataFrame; `df` is not modified.
df[["A", "B"]].apply(np.sqrt)  # apply sqrt to numeric columns


Unnamed: 0,A,B
0,1.0,2.236068
1,1.414214,2.44949
3,2.0,2.828427


In [82]:
# transform() returns a result with the same shape as its input; here every
# value in columns A and B is doubled.
df[["A", "B"]].transform(lambda x: x * 2)


Unnamed: 0,A,B
0,2.0,10
1,4.0,12
3,8.0,16


In [83]:
# Assigning to a new column label adds column D to `df`: the row-wise sum A + B.
df["D"] = df["A"] + df["B"]
df

Unnamed: 0,A,B,C,D
0,1.0,5,x,6.0
1,2.0,6,y,8.0
3,4.0,8,w,12.0


In [84]:
# Remove the helper column again; `columns=` names the axis explicitly and
# inplace=True changes `df` itself.
df.drop(columns="D", inplace=True)
df

Unnamed: 0,A,B,C
0,1.0,5,x
1,2.0,6,y
3,4.0,8,w


In [85]:
# Save to CSV in the current working directory.
# index=False leaves the row labels out, so only columns A, B and C are written.
df.to_csv("my_data.csv", index=False)

In [86]:
# Read the CSV back from the same relative path it was written to
# ("my_data.csv" in the current working directory). An absolute
# "/content/..." path only resolves when the notebook's working directory is
# /content (e.g. on Google Colab) and fails everywhere else.
# The file was saved without its index, so the rows come back labelled 0..n-1.
df2 = pd.read_csv("my_data.csv")
print(df2)

     A  B  C
0  1.0  5  x
1  2.0  6  y
2  4.0  8  w


In [87]:
# One row per employee: department and salary.
df_group = pd.DataFrame(
    dict(
        Dept=["IT", "IT", "HR", "HR", "Admin"],
        Salary=[50000, 60000, 45000, 47000, 40000],
    )
)

# Average salary per department (one result row per distinct Dept value).
df_group.groupby("Dept").mean()


Unnamed: 0_level_0,Salary
Dept,Unnamed: 1_level_1
Admin,40000.0
HR,46000.0
IT,55000.0


In [88]:
# Re-display `df` (rows are labelled 0, 1, 3 after the earlier in-place dropna).
df

Unnamed: 0,A,B,C
0,1.0,5,x
1,2.0,6,y
3,4.0,8,w


In [90]:
# sort_values() returns a new, sorted DataFrame and leaves `df` untouched, so
# the result has to be captured (and displayed) to be seen. Displaying `df`
# afterwards would only show the original, unsorted frame.
df_sorted = df.sort_values(by="B", ascending=True)
df_sorted

Unnamed: 0,A,B,C
0,1.0,5,x
1,2.0,6,y
3,4.0,8,w


In [91]:
# Two small tables that share the key column "ID".
df1 = pd.DataFrame(dict(ID=[1, 2], Name=["Alice", "Bob"]))
df2 = pd.DataFrame(dict(ID=[1, 2], Score=[85, 90]))

# Join the rows whose ID matches in both tables (inner join by default).
merged = df1.merge(df2, on="ID")
print(merged)


   ID   Name  Score
0   1  Alice     85
1   2    Bob     90


In [92]:
# Long format: one row per (City, Year) pair.
df_pivot = pd.DataFrame(
    dict(
        City=["NY", "NY", "LA", "LA"],
        Year=[2020, 2021, 2020, 2021],
        Sales=[100, 150, 80, 120],
    )
)

# Wide format: one row per Year, one column per City, Sales as the cell values.
df_pivot.pivot(index="Year", columns="City", values="Sales")


City,LA,NY
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2020,80,100
2021,120,150


In [93]:
# set_index() returns a new frame that uses column C as the row index;
# `df` itself is not modified.
df_reset = df.set_index("C")
df_reset

Unnamed: 0_level_0,A,B
C,Unnamed: 1_level_1,Unnamed: 2_level_1
x,1.0,5
y,2.0,6
w,4.0,8


In [94]:
# reset_index() returns a NEW DataFrame in which "C" is a regular column again
# and the rows are labelled 0..n-1. It does not modify df_reset, so the result
# is kept in its own variable instead of being thrown away.
df_flat = df_reset.reset_index()

# df_reset itself is unchanged and is still indexed by "C".
df_reset

Unnamed: 0_level_0,A,B
C,Unnamed: 1_level_1,Unnamed: 2_level_1
x,1.0,5
y,2.0,6
w,4.0,8


In [95]:
# Dimensions of the frame as a tuple.
df.shape      # (rows, columns)


(3, 3)

In [99]:
# By default only numeric columns are summarised, so the string column C is
# left out of the result.
df.describe() # statistical summary


Unnamed: 0,A,B
count,3.0,3.0
mean,2.333333,6.333333
std,1.527525,1.527525
min,1.0,5.0
25%,1.5,5.5
50%,2.0,6.0
75%,3.0,7.0
max,4.0,8.0


In [98]:
# Prints a report: index range, per-column non-null counts and dtypes, and
# memory usage.
df.info()     # summary


<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A       3 non-null      float64
 1   B       3 non-null      int64  
 2   C       3 non-null      object 
dtypes: float64(1), int64(1), object(1)
memory usage: 96.0+ bytes


In [97]:
# Series mapping each column name to its dtype.
df.dtypes     # data types of each column


Unnamed: 0,0
A,float64
B,int64
C,object


In [96]:
# The column labels as a pandas Index object.
df.columns    # column names


Index(['A', 'B', 'C'], dtype='object')

In [100]:
# A fresh two-column numeric frame (this rebinds `data` and `df`).
# col2 is an exact linear function of col1: col2 = 10 * col1 + 1.
data = dict(
    col1=[1, 2, 3, 4, 5],
    col2=[11, 21, 31, 41, 51],
)
df = pd.DataFrame(data)
print(df)

   col1  col2
0     1    11
1     2    21
2     3    31
3     4    41
4     5    51


In [101]:
# Pairwise correlation between the columns (Pearson by default).
# col2 is an exact linear function of col1, so every entry is 1.0.
print(df.corr())

      col1  col2
col1   1.0   1.0
col2   1.0   1.0


In [102]:
# Pairwise sample covariance between the columns (normalised by N - 1).
print(df.cov())

      col1   col2
col1   2.5   25.0
col2  25.0  250.0


In [106]:
# Rows 0/2 and rows 1/4 are identical in every column.
data_with_duplicates = dict(
    col1=[1, 2, 1, 3, 2],
    col2=list("ABACB"),
    col3=[1, 2, 1, 3, 2],
)
df_duplicates = pd.DataFrame(data_with_duplicates)

print("DataFrame with duplicate rows:")
df_duplicates

DataFrame with duplicate rows:


Unnamed: 0,col1,col2,col3
0,1,A,1
1,2,B,2
2,1,A,1
3,3,C,3
4,2,B,2


In [109]:
# Remove rows that repeat an earlier row in every column; the first occurrence
# is kept. inplace=True modifies df_duplicates itself, and the surviving rows
# keep their original index labels.
df_duplicates.drop_duplicates(inplace=True)
df_duplicates

Unnamed: 0,col1,col2,col3
0,1,A,1
1,2,B,2
3,3,C,3


In [110]:
# Rename a column via an {old name: new name} mapping; columns not listed keep
# their names. inplace=True modifies df_duplicates itself.
df_duplicates.rename(columns={'col1': 'column1'}, inplace=True)
df_duplicates

Unnamed: 0,column1,col2,col3
0,1,A,1
1,2,B,2
3,3,C,3
