# NumPy vs Python Lists: Performance & Usage Guide
This notebook demonstrates the difference between **Python lists** and **NumPy arrays**, their performance, memory usage, and various operations.

## NumPy vs. Python Lists – Performance Test

In [3]:
import numpy as np
import time

# Python list
size = 1_000_000
list1 = list(range(size))
list2 = list(range(size))

start = time.time()
result = [x + y for x, y in zip(list1, list2)]
print(result[0:10])
end = time.time()
print("Python list addition time:", end - start)

# NumPy array
arr1 = np.array(list1)
arr2 = np.array(list2)

start = time.time()
result = arr1 + arr2  # Vectorized operation
print(result[0:10])
end = time.time()
print("NumPy array addition time:", end - start)

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
Python list addition time: 0.16579604148864746
[ 0  2  4  6  8 10 12 14 16 18]
NumPy array addition time: 0.0631406307220459


## Creating NumPy Arrays

In [5]:
import numpy as np

# Creating a 1D NumPy array
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1)

# Creating a 2D NumPy array
arr2 = np.array([[1, 2, 3], [4, 5, 6]])
print(arr2)

# Checking type and shape
print("Type:", type(arr1))
print("Shape:", arr2.shape)

[1 2 3 4 5]
[[1 2 3]
 [4 5 6]]
Type: <class 'numpy.ndarray'>
Shape: (2, 3)


## Memory Efficiency – NumPy vs. Lists

In [4]:
import sys

list_data = list(range(1000))
numpy_data = np.array(list_data)

print("Python list size:", sys.getsizeof(list_data) * len(list_data), "bytes")
print("NumPy array size:", numpy_data.nbytes, "bytes")

Python list size: 8056000 bytes
NumPy array size: 8000 bytes


## Vectorization – No More Loops!

In [6]:
# Python list (loop-based)
list_squares = [x ** 2 for x in list1]

# NumPy (vectorized)
numpy_squares = arr1 ** 2

## Creating NumPy Arrays
### 1.From Python Lists:

In [None]:
import numpy as np

arr1 = np.array([1, 2, 3, 4, 5])  # 1D array
arr2 = np.array([[1, 2, 3], [4, 5, 6]])  # 2D array

print(arr1)  # [1 2 3 4 5]
print(arr2)  
# [[1 2 3]
#  [4 5 6]]

### 2.Creating Arrays from Scratch:

In [8]:
np.zeros((3, 3))    # 3x3 array of zeros
np.ones((2, 4))     # 2x4 array of ones
np.full((2, 2), 7)  # 2x2 array filled with 7
np.eye(4)           # 4x4 identity matrix
np.arange(1, 10, 2) # [1, 3, 5, 7, 9] (like range)
np.linspace(0, 1, 5) # [0. 0.25 0.5 0.75 1.] (evenly spaced)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

##  Checking Array Properties

In [9]:
arr = np.array([[10, 20, 30], [40, 50, 60]])

print("Shape:", arr.shape)   # (2, 3) → 2 rows, 3 columns
print("Size:", arr.size)     # 6 → total elements
print("Dimensions:", arr.ndim) # 2 → 2D array
print("Data type:", arr.dtype) # int64 (or int32 on Windows)

Shape: (2, 3)
Size: 6
Dimensions: 2
Data type: int64


## Changing Data Types

In [None]:

# Changing Data Types
arr = np.array([1, 2, 3], dtype=np.float32)  # Explicit type
print(arr.dtype)  # float32

arr_int = arr.astype(np.int32)  # Convert float to int
print(arr_int)  # [1 2 3]

## Reshaping and Flattening Arrays

In [11]:

arr = np.array([[1, 2, 3], [4, 5, 6]])
print(arr.shape)  # (2, 3)

reshaped = arr.reshape((3, 2))  # Change shape
print(reshaped)
# [[1 2]
#  [3 4]
#  [5 6]]

flattened = arr.flatten()  # Convert 2D → 1D
print(flattened)  # [1 2 3 4 5 6]

(2, 3)
[[1 2]
 [3 4]
 [5 6]]
[1 2 3 4 5 6]


## Indexing (Same as Python Lists)

In [4]:

arr = np.array([10, 20, 30, 40])
print(arr[0])  # 10
print(arr[-1]) # 40

10
40


## Slicing (Extracting Parts of an Array)

In [5]:

arr = np.array([10, 20, 30, 40, 50])

print(arr[1:4])  # [20 30 40] (slice from index 1 to 3)
print(arr[:3])   # [10 20 30] (first 3 elements)
print(arr[::2])  # [10 30 50] (every 2nd element)

[20 30 40]
[10 20 30]
[10 30 50]


## Slicing (Extracting Parts of an Array)

In [6]:



sliced = arr[1:4]
sliced[0] = 999
print(arr)  # [10 999 30 40 50]

[ 10 999  30  40  50]


## Fancy Indexing (Select Multiple Elements)

In [7]:


arr = np.array([10, 20, 30, 40, 50])
idx = [0, 2, 4]  # Indices to select
print(arr[idx])  # [10 30 50]

[10 30 50]


## Boolean Masking (Filter Data)

In [9]:

arr = np.array([10, 20, 30, 40, 50])
mask = arr > 25  # Condition: values greater than 25
print(arr[mask])  # [30 40 50]


[30 40 50]


# Multidimensional Indexing and Axis

In [11]:

import numpy as np

arr = np.array([[1, 2, 3], 
                [4, 5, 6], 
                [7, 8, 9]])

print(arr)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


## Sum along axes:

In [12]:
print(np.sum(arr, axis=0))  # Sum along rows (down each column)
print(np.sum(arr, axis=1))  # Sum along columns (across each row)

[12 15 18]
[ 6 15 24]


## Accessing an element

In [14]:

print(arr[1, 2])  # Row index 1, Column index 2 → Output: 6

6


In [4]:
print(arr[0:2, 1:3])  # Extracts first 2 rows and last 2 columns

[[2 3]
 [5 6]]


# Indexing in 3D Arrays

In [18]:
# For 3D arrays, the first index refers to the "depth" (sheets of data).
arr3D = np.array([[[1, 2, 3], [4, 5, 6]],
                  [[7, 8, 9], [10, 11, 12]]])

# Output of arr3D.shape is → (depth, rows, columns)
print(arr3D.shape)  # Output: (2, 2, 3) 

# First sheet, second row, third column
print(arr3D[0, 1, 2])  # Output: 6

print(arr3D[:, 0, :])   # Get the first row from both sheets


# Get all rows of the first column
first_col = arr[:, 0]
print(first_col)  # Output: [1 4 7]

# Get the first row from each "sheet" in a 3D array
first_rows = arr3D[:, 0, :]
print(first_rows)

# Replace all elements in column 1 with 0
arr[:, 1] = 0
print(arr)



(2, 2, 3)
6
[[1 2 3]
 [7 8 9]]
[1 4 7]
[[1 2 3]
 [7 8 9]]
[[1 0 3]
 [4 0 6]
 [7 0 9]]


# Useful NumPy Functions Cheat-Sheet

In [19]:
np.mean(arr)
np.std(arr)
np.var(arr)
np.min(arr)
np.max(arr)
np.sum(arr)
np.prod(arr)
np.median(arr)
np.percentile(arr, 50)
np.argmin(arr)
np.argmax(arr)
np.corrcoef(arr1, arr2)
np.unique(arr)
np.diff(arr)
np.cumsum(arr)
np.linspace(0, 10, 5)
np.log(arr)
np.exp(arr)

  np.log(arr)


array([[2.71828183e+00, 1.00000000e+00, 2.00855369e+01],
       [5.45981500e+01, 1.00000000e+00, 4.03428793e+02],
       [1.09663316e+03, 1.00000000e+00, 8.10308393e+03]])

## Vectorization & Broadcasting

In [20]:
arr = np.array([1, 2, 3, 4, 5])
result = []
for num in arr:
    result.append(num ** 2)
print(result)

arr = np.array([1, 2, 3, 4, 5])
result = arr ** 2
print(result)

arr = np.array([1, 2, 3, 4, 5])
result = arr + 10
print(result)

arr1 = np.array([1, 2, 3])
arr2 = np.array([10, 20, 30])
result = arr1 + arr2
print(result)

arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([1, 2, 3])
result = arr1 + arr2
print(result)

[np.int64(1), np.int64(4), np.int64(9), np.int64(16), np.int64(25)]
[ 1  4  9 16 25]
[11 12 13 14 15]
[11 22 33]
[[2 4 6]
 [5 7 9]]


## Data types and Changing Data Types

In [22]:
arr = np.array([1, 2, 3, 4, 5])
print(arr.dtype)
arr = np.array([1.5, 2.7, 3.9])
print(arr.dtype)
arr_int = arr.astype(np.int32)
print(arr_int)
print(arr_int.dtype)

arr_large = np.array([1000000, 2000000, 3000000], dtype=np.int64)
arr_small = arr_large.astype(np.int32)
print(arr_small)
print(arr_small.dtype)

arr_int64 = np.array([1, 2, 3], dtype=np.int64)
arr_int32 = np.array([1, 2, 3], dtype=np.int32)
print(arr_int64.nbytes)
print(arr_int32.nbytes)

arr = np.array(['apple', 'banana', 'cherry'], dtype='U10')
print(arr)

arr = np.array([1 + 2j, 3 + 4j, 5 + 6j], dtype='complex128')
print(arr)

arr = np.array([{'a': 1}, [1, 2, 3], 'hello'], dtype=object)
print(arr)

int64
float64
[1 2 3]
int32
[1000000 2000000 3000000]
int32
24
12
['apple' 'banana' 'cherry']
[1.+2.j 3.+4.j 5.+6.j]
[{'a': 1} list([1, 2, 3]) 'hello']


In [8]:
import pandas as pd 
data ={
"Name":["Nawaira", "Usman", "Laiba", "Mufti Sahab"],
"AI":[10, 98, 99, 89],
"ML":[100, 98, 99, 89],
 "SQL":[10, 98, 99, 89],   
}

df=pd.DataFrame(data, columns=["Name", "AI", "ML", "SQL"])
df

Unnamed: 0,Name,AI,ML,SQL
0,Nawaira,10,100,10
1,Usman,98,98,98
2,Laiba,99,99,99
3,Mufti Sahab,89,89,89


In [20]:
d=df.melt(id_vars=["Name"],value_vars=["AI", "ML", "SQL"], var_name="Subjects",value_name="Obtained marks")
d.pivot(index="Name", columns="Subjects",values="Obtained marks")

Subjects,AI,ML,SQL
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Laiba,99,99,99
Mufti Sahab,89,89,89
Nawaira,10,100,10
Usman,98,98,98


In [20]:
data_dep={
    "DeptId":[1,2,3,4],
    "DeptName":["Hr", "Sales", "IT", "Finance"],
    "DeptHead":["Usman", "KaleemUllah", "Nawaira", "Laiba"]
}
df_dep=pd.DataFrame(data_dep)
df_dep

Unnamed: 0,DeptId,DeptName,DeptHead
0,1,Hr,Usman
1,2,Sales,KaleemUllah
2,3,IT,Nawaira
3,4,Finance,Laiba


In [23]:
data_emp={
    "ID":[1,2,3,4],
    "Name":["Hira", "Salman", "Inshaal", "Zaid"],
    "DeptId":[1, 2,0, 0]
}
df_emp=pd.DataFrame(data_emp)
df_emp

Unnamed: 0,ID,Name,DeptId
0,1,Hira,1
1,2,Salman,2
2,3,Inshaal,0
3,4,Zaid,0


In [27]:
pd.merge(df_dep,df_emp,on="DeptId",how="outer")

Unnamed: 0,DeptId,DeptName,DeptHead,ID,Name
0,0,,,3.0,Inshaal
1,0,,,4.0,Zaid
2,1,Hr,Usman,1.0,Hira
3,2,Sales,KaleemUllah,2.0,Salman
4,3,IT,Nawaira,,
5,4,Finance,Laiba,,
