# Project 1:  NumPy Data Explorer

## NumPy Fundamentals for Data Analysis

This notebook covers foundational NumPy concepts required for data
analysis, including array creation, indexing, reshaping, broadcasting,
and basic numerical operations.

## Arrays

### Array Creation

In [5]:
# Import the library

import numpy as np

In [6]:
# Create a one-dimensional array from a Python list.
# The bare name on the last line makes the notebook display the array.
X = [10, 20, 30, 40, 50, 60, 34, 3, 2]
arr1 = np.array(X)
arr1

array([10, 20, 30, 40, 50, 60, 34,  3,  2])

In [7]:
# 1D array
a = np.array([5, 1, 2, 3, 4])
a

array([5, 1, 2, 3, 4])

In [8]:
# Create a two-dimensional (3x3) array from a list of three equal-length rows.

Y = [[11, 12, 13], [21, 22, 23], [31, 32, 33]]
arr2 = np.array(Y)
arr2

array([[11, 12, 13],
       [21, 22, 23],
       [31, 32, 33]])

In [9]:
# 2D array
b = np.array([[1, 2, 3], [4, 5, 6], [11, 15, 20]])
b

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [11, 15, 20]])

### Creating Arrays using Built-in Functions

In [11]:
# 4x3 array filled with zeros (the displayed values are floats)
np.zeros((4, 3))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [12]:
# 4x3 array filled with ones (the displayed values are floats)
np.ones((4, 3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [13]:
# 4x4 identity matrix: ones on the main diagonal, zeros elsewhere
np.eye(4, 4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

### Creating Arrays using Random Functions

In [15]:
# three random floats (np.random.random samples from the interval [0, 1))
np.random.random(3)

array([0.48157385, 0.80358486, 0.53975324])

In [16]:
# 3x3 array of random floats drawn from [0, 1)
np.random.rand(3, 3)

array([[0.13511625, 0.22127688, 0.40801202],
       [0.9591166 , 0.17956339, 0.87445118],
       [0.5698329 , 0.78900084, 0.71509118]])

In [17]:
np.random.seed(22) # fix the seed so the same integers are produced on every run
np.random.randint(1, 10, (3, 3)) # 3x3 integers from 1 to 9 (the upper bound 10 is exclusive)

array([[6, 5, 1],
       [5, 7, 7],
       [5, 9, 5]])

### Array Attributes

In [19]:
# The shape attribute is a tuple giving the length of the array along each dimension.
arr1.shape

(9,)

In [20]:
arr2.shape

(3, 3)

In [21]:
#The total number of elements in the array is given by the attribute size
arr1.size

9

In [22]:
arr2.size

9

In [23]:
# The ndim attribute gives the number of axes (dimensions) of the array.
arr1.ndim

1

In [24]:
arr2.ndim

2

In [25]:
#If we check the type of the array we get numpy.ndarray
type(arr1)

numpy.ndarray

In [26]:
type(arr2)

numpy.ndarray

In [27]:
# Check the type of the values stored in a NumPy array: the "dtype" attribute
# gives the data type of the array's elements.

a.dtype

dtype('int32')

In [28]:
arr1.dtype

dtype('int32')

In [29]:
arr2.dtype

dtype('int32')

In [30]:
#Checking NumPy Version
#The version string is stored under __version__ attribute.
print(np.__version__)

1.26.4


### Assign value

In [32]:
# Assign the first element to 100

arr1[0] = 100
arr1

array([100,  20,  30,  40,  50,  60,  34,   3,   2])

In [33]:
# Assign the 5th element to 0

arr1[4] = 0
arr1

array([100,  20,  30,  40,   0,  60,  34,   3,   2])

In [34]:
arr2

array([[11, 12, 13],
       [21, 22, 23],
       [31, 32, 33]])

In [35]:
# Assigning a scalar to a row index sets every element of that row
# (row index 2, i.e. the third row).
arr2[2] = 40
arr2

array([[11, 12, 13],
       [21, 22, 23],
       [40, 40, 40]])

In [36]:
# Set the single element at row index 1, column index 2.
arr2[1, 2] = 50
arr2

array([[11, 12, 13],
       [21, 22, 50],
       [40, 40, 40]])

### Array indexing and slicing

#### Slicing in Python means taking the elements from one given index up to another given index.

We pass a slice like this: [start:end]. The element at the end index is not included in the output.
We can also define the step in slicing, like this: [start:end:step].

In [39]:
arr1

array([100,  20,  30,  40,   0,  60,  34,   3,   2])

In [40]:
arr2

array([[11, 12, 13],
       [21, 22, 50],
       [40, 40, 40]])

In [41]:
# Slice arr1: take the elements at indices 1, 2 and 3 (index 4 is excluded).
arr = arr1[1:4]
arr

array([20, 30, 40])

In [42]:
# Element at row index 0, column index 1
arr2[0, 1]

12

In [43]:
# Every second element starting at index 1. arr has only 3 elements, so the
# stop value 5 is clamped to the end of the array and only index 1 is selected.
arr[1:5:2]

array([30])

In [44]:
# If we don't pass start, it is taken as 0.
# arr has only 3 elements, so a stop of 4 simply returns the whole array.
arr[:4]

array([20, 30, 40])

In [45]:
# If we don't pass end, the slice runs to the end of the array.
# Here the start index 4 is already past the last element of arr (3 elements),
# so the result is an empty array.
arr[4:]

array([], dtype=int32)

In [46]:
# If we don't pass step, it is taken as 1.
# The stop value 5 exceeds the length of arr (3), so the slice ends at the last element.
arr[1:5:]

array([30, 40])

In [47]:
# 2-D slicing: rows 0-1 and columns 0-1 (the top-left 2x2 block)
arr2[0:2, 0:2]

array([[11, 12],
       [21, 22]])

In [48]:
#extract the last 4 elements
arr1[-4:]

array([60, 34,  3,  2])

# Performed mathematical, axis-wise and statistical operations on datasets

### Mathematical Operations

In [51]:
# Element-wise square root; the result is a float array
np.sqrt(arr1)

array([10.        ,  4.47213595,  5.47722558,  6.32455532,  0.        ,
        7.74596669,  5.83095189,  1.73205081,  1.41421356])

In [52]:
arr1.sum()

289

In [53]:
# Add the constant to array

arr1 + 20

array([120,  40,  50,  60,  20,  80,  54,  23,  22])

In [54]:
# Element-wise subtraction of two 3x3 arrays (same result as arr2 - b)
c = np.subtract(arr2, b)

c

array([[10, 10, 10],
       [17, 17, 44],
       [29, 25, 20]])

In [55]:
# NumPy array multiplication: np.multiply works element by element
# (this is not matrix multiplication).

m = np.multiply(arr2, b)
m

array([[ 11,  24,  39],
       [ 84, 110, 300],
       [440, 600, 800]])

In [56]:
# Element-wise division; the result is a float array even though both inputs hold integers
d = np.divide(arr2, b)
d

array([[11.        ,  6.        ,  4.33333333],
       [ 5.25      ,  4.4       ,  8.33333333],
       [ 3.63636364,  2.66666667,  2.        ]])

In [57]:
# Calculate the dot product. For two 2-D arrays np.dot performs matrix
# multiplication (rows of arr2 times columns of b), not an element-wise product.

Z = np.dot(arr2, b)
Z

array([[ 202,  277,  365],
       [ 659,  902, 1195],
       [ 640,  880, 1160]])

In [58]:
# Calculate the element-wise sine of Z (np.sin interprets the values as radians)

np.sin(Z)

array([[ 0.80641841,  0.51400431,  0.5440464 ],
       [-0.67018443, -0.35499472,  0.93013916],
       [-0.7738525 ,  0.34670601, -0.68332774]])

## Linspace


A useful function for plotting mathematical functions is <code>linspace</code>.   Linspace returns evenly spaced numbers over a specified interval. 

 **numpy.linspace(start, stop, num = int value)**
 
start  :  start of interval range

stop   :  end of interval range (included in the output by default)

num    :  Number of samples to generate.


In [60]:
# Make a numpy array within [-2, 2] and 9 elements

np.linspace(-2, 2, num=9)

array([-2. , -1.5, -1. , -0.5,  0. ,  0.5,  1. ,  1.5,  2. ])

In [61]:
# Make a numpy array within [0, 2π] and 100 elements 

x = np.linspace(0, 2*np.pi, num=100)
x

array([0.        , 0.06346652, 0.12693304, 0.19039955, 0.25386607,
       0.31733259, 0.38079911, 0.44426563, 0.50773215, 0.57119866,
       0.63466518, 0.6981317 , 0.76159822, 0.82506474, 0.88853126,
       0.95199777, 1.01546429, 1.07893081, 1.14239733, 1.20586385,
       1.26933037, 1.33279688, 1.3962634 , 1.45972992, 1.52319644,
       1.58666296, 1.65012947, 1.71359599, 1.77706251, 1.84052903,
       1.90399555, 1.96746207, 2.03092858, 2.0943951 , 2.15786162,
       2.22132814, 2.28479466, 2.34826118, 2.41172769, 2.47519421,
       2.53866073, 2.60212725, 2.66559377, 2.72906028, 2.7925268 ,
       2.85599332, 2.91945984, 2.98292636, 3.04639288, 3.10985939,
       3.17332591, 3.23679243, 3.30025895, 3.36372547, 3.42719199,
       3.4906585 , 3.55412502, 3.61759154, 3.68105806, 3.74452458,
       3.8079911 , 3.87145761, 3.93492413, 3.99839065, 4.06185717,
       4.12532369, 4.1887902 , 4.25225672, 4.31572324, 4.37918976,
       4.44265628, 4.5061228 , 4.56958931, 4.63305583, 4.69652

### Statistical Operations

In [63]:
# Load the sales dataset into a pandas DataFrame and display 5 randomly sampled rows
import pandas as  pd
df = pd.read_csv('2025sales.csv')
df.sample(5)

Unnamed: 0,Order Line,Order ID,Bar code value,Order Date,Ship Date,Ship Mode,Customer ID,Product ID,Sales,Quantity,Discount,Profit
1601,9553,OF-2023-113355,15375573,24-05-2025,28-05-2025,Standard Class,SJ-20215,TEC-PH-10004912,219.8,5,0.2,24.7275
1171,9123,ON-2023-105998,60203444,26-04-2025,28-04-2025,First Class,CR-12580,FUR-TA-10001095,1673.184,12,0.2,20.9148
1599,9551,OF-2023-144862,26455553,24-05-2025,28-05-2025,Standard Class,EH-14005,OFF-EN-10003040,104.68,5,0.2,35.3295
552,8504,ON-2023-142188,26697579,04-03-2025,04-03-2025,Same Day,JF-15415,FUR-CH-10003199,177.568,2,0.2,8.8784
1209,9161,OF-2023-102925,37899850,28-04-2025,03-05-2025,Second Class,CD-12280,FUR-CH-10004875,128.124,2,0.1,24.2012


In [64]:
# Get the mean of the 'Profit' column (a pandas Series, not a NumPy array)

mean_profit = df['Profit'].mean()
mean_profit

25.543569580078128

In [65]:
# Get the standard deviation of the 'Quantity' column (a pandas Series).
# NOTE: pandas Series.std uses the sample formula (ddof=1) by default, whereas
# np.std defaults to ddof=0, so the two give slightly different values.

standard_deviation_quantity=df['Quantity'].std()
standard_deviation_quantity

2.260251622233217

In [66]:
# Get the largest value in the 'Sales' column (a pandas Series)

max_sales = df['Sales'].max(axis = 0)
max_sales

11199.968

In [67]:
# Get the smallest value in the 'Discount' column (a pandas Series)

min_discount = df['Discount'].min()
min_discount

0.0

# Applied reshaping and broadcasting techniques for efficient computation

In [69]:
# Reshaping refers to changing the dimensions or structure of a NumPy array without altering its data. 
# Examples include reshape, transpose, flatten, and split.
print(arr1)
y = arr1.reshape(3, 3)
y

[100  20  30  40   0  60  34   3   2]


array([[100,  20,  30],
       [ 40,   0,  60],
       [ 34,   3,   2]])

In [70]:
# transpose
y.T

array([[100,  40,  34],
       [ 20,   0,   3],
       [ 30,  60,   2]])

In [71]:
# Collapse the 3x3 array y back into a one-dimensional array
flat= y.flatten()
flat

array([100,  20,  30,  40,   0,  60,  34,   3,   2])

In [72]:
# Split the flattened array into three equal parts (9 elements -> 3 arrays of 3).
z = np.split(flat, 3)
# Use fresh names for the parts: a, b and c are already bound to other arrays
# earlier in this notebook (b is used by the arithmetic cells), and rebinding
# them here would break those cells when they are re-run.
first_part, second_part, third_part = z
print(first_part)
print(second_part)
print(third_part)

[100  20  30]
[40  0 60]
[34  3  2]


In [73]:
# Broadcasting is a NumPy mechanism that allows arrays of different shapes to be used together in arithmetic operations by automatically expanding 
# the smaller array to match the larger one.
broadcasted = arr1 + 10
broadcasted

array([110,  30,  40,  50,  10,  70,  44,  13,  12])

# Implemented save/load operations for NumPy arrays

In [75]:
np.save("my_array1.npy", arr1)   #to save when we have one NumPy array and saving exactly as it is

In [76]:
np.save("my_array2.npy", arr2)

In [77]:
# Load the 1-D array back from its .npy file and confirm its contents and shape
loaded_arr1 = np.load("my_array1.npy")
print(loaded_arr1)
print(loaded_arr1.shape)

[100  20  30  40   0  60  34   3   2]
(9,)


In [78]:
# Load the 2-D array back from its .npy file and confirm its contents and shape
loaded_arr2 = np.load("my_array2.npy")
print(loaded_arr2)
print(loaded_arr2.shape)

[[11 12 13]
 [21 22 50]
 [40 40 40]]
(3, 3)


In [79]:
# Store several arrays in a single .npz archive; the keyword names
# ("first", "second") become the keys used to retrieve each array after loading.
np.savez("array_collections.npz", first = arr1, second = arr2)

In [80]:
# Loading a .npz archive returns a lazily-read NpzFile that keeps the file open.
# Opening it in a with-block closes the underlying file handle as soon as the
# arrays have been read, instead of leaving it open for the rest of the session.
with np.load("array_collections.npz") as data:
    print(data.files)      # names of the arrays stored in the archive
    print(data['first'])
    print(data['second'])

['first', 'second']
[100  20  30  40   0  60  34   3   2]
[[11 12 13]
 [21 22 50]
 [40 40 40]]


# Compared NumPy's performance with standard Python lists

In [82]:
# Performance comparison: double one million integers with a pure-Python list
# comprehension and with a single vectorised NumPy expression.
import time

python_list = list(range(1000000))
# np.arange builds the integer array directly instead of iterating a Python range.
numpy_array = np.arange(1000000)

# time.perf_counter() is the clock intended for measuring short intervals;
# time.time() follows the wall clock, which can have coarse resolution and
# may be adjusted while the code is running.
start = time.perf_counter()
[x*2 for x in python_list]
print("Python list time:", time.perf_counter()-start)

start = time.perf_counter()
numpy_array * 2
print("NumPy time:", time.perf_counter()-start)

Python list time: 0.07389616966247559
NumPy time: 0.008181571960449219
