In [1]:
import numpy as np
from scipy import stats

# ARRAY CREATION
Various methods to create arrays.

In [2]:
# Build a one-dimensional ndarray from a flat Python list of integers.
array_1d = np.asarray([1, 2, 3, 4, 5])
print("1D Array:", array_1d)

1D Array: [1 2 3 4 5]


In [3]:
# A list of equal-length rows becomes a 2 x 3 two-dimensional ndarray.
array_2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print("2D Array:\n", array_2d)

2D Array:
 [[1 2 3]
 [4 5 6]]


In [4]:
# Allocate a 3 x 3 array filled with zeros (float elements, as the output shows).
zeros_array = np.zeros(shape=(3, 3))
print("Zeros Array:\n", zeros_array)

Zeros Array:
 [[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [5]:
# Allocate a 2 x 4 array filled with ones (float elements, as the output shows).
ones_array = np.ones(shape=(2, 4))
print("Ones Array:\n", ones_array)

Ones Array:
 [[1. 1. 1. 1.]
 [1. 1. 1. 1.]]


In [6]:
# Evenly spaced integers from 10 up to (but not including) 20, in steps of 2.
range_array = np.arange(start=10, stop=20, step=2)
print("Range Array:", range_array)

Range Array: [10 12 14 16 18]


# DATA MANIPULATION
INDEXING:
Accessing specific elements in 1D and 2D arrays.

In [7]:
# Access a single element of a 1D array by position.
# Indices are zero-based, so index 2 is the third element (3 here).
element_1d = array_1d[2]  
print("Element at index 2 in 1D Array:", element_1d)

Element at index 2 in 1D Array: 3


In [8]:
# Access a single element of a 2D array with a (row, column) index pair.
# Both indices are zero-based: row 1 is the second row, column 2 the third column.
element_2d = array_2d[1, 2]  
print("Element at (1, 2) in 2D Array:", element_2d)

Element at (1, 2) in 2D Array: 6


SLICING :

In [9]:
# Slice a 1D array: start index 1 is included, stop index 4 is excluded,
# so this selects the elements at positions 1, 2 and 3.
slice_1d = array_1d[1:4]  
print("Slice of 1D Array [1:4]:", slice_1d)

Slice of 1D Array [1:4]: [2 3 4]


In [10]:
# Slice a 2D array with one slice per axis:
# rows 0-1 (0:2) and columns 1-2 (1:3); the stop index of each slice is excluded.
slice_2d = array_2d[0:2, 1:3] 
print("Slice of 2D Array [0:2, 1:3]:\n", slice_2d)

Slice of 2D Array [0:2, 1:3]:
 [[2 3]
 [5 6]]


RESHAPING : CHANGING THE SHAPE OF AN ARRAY

In [11]:
# Reshape the 1D array into a 2D array with a single row.
# Passing -1 for the column count lets NumPy infer it from the array's size,
# so the cell does not hard-code the length of array_1d.
reshaped_array = array_1d.reshape((1, -1))
print("Reshaped 1D Array to 2D:\n", reshaped_array)

Reshaped 1D Array to 2D:
 [[1 2 3 4 5]]


In [12]:
# Collapse the 2D array into one dimension; the output lists the rows one
# after another (row-major order).
flattened_array = array_2d.flatten()
print("Flattened 2D Array:", flattened_array)

Flattened 2D Array: [1 2 3 4 5 6]


# MATHEMATICAL OPERATIONS
Performing basic element-wise operations and matrix operations.
# Basic operations

In [13]:
# Addition: the scalar 5 is added to every element of the array.
add_result = array_1d + 5
print("addition:\n", add_result)

addition:
 [ 6  7  8  9 10]


In [14]:
# Multiplication: every element of the 2D array is multiplied by the scalar 2
# (element-wise, not matrix multiplication).
mul_result = array_2d * 2
print("multiplication:\n", mul_result)

multiplication:
 [[ 2  4  6]
 [ 8 10 12]]


In [15]:
# Number of dimensions (axes) of the array.
array_1d.ndim

1

In [16]:
# Shape of the array: a tuple with the length of each dimension.
array_1d.shape

(5,)

In [17]:
# Total number of elements in the array.
array_1d.size

5

In [18]:
# Data type of the array's elements.
# NOTE(review): the recorded output is int32; the default integer width depends
# on the platform and NumPy version, so other machines may show int64.
array_2d.dtype

dtype('int32')

In [19]:
# Matrix multiplication: a (2, 3) matrix times a (3, 1) column vector gives a (2, 1) result.
# The @ operator is used instead of np.dot because NumPy's documentation
# prefers matmul / @ when both operands are 2-D.
matrix_mult_result = array_2d @ np.array([[1], [2], [3]])
print("Matrix multiplication result:\n", matrix_mult_result)

Matrix multiplication result:
 [[14]
 [32]]


In [20]:
# Size in bytes of a single array element (4 in the recorded run, which
# matches the int32 dtype reported for array_2d).
array_2d.itemsize

4

In [21]:
# Sample angles in radians (0, pi/2 and pi) used by the math-function cells below.
n1 = np.array([0, np.pi / 2, np.pi])

In [22]:
# Element-wise exponential: e raised to each value in n1.
np.exp(n1)

array([ 1.        ,  4.81047738, 23.14069263])

In [23]:
# Element-wise sine. sin(pi) is shown as ~1.2e-16 rather than exactly 0 because
# np.pi is only a floating-point approximation of pi.
np.sin(n1)

array([0.0000000e+00, 1.0000000e+00, 1.2246468e-16])

In [24]:
# Element-wise cosine. cos(pi/2) is shown as ~6.1e-17 rather than exactly 0
# for the same floating-point reason.
np.cos(n1)

array([ 1.000000e+00,  6.123234e-17, -1.000000e+00])

In [25]:
# Element-wise tangent. tan(pi/2) is mathematically undefined; because the
# floating-point value np.pi / 2 is not exactly pi/2, the result is a very
# large finite number (~1.6e16) rather than an error or infinity.
np.tan(n1)

array([ 0.00000000e+00,  1.63312394e+16, -1.22464680e-16])

In [26]:
# Element-wise square root (every value in n1 is non-negative).
np.sqrt(n1)

array([0.        , 1.25331414, 1.77245385])

# boolean indexing 

In [27]:
# `array_2d > 3` compares every element with 3 and yields a boolean mask;
# indexing with that mask returns the matching elements as a 1D array.
bool_indexing = array_2d[array_2d > 3]  
print("Elements greater than 3 in 2D Array:", bool_indexing)

Elements greater than 3 in 2D Array: [4 5 6]


# DATA AGGREGATION 

In [28]:
# Draw a 5 x 4 table of random integers from 1 to 99 (the upper bound 100 is excluded).
# NOTE(review): no seed is set before this draw, so the values (and every
# statistic computed from them below) change on each run.
data = np.random.randint(low=1, high=100, size=(5, 4))
print(data)

[[13 18 77  4]
 [56 92 75 93]
 [ 7 55 78 22]
 [23 18 98 59]
 [72 67 44 56]]


MEAN 

In [29]:
# Arithmetic mean over every element of the array (no axis given).
mean_value = data.mean()
print("Mean:", mean_value)

Mean: 51.35


MEDIAN

In [30]:
# Median over every element of the array (no axis given).
median_value = np.median(data)
print("Median:", median_value)

Median: 56.0


STANDARD DEVIATION 

In [31]:
# Standard deviation over every element of the array (NumPy's default, ddof=0).
std_dev = data.std()
print("Standard Deviation:", std_dev)

Standard Deviation: 29.91701021158364


SUM

In [32]:
# Sum of every element of the array (no axis given).
total_sum = data.sum()
print("Sum:", total_sum)

Sum: 1027


In [33]:
# 50th percentile over every element of the array; this is the same quantity
# as the median computed earlier.
total_percentile = np.percentile(data, q=50)
print("percentile:", total_percentile)

percentile: 56.0


COMPUTE STATISTICS ALONG AXES

In [34]:
# axis=1 averages across the columns, producing one mean per row.
mean_rows = data.mean(axis=1)
print("Mean along rows:", mean_rows)

Mean along rows: [28.   79.   40.5  49.5  59.75]


In [35]:
# axis=0 averages down the rows, producing one mean per column.
mean_columns = data.mean(axis=0)
print("Mean along columns:", mean_columns)

Mean along columns: [34.2 50.  74.4 46.8]


In [36]:
# Median along axis=1: one median per row, taken across that row's columns.
median_rows = np.median(data, axis=1)
print("Median along rows:", median_rows)

Median along rows: [15.5 83.5 38.5 41.  61.5]


In [37]:
# Median along axis=0: one median per column, taken down that column's rows.
median_columns = np.median(data, axis=0)
print("Median along columns:", median_columns)

Median along columns: [23. 55. 77. 56.]


In [38]:
# axis=0 adds down the rows, producing one total per column.
sum_columns = data.sum(axis=0)
print("Sum along columns:", sum_columns)

Sum along columns: [171 250 372 234]


# DATA ANALYSIS USING NUMPY 
creating a sample dataset 

In [39]:
# Seed NumPy's global random generator so the sample below is reproducible.
np.random.seed(0)
# 1000 rows x 3 features of uniform random values scaled into [0, 100).
# This rebinds `data`, replacing the 5 x 4 integer array used in the cells above.
data = 100 * np.random.rand(1000, 3)
# Show only the first five rows rather than the whole array.
print("Sample Data:\n", data[:5])

Sample Data:
 [[54.88135039 71.51893664 60.27633761]
 [54.4883183  42.36547993 64.58941131]
 [43.75872113 89.17730008 96.36627605]
 [38.34415188 79.17250381 52.88949198]
 [56.80445611 92.55966383  7.10360582]]


Finding correlations

In [41]:
# Compute the correlation matrix between the three features.
# rowvar=False tells corrcoef that each column is a variable and each row an
# observation, so the result is 3 x 3 (one row/column per feature).
correlation_matrix = np.corrcoef(data, rowvar=False)
print("Correlation Matrix:\n", correlation_matrix)

Correlation Matrix:
 [[1.         0.01549005 0.06686736]
 [0.01549005 1.         0.03008067]
 [0.06686736 0.03008067 1.        ]]


identifying outliers

In [42]:
# Flag outliers per feature with the z-score rule.
# `stats` (scipy.stats) is imported in the notebook's first cell alongside NumPy.
# zscore with axis=0 standardises each column separately; np.abs keeps only the
# distance from the column mean, in units of standard deviations.
z_scores = np.abs(stats.zscore(data, axis=0))
# Cut-off used here: a value counts as an outlier when it lies more than
# 3 standard deviations from its column mean.
threshold = 3
outliers = z_scores > threshold
# Only the first five rows of the mask are shown. The sample was drawn
# uniformly from [0, 100), so no value can lie much more than roughly 1.7
# standard deviations from the mean and the mask is expected to be all False.
print("Outliers (boolean mask):\n", outliers[:5])

Outliers (boolean mask):
 [[False False False]
 [False False False]
 [False False False]
 [False False False]
 [False False False]]


calculating percentiles

In [44]:
# Quartiles of each feature (column): a single percentile call with three
# requested levels returns one row per level, unpacked into the three names.
percentiles_25, percentiles_50, percentiles_75 = np.percentile(
    data, [25, 50, 75], axis=0
)

print("25th Percentile:\n", percentiles_25)
print("50th Percentile (Median):\n", percentiles_50)
print("75th Percentile:\n", percentiles_75)

25th Percentile:
 [24.58891912 25.95436298 27.41549352]
50th Percentile (Median):
 [50.47048025 49.83071758 49.90504209]
75th Percentile:
 [77.09744017 75.64236137 75.52793214]


# NORMALISING DATA

In [45]:
# Standardise every feature (column): subtract the column mean and divide by
# the column standard deviation, giving zero mean and unit variance per column.
normalized_data = (data - data.mean(axis=0)) / data.std(axis=0)
print("Normalized Data:\n", normalized_data)

Normalized Data:
 [[ 0.14563521  0.74332202  0.32774122]
 [ 0.13245741 -0.25700861  0.47815644]
 [-0.22729068  1.34922622  1.58635079]
 ...
 [-0.10113331 -0.72490168  1.29347258]
 [-0.73913842  1.52049433  0.13024615]
 [-0.60942758  1.08080574  0.65778068]]


# CORRELATION AND COVARIANCE

In [46]:
# Columns are the variables (rowvar=False), so both results are 3 x 3.
# Correlation is the covariance rescaled to the range [-1, 1].
correlation_matrix = np.corrcoef(data, rowvar=False)
covariance_matrix = np.cov(data, rowvar=False)

# Report both matrices, correlation first.
print("Correlation Matrix:\n", correlation_matrix)
print("Covariance Matrix:\n", covariance_matrix)

Correlation Matrix:
 [[1.         0.01549005 0.06686736]
 [0.01549005 1.         0.03008067]
 [0.06686736 0.03008067 1.        ]]
Covariance Matrix:
 [[890.43937236  13.47779076  57.24383263]
 [ 13.47779076 850.21250369  25.16306696]
 [ 57.24383263  25.16306696 823.04719294]]


# APPLICATIONS OF DATA SCIENCE 

NumPy is a critical tool for any data science professional because of its efficiency, versatility, and performance in numerical computation. Its powerful n-dimensional array object (`ndarray`) provides a foundation for numerical analysis in Python that goes well beyond traditional Python data structures such as lists and tuples.

Advantages of Using NumPy over Traditional Python Data Structures
1. Performance: NumPy arrays store their elements in a contiguous block of memory, whereas a Python list stores an array of pointers to separately allocated objects. This makes operations on NumPy arrays much faster, particularly when dealing with large datasets. NumPy's array operations are also implemented in C, which further accelerates computation.

2. Memory Efficiency: NumPy arrays use significantly less memory than Python lists holding the same numeric data. This is crucial when working with large datasets, as it reduces memory overhead and allows for efficient storage and manipulation of data.

3. Vectorization: NumPy allows for vectorized operations, meaning that batch operations can be performed on entire arrays without the need for explicit loops. This not only makes the code more concise and readable but also significantly speeds up execution.

4. Broad Functionality: NumPy provides a wide range of mathematical and statistical functions that are essential for data analysis. These include linear algebra operations, Fourier transformations, random number generation, and more, which are not only optimized but also easier to use than equivalent hand-written Python code.

5. Integration with Other Libraries: NumPy serves as the backbone for many other data science libraries like pandas, SciPy, scikit-learn, and TensorFlow. This makes it indispensable for anyone looking to work in the field of data science or machine learning.

6. Advanced Mathematical Functions: NumPy provides a wide array of mathematical functions, including linear algebra, statistical operations, and Fourier transforms, all of which are optimized for performance. These functions are not only faster but also more reliable than manually implementing the same operations in pure Python.

Integration with Other Libraries:

NumPy serves as the foundational library for many other data science libraries like Pandas, Scikit-learn, TensorFlow, and SciPy. This integration allows for seamless data manipulation, machine learning model development, and scientific computing.

Handling Multidimensional Data:

Unlike Python lists, NumPy arrays can handle multidimensional data easily, making it possible to work with matrices, tensors, and higher-dimensional arrays. This is particularly useful in areas like image processing, where data often exists in multiple dimensions.


Real-World Examples Where NumPy’s Capabilities Are Crucial
1. Machine Learning: NumPy is heavily used in machine learning for operations such as matrix multiplication, which is fundamental to algorithms like linear regression, neural networks, and deep learning models. Libraries like TensorFlow and PyTorch implement their own tensor operations and gradient calculations, but their tensor APIs closely follow NumPy's and convert to and from NumPy arrays.

2. Financial Analysis: In finance, NumPy is used to perform large-scale simulations and calculations like Monte Carlo simulations, which are essential for risk analysis and option pricing. The ability to handle large datasets efficiently is crucial for processing financial data.

3. Scientific Research: In scientific research, particularly in fields like physics, chemistry, and biology, NumPy is used for tasks such as image processing, signal processing, and solving differential equations. Its ability to handle multi-dimensional data arrays and perform complex mathematical operations makes it a go-to tool for scientists.

4. Data Manipulation: NumPy’s broadcasting and advanced indexing capabilities allow for sophisticated data manipulation tasks that are required in big data analytics. This is essential for cleaning, transforming, and analyzing large datasets in a time-efficient manner.


