In [1]:
import numpy as np

# 1. Array Creation

# 1D Array
array_1d = np.array([1, 2, 3, 4, 5])
print("1D Array:", array_1d)

# 2D Array
array_2d = np.array([[1, 2, 3], [4, 5, 6]])
print("\n2D Array:\n", array_2d)

# 3D Array
array_3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
print("\n3D Array:\n", array_3d)

# Using np.arange (1D)
array_range = np.arange(10)
print("\nArray using np.arange:", array_range)

# Using np.linspace (1D, evenly spaced values)
array_linspace = np.linspace(0, 1, 5)
print("\nArray using np.linspace:", array_linspace)

# 2. Basic Operations on Arrays

# Indexing
print("\nElement at index 2 in 1D array:", array_1d[2])
print("Element at row 1, column 2 in 2D array:", array_2d[1, 2])

# Slicing
print("\nSlice of 1D array (index 1 to 3):", array_1d[1:4])
print("Slice of 2D array (first 2 rows):\n", array_2d[:2])

# Reshaping
array_reshaped = array_range.reshape((2, 5))
print("\nReshaped array (2x5):\n", array_reshaped)

# Flattening
array_flattened = array_2d.flatten()
print("\nFlattened 2D array:", array_flattened)

# Concatenation
array_concatenated = np.concatenate((array_1d, np.array([6, 7, 8])))
print("\nConcatenated array:", array_concatenated)

# 3. Investigating Array Attributes

print("\nAttributes of 2D array:")
print("Shape:", array_2d.shape)
print("Size:", array_2d.size)
print("Data type (dtype):", array_2d.dtype)
print("Number of dimensions (ndim):", array_2d.ndim)

# 4. Reshape, Resize, Flatten

# Reshape
reshaped_array = array_range.reshape(2, 5)
print("\nReshaped array to 2x5:\n", reshaped_array)

# Resize (in-place)
array_resized = np.resize(array_1d, (2, 3))
print("\nResized array (2x3):\n", array_resized)

# Flatten (already demonstrated above)


1D Array: [1 2 3 4 5]

2D Array:
 [[1 2 3]
 [4 5 6]]

3D Array:
 [[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]

Array using np.arange: [0 1 2 3 4 5 6 7 8 9]

Array using np.linspace: [0.   0.25 0.5  0.75 1.  ]

Element at index 2 in 1D array: 3
Element at row 1, column 2 in 2D array: 6

Slice of 1D array (index 1 to 3): [2 3 4]
Slice of 2D array (first 2 rows):
 [[1 2 3]
 [4 5 6]]

Reshaped array (2x5):
 [[0 1 2 3 4]
 [5 6 7 8 9]]

Flattened 2D array: [1 2 3 4 5 6]

Concatenated array: [1 2 3 4 5 6 7 8]

Attributes of 2D array:
Shape: (2, 3)
Size: 6
Data type (dtype): int32
Number of dimensions (ndim): 2

Reshaped array to 2x5:
 [[0 1 2 3 4]
 [5 6 7 8 9]]

Resized array (2x3):
 [[1 2 3]
 [4 5 1]]


In [2]:
import numpy as np

# 1. Load Dataset

# Loading a CSV file into a NumPy array (assuming the file has numerical data)
# You can replace 'dataset.csv' with your actual CSV file path.
data = np.genfromtxt('dataset.csv', delimiter=',', skip_header=1)
print("Loaded Data:\n", data)

# If you're dealing with a dataset that contains missing values, 
# the `np.genfromtxt` function will convert them to `np.nan`.

# 2. Clean and Preprocess the Data

# Handle missing values: Replace `np.nan` with the column mean (example of imputation)
# We use `np.nanmean` to calculate the mean excluding nan values.
nan_mask = np.isnan(data)
means = np.nanmean(data, axis=0)
data[nan_mask] = np.take(means, np.where(nan_mask)[1])

print("\nData after handling missing values:\n", data)

# 3. Normalization (Min-Max Scaling)

# Min-Max Scaling normalizes the data to a range between 0 and 1
data_min = np.min(data, axis=0)
data_max = np.max(data, axis=0)
data_normalized = (data - data_min) / (data_max - data_min)
print("\nNormalized Data (Min-Max Scaling):\n", data_normalized)

# 4. Standardization (Z-Score Scaling)

# Z-Score Standardization: Subtract the mean and divide by the standard deviation
data_mean = np.mean(data, axis=0)
data_std = np.std(data, axis=0)
data_standardized = (data - data_mean) / data_std
print("\nStandardized Data (Z-Score Scaling):\n", data_standardized)

# 5. Statistical Measures

# Mean
mean = np.mean(data, axis=0)
print("\nMean of each column:", mean)

# Median
median = np.median(data, axis=0)
print("Median of each column:", median)

# Standard Deviation
std_dev = np.std(data, axis=0)
print("Standard Deviation of each column:", std_dev)

# Variance
variance = np.var(data, axis=0)
print("Variance of each column:", variance)

# Other statistical measures (sum, min, max)
sum_values = np.sum(data, axis=0)
min_values = np.min(data, axis=0)
max_values = np.max(data, axis=0)

print("Sum of each column:", sum_values)
print("Min of each column:", min_values)
print("Max of each column:", max_values)


OSError: dataset.csv not found.