In [4]:
import numpy as np

# Data arrays
names = np.array(['Anna', 'John', 'Maria', 'Peter', 'Michael', 'Stiven', 'Karen'])
ages = np.array([28.0, 34.0, 29.0, 40.0, 32.2, 32.2, 30.0])
cities = np.array(['Kyiv', 'London', 'Berlin', 'New York', 'Paris', 'New York', 'London'])
scores = np.array([85, 90, 88, 95, 95, 80, 75])


#### 1. Viewing elements of arrays
##### You can view the first few elements of an array with basic indexing and slicing in NumPy:

In [5]:
print(names[:5])  # First 5 elements
scores[-3:]  # Last 3 elements

['Anna' 'John' 'Maria' 'Peter' 'Michael']


array([95, 80, 75])

#### 2. Basic statistics
##### You can use NumPy's built-in statistical functions to perform summary statistics on numeric data like ages and scores:

In [14]:
print('Mean score: ',np.mean(scores).round(2))  # Mean score
print('Median age:',np.median(ages) ) # Median age
print('Standard deviation:',np.std(scores).round(2)) # Standard deviation of scores
print('Maximum score:',np.max(scores))   # Maximum score
print('Minimum age:',np.min(ages))     # Minimum age


Mean score:  86.86
Median age: 32.2
Standard deviation: 6.92
Maximum score: 95
Minimum age: 28.0


#### 3. Selecting elements based on conditions
##### You can filter elements in an array based on conditions:

In [9]:
print('Scores greater than 80: ',scores[scores > 80])  # Scores greater than 80
names[scores > 80]   # Names of people with scores greater than 80

Scores greater than 80:  [85 90 88 95 95]


array(['Anna', 'John', 'Maria', 'Peter', 'Michael'], dtype='<U7')

#### 4. Sorting arrays
##### You can sort your data with np.sort():

In [17]:
np.sort(scores)  # Sort scores in ascending order
sorted_indices = np.argsort(scores)  # Indices that would sort the array
print(sorted_indices)

# Sort names based on the sorted score indices
print(names[sorted_indices]) # Names sorted by their scores


[6 5 0 2 1 3 4]
['Karen' 'Stiven' 'Anna' 'Maria' 'John' 'Peter' 'Michael']


#### 5. Element-wise operations
##### You can perform element-wise operations on arrays:

In [10]:
print(ages)  
print('after adding 1: ',ages+1) # Add 1 to every age
print(scores)
print('after multiplication by 2: ',scores*2) # Multiply every score by 2


[28.  34.  29.  40.  32.2 32.2 30. ]
after adding 1:  [29.  35.  30.  41.  33.2 33.2 31. ]
[85 90 88 95 95 80 75]
after multiplication by 2:  [170 180 176 190 190 160 150]


#### 6. Adding a new array (like adding a column)
##### In NumPy, you would use np.column_stack() or np.vstack() to add a new "column":

In [11]:
pass_status = scores > 80  # Create a boolean array for passing status
print('Boolean array for passing status: ',pass_status)
combined_data = np.column_stack((names, ages, cities, scores, pass_status))  # Stack columns together
print('New data array: \n',combined_data)

Boolean array for passing status:  [ True  True  True  True  True False False]
New data array: 
 [['Anna' '28.0' 'Kyiv' '85' 'True']
 ['John' '34.0' 'London' '90' 'True']
 ['Maria' '29.0' 'Berlin' '88' 'True']
 ['Peter' '40.0' 'New York' '95' 'True']
 ['Michael' '32.2' 'Paris' '95' 'True']
 ['Stiven' '32.2' 'New York' '80' 'False']
 ['Karen' '30.0' 'London' '75' 'False']]


#### 7. Group by operations (manual approach)
##### NumPy doesn't have a direct equivalent of groupby, but you can achieve similar results using boolean indexing:

In [15]:
# Mean score by city
np.mean(scores[cities == 'London'])  # Average score for people from London

# Group by and calculate mean scores for each city
unique_cities = np.unique(cities)
print('Unique cities:   ',unique_cities)
mean_scores_by_city = {city: np.mean(scores[cities == city]) for city in unique_cities}
print(mean_scores_by_city)

Unique cities:    ['Berlin' 'Kyiv' 'London' 'New York' 'Paris']
{np.str_('Berlin'): np.float64(88.0), np.str_('Kyiv'): np.float64(85.0), np.str_('London'): np.float64(82.5), np.str_('New York'): np.float64(87.5), np.str_('Paris'): np.float64(95.0)}


#### 8. Handling missing values
##### In NumPy, np.nan is used for missing values. You can detect and fill missing values with functions like np.isnan() and np.nan_to_num():

In [38]:
# Example with missing values in ages
ages_with_nan = np.array([28.0, np.nan, 29.0, 40.0, 32.2, np.nan, 30.0])

print(np.isnan(ages_with_nan))  # Check for missing values
print(np.nan_to_num(ages_with_nan, nan=0))  # Replace NaNs with 0


[False  True False False False  True False]
[28.   0.  29.  40.  32.2  0.  30. ]


#### 9. Transposing and reshaping arrays
##### NumPy allows reshaping or transposing arrays. For instance, turning rows into columns or vice versa:

In [2]:
print(combined_data.T)  # Transpose the array


NameError: name 'combined_data' is not defined

#### 10. Basic indexing and slicing
##### You can access specific rows or columns in NumPy using indexing and slicing:

In [16]:
print(ages[0])  # First element in ages
print(ages[2:5])  # Elements from index 2 to 4 (inclusive)
print(names[ages > 30])  # Names of people older than 30


28.0
[29.  40.  32.2]
['John' 'Peter' 'Michael' 'Stiven']


### 11. Creating arrays (equivalent to initializing a DataFrame)
#### You can create arrays of specific shapes and values in NumPy:

In [19]:
print(np.zeros((3, 4)))  # Create a 3x4 array filled with zeros
print()
print(np.ones((2, 3)))   # Create a 2x3 array filled with ones
print(np.random.randn(5))  # Create an array with 5 random numbers from a normal distribution

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]

[[1. 1. 1.]
 [1. 1. 1.]]
[-0.12791619 -0.41103115  0.54414545  0.53859643  1.27267784]


#### 12.  Setting a seed for reproducibility
##### If you want to generate random numbers that are the same every time you run the code, you can set the seed:

In [45]:
np.random.seed(42)
random_numbers = np.random.rand(5)
print(random_numbers)
print(random_numbers)  # Same output as above because the seed was reset

[0.37454012 0.95071431 0.73199394 0.59865848 0.15601864]
[0.37454012 0.95071431 0.73199394 0.59865848 0.15601864]
