# Data Aggregation

In [None]:
import numpy as np

# 3x3 demo matrix holding the integers 1..9, filled row by row.
data = np.arange(1, 10).reshape(3, 3)

# Grand total across every cell of the matrix.
total_sum = data.sum()
print("Total Sum:", total_sum)

# Arithmetic mean across every cell of the matrix.
average = data.mean()
print("Average:", average)

# Collapse axis 1 (the columns) to get one total per row.
row_sums = data.sum(axis=1)
print("Row Sums:", row_sums)

# Largest single entry anywhere in the matrix.
max_value = data.max()
print("Max Value:", max_value)

# Smallest single entry anywhere in the matrix.
min_value = data.min()
print("Min Value:", min_value)

# Finally show the matrix itself for reference.
print(data)

Total Sum: 45
Average: 5.0
Row Sums: [ 6 15 24]
Max Value: 9
Min Value: 1
[[1 2 3]
 [4 5 6]
 [7 8 9]]


# Data Grouping

In [None]:
import numpy as np

# Category labels to be grouped.
data = np.array(['A', 'B', 'A', 'C', 'B', 'A', 'C'])

# Distinct labels (sorted) together with how often each one occurs.
unique_values, value_counts = np.unique(data, return_counts=True)
print("Unique Values:", unique_values)
print("Value Counts:", value_counts)

# Map every distinct label to the sub-array of entries equal to it,
# selected with a boolean mask.
grouped_data = {label: data[data == label] for label in unique_values}

# Report each group on its own line.
for key, value in grouped_data.items():
    print("Group:", key, "->", value)

Unique Values: ['A' 'B' 'C']
Value Counts: [3 2 2]
Group: A -> ['A' 'A' 'A']
Group: B -> ['B' 'B']
Group: C -> ['C' 'C']


# Data Splitting

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Five-row demo frame: integer column 'A' and single-letter column 'B'.
data = pd.DataFrame(
    {
        'A': [1, 2, 3, 4, 5],
        'B': ['a', 'b', 'c', 'd', 'e'],
    }
)

# Hold out 20% of the rows for testing and keep the remaining 80% for
# training. random_state is fixed so the shuffle is reproducible.
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Show the training split under its heading (heading and frame on separate lines).
print("Training Data:", train_data, sep="\n")

Training Data:
   A  B
4  5  e
2  3  c
0  1  a
3  4  d


In [None]:
# Show the testing split under its heading (heading and frame on separate lines).
print("Testing Data:", test_data, sep="\n")

Testing Data:
   A  B
1  2  b
