## NumPy
- A NumPy array is called an ndarray. It can store multiple values of the same data type.

In [2]:
# Build a one-dimensional ndarray from a plain Python list
import numpy as np


arr1D = np.array([1, 2, 3, 4, 5])

# Show the array contents and then its Python type, one per line
print(arr1D, type(arr1D), sep="\n")

[1 2 3 4 5]
<class 'numpy.ndarray'>


In [7]:
# Build a two-dimensional ndarray from a list of equal-length rows
arr2D = np.array([[1, 2, 3], [4, 5, 6]])

# Print the array, its type and its (rows, columns) shape, each followed by
# the same blank-line spacer, then finish with the total element count.
for item in (arr2D, type(arr2D), arr2D.shape):
    print(item)
    print('\n')
print(arr2D.size)

[[1 2 3]
 [4 5 6]]


<class 'numpy.ndarray'>


(2, 3)


6


In [10]:
# Indexing & slicing on a 1D array

arr = np.array([10, 20, 30, 40, 50])

# The stop index of a slice is exclusive, so 1:4 selects indices 1, 2 and 3
start, stop = 1, 4
arr[start:stop]

array([20, 30, 40])

In [14]:


# Create a 2D array (matrix) with 3 rows and 4 columns
rows = [
    [10, 20, 30, 40],
    [50, 60, 70, 80],
    [90, 100, 110, 120],
]
arr2d = np.array(rows)

print("Original 2D Array:", arr2d, sep="\n")
print('\n')
# Single element: row index 1 (2nd row), column index 2 (3rd column)
print(arr2d[1, 2])
print('\n')
# Sub-matrix: rows 0-1 and columns 1-2 (slice stops are exclusive)
print(arr2d[:2, 1:3])


Original 2D Array:
[[ 10  20  30  40]
 [ 50  60  70  80]
 [ 90 100 110 120]]


70


[[20 30]
 [60 70]]


In [27]:
# Reshape arrays - changing the shape (rows x columns) of an array without
# changing its data.
# Note: the total number of elements must remain the same.


# 1D array with 6 elements
arr = np.array([10, 20, 30, 40, 50, 60])
print("Original array:\n", arr)
print("Shape:", arr.shape)

# Reshape into 2 rows and 3 columns
reshaped_2d = arr.reshape(2, 3)
print("\nReshaped to 2x3:\n", reshaped_2d)

# Reshape into 3 rows and 2 columns (still a 2D array, just a different layout)
reshaped_3x2 = arr.reshape(3, 2)
print("\nReshaped to 3x2:\n", reshaped_3x2)

# Using -1 to let NumPy infer one dimension.
# -1 is a placeholder that says:
# "NumPy, you decide what this dimension should be so that all elements fit perfectly."
# Here the row count is fixed at 2, so the column count is inferred as 6 / 2 = 3.
reshaped_infer = arr.reshape(2, -1)  # 2 rows, columns inferred
print("\nReshaped with inferred columns (2, -1):\n", reshaped_infer)

Original array:
 [10 20 30 40 50 60]
Shape: (6,)

Reshaped to 2x3:
 [[10 20 30]
 [40 50 60]]

Reshaped to 3x2:
 [[10 20]
 [30 40]
 [50 60]]

Reshaped with inferred rows (2, -1):
 [[10 20 30]
 [40 50 60]]


## 6. Real-Life Example: Outlier Detection in Customer Transactions
In banking, we analyze monthly turnovers for different customers and identify outliers
(suspiciously high turnovers).

Example (requires NumPy: `import numpy as np`):

In [None]:

# Monthly turnover data (in KES thousands): one row per customer, one column per month
turnovers = np.array([
    [50, 52, 48, 51, 60, 500, 55, 53, 49, 48, 52, 51],          # Customer 1
    [30, 28, 35, 33, 29, 31, 32, 300, 28, 34, 30, 29],          # Customer 2
    [100, 105, 98, 102, 110, 120, 115, 112, 99, 98, 101, 105],  # Customer 3
])

# Per-customer statistics: axis=1 aggregates across the months in each row
mean_turnovers = turnovers.mean(axis=1)
std_turnovers = turnovers.std(axis=1)

# A month counts as an outlier when it exceeds mean + 2.5 * standard deviation
thresholds = mean_turnovers + 2.5 * std_turnovers

# Mask each customer's row against that customer's own threshold
outliers = []
for row, limit in zip(turnovers, thresholds):
    outliers.append(row[row > limit])

print("Outliers per customer:")
for customer_no, flagged in enumerate(outliers, start=1):
    print(f"Customer {customer_no}: {flagged}")

Outliers per customer:
Customer 1: [500]
Customer 2: [300]
Customer 3: []


In [30]:
# Creating a DataFrame from NumPy arrays

import pandas as pd

# Keep one array per column. A single np.array that mixes numbers and strings
# is coerced to an all-string dtype, which would turn the salaries into text.
salaries = np.array([5000, 4000])
names = np.array(['John', 'Doe'])

df = pd.DataFrame({'Salary': salaries, 'Name': names})

df

Unnamed: 0,Salary,Name
0,5000,John
1,4000,Doe


## Merge Operations

In [32]:
# Left-hand frame for the merge examples: points scored per player and game
player = ['player1', 'player2', 'player3']
points = [25, 30, 15]
title = ['Game1', 'Game2', 'Game3']

df1 = pd.DataFrame(dict(Player=player, Points=points, Title=title))
print(df1)

    Player  Points  Title
0  player1      25  Game1
1  player2      30  Game2
2  player3      15  Game3


In [33]:
# Right-hand frame for the merge examples: signature move per player and game.
# Only 'player1' also appears in df1.
player = ['player1', 'player5', 'player6']
power = ['Punch', 'Kick', 'Elbow']
title = ['Game1', 'Game5', 'Game6']
# The column is named 'power' with no trailing space, so df2['power'] works
df2 = pd.DataFrame({'Player': player, 'power': power, 'Title': title})
print(df2)

    Player power   Title
0  player1  Punch  Game1
1  player5   Kick  Game5
2  player6  Elbow  Game6


In [38]:
# Inner merge: keep only the players that appear in both frames.
# 'Title' exists in both frames and is not a join key, so it comes back as
# Title_x (from df1) and Title_y (from df2).
merged_inner = df1.merge(df2, how='inner', on='Player')
print("\nInner Merge Result:\n", merged_inner)
print("\n")
print("or")
# Same merge written with the top-level function
pd.merge(df1, df2, on='Player', how='inner')


Inner Merge Result:
     Player  Points Title_x power  Title_y
0  player1      25   Game1  Punch   Game1


or


Unnamed: 0,Player,Points,Title_x,power,Title_y
0,player1,25,Game1,Punch,Game1


In [39]:
# Left merge: every row of df1 is kept; columns coming from df2 are empty
# (NaN) for players that have no match there.

pd.merge(df1, df2, how='left', on='Player')



Unnamed: 0,Player,Points,Title_x,power,Title_y
0,player1,25,Game1,Punch,Game1
1,player2,30,Game2,,
2,player3,15,Game3,,


In [41]:
# Right merge: every row of df2 is kept; columns coming from df1 are empty
# (NaN) for players that have no match there.

pd.merge(df1, df2, how='right', on='Player')

Unnamed: 0,Player,Points,Title_x,power,Title_y
0,player1,25.0,Game1,Punch,Game1
1,player5,,,Kick,Game5
2,player6,,,Elbow,Game6


In [42]:
# Outer merge: rows from both frames are kept; whichever side has no match
# for a player is filled with NaN.

pd.merge(df1, df2, how='outer', on='Player')

Unnamed: 0,Player,Points,Title_x,power,Title_y
0,player1,25.0,Game1,Punch,Game1
1,player2,30.0,Game2,,
2,player3,15.0,Game3,,
3,player5,,,Kick,Game5
4,player6,,,Elbow,Game6


## Join Operations

In [48]:
# Use join() to combine two DataFrames based on their index,
# or to match a key column from one frame against the index of another.
# join() uses the index by default (not columns).
# You can specify a key column of the calling frame with on=.

# Dedicated names are used here so the player frames df1/df2 built in the
# merge section are not overwritten before the concatenation example reads them.
names_df = pd.DataFrame({'Name': ['Alice', 'Bob', 'Charlie']}, index=[1, 2, 3])
scores_df = pd.DataFrame({'Score': [85, 90, 88]}, index=[2, 3, 4])

# Inner join keeps only the index labels present in both frames (2 and 3)
joined = names_df.join(scores_df, how='inner')
print(joined)


      Name  Score
2      Bob     85
3  Charlie     90


## Concatenation Operations

In [46]:
# Row-wise concatenation: df1's rows are stacked beneath df2's rows
pd.concat([df2, df1], axis="index")

Unnamed: 0,Player,power,Title,Points
0,player1,Punch,Game1,
1,player5,Kick,Game5,
2,player6,Elbow,Game6,
0,player1,,Game1,25.0
1,player2,,Game2,30.0
2,player3,,Game3,15.0


In [47]:


# Two small frames that share the same columns
df1 = pd.DataFrame([[1, 3], [2, 4]], columns=['A', 'B'])
df2 = pd.DataFrame([[5, 7], [6, 8]], columns=['A', 'B'])

# Stack df2's rows under df1's; each frame keeps its own index labels (0, 1),
# so the labels repeat in the result.
pd.concat([df1, df2])


Unnamed: 0,A,B
0,1,3
1,2,4
0,5,7
1,6,8
