In [1]:
import numpy as np
print(np.__version__)

2.2.3


In [3]:
# Array creation
list = [10, 20, 30, 40, 50]
arr = np.array(list)
print(arr)
print("Type:", type(arr))
print("Shape:", arr.shape)

[10 20 30 40 50]
Type: <class 'numpy.ndarray'>
Shape: (5,)


In [5]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]]) # 2D array
print(arr1)
print("Shape:", arr1.shape) # Rows and columns
print("Size:", arr1.size) # Size
print("First row:", arr1[0]) # First row
print("First column:", arr1[:, 0]) # First column
print("First row and second column:", arr1[0, 1]) # First row and second column

[[1 2 3]
 [4 5 6]]
Shape: (2, 3)
Size: 6
First row: [1 2 3]
First column: [1 4]
First row and second column: 2


In [11]:
# Generate arrays
print(np.zeros((2, 3))) # 2x3 array with zeros
print()
print(np.ones((2, 3))) # 2x3 array with ones
print()
print(np.full((2, 3), 5)) # 2x3 array with 5
print()
print(np.eye(3)) # Identity matrix
print()
print(np.arange(1, 10, 2)) # Array with values from 1 to 10 with step 2
print()
# 2D array with values from 1 to 10 with step 3 in 1st row and and from 2 to 10 with step 3 in 2nd row
sample = np.array([np.arange(1, 10, 3), np.arange(2, 10, 3)])
print(sample)

[[0. 0. 0.]
 [0. 0. 0.]]

[[1. 1. 1.]
 [1. 1. 1.]]

[[5 5 5]
 [5 5 5]]

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]

[1 3 5 7 9]

[[1 4 7]
 [2 5 8]]


In [15]:
# Element-wise operations
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])

print("Array a:")
print(a)
print("Array b:")
print(b)

print("Element-wise addition:")
print(a+b) # Element-wise addition
print("Element-wise multiplication:")
print(a*b)  # Element-wise multiplication
print("Element-wise power:")
print(a**2) # Element-wise power

Array a:
[[1 2]
 [3 4]]
Array b:
[[5 6]
 [7 8]]
Element-wise addition:
[[ 6  8]
 [10 12]]
Element-wise multiplication:
[[ 5 12]
 [21 32]]
Element-wise power:
[[ 1  4]
 [ 9 16]]


In [16]:
import numpy as np

# Purchase amounts (5 customers, 3 different purchases)
purchases = np.array([
    [200, 150, 100],
    [300, 250, 400],
    [100, 200, 150],
    [500, 600, 550],
    [50, 80, 100],
])

# Total spend per customer
# total_spend = np.sum(purchases, axis=1)
total_spend = purchases.sum(axis=1)

# Average spend per customer
# average_spend = np.mean(purchases, axis=1)
average_spend = purchases.mean(axis=1)

print("Total spend per customer:", total_spend)
print("Average spend per customer:", average_spend)
print("Customer with max average spend:", max(average_spend))

Total spend per customer: [ 450  950  450 1650  230]
Average spend per customer: [150.         316.66666667 150.         550.          76.66666667]
Customer with max average spend: 550.0


In [None]:
# System of Linear Equation

# 2x + 3y = 8
# 4x + y = 10

# Coefficients of variables
A = np.array([[2, 3], [4, 1]])

# Constants on the right-hand side
B = np.array([8, 10])

# Solve for x and y
solution = np.linalg.solve(A, B)

print("Solution (x, y):", solution)

Solution (x, y): [2.2 1.2]


In [None]:
# Sales Assignment
sales = np.array([1000, 1800, 1500, 1100, 1900, 1500, 1950, 2000, 1750, 1400])

print("Total sales:", sum(sales))
print("Average sales:", sales.mean())
print("Maximum sales:", max(sales))
print("Minimum sales:", min(sales))

# Days where sales were above average
sales_above_average = sales[sales > sales.mean()]
print("Days where sales were above average:", sales_above_average)

# 10% increase in sales
increased_sales = np.array(sales * 1.1)
print("10% increase in sales:", increased_sales)

# Sort sales in ascending order
sorted_sales = np.sort(sales)
print("Sorted sales:", sorted_sales)


Total sales: 15900
Average sales: 1590.0
Maximum sales: 2000
Minimum sales: 1000
Days where sales were above average: [1800 1900 1950 2000 1750]
10% increase in sales: [1100. 1980. 1650. 1210. 2090. 1650. 2145. 2200. 1925. 1540.]
Sorted sales: [1000 1100 1400 1500 1500 1750 1800 1900 1950 2000]


In [31]:
import pandas as pd 

df = pd.read_csv("Sales_Data.csv")
df.isnull ()
df.fillna(0, inplace=True)

In [33]:
df.duplicated()
df.drop_duplicates(inplace=True)

In [None]:
pd.to_datetime(df['Date'])

0   2024-02-01
1   2024-02-02
2   2024-02-03
3   2024-02-04
4   2024-02-05
5   2024-02-06
6   2024-02-07
7   2024-02-08
8   2024-02-09
9   2024-02-10
Name: Date, dtype: datetime64[ns]

In [39]:
df['Quantity'] = df['Quantity'].astype(int)
df.dtypes

Order_ID          int64
Product          object
Category         object
Quantity          int64
Price             int64
Total_Amount    float64
Date             object
dtype: object

In [40]:
# Outlier removal using IQR method

# Sample data with outlier
data = np.array([10, 12, 14, 15, 18, 21, 25, 100])

# Calculate Q1, Q3 and IQR
Q1 = np.percentile(data, 25)
Q3 = np.percentile(data, 75)
IQR = Q3 - Q1

# Define the bounds for outliers
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Identify the outliers
outliers = data[(data < lower_bound) | (data > upper_bound)]

print("Outliers:", outliers)

Outliers: [100]
