**Cleaning Messy Data with Pandas**

In [1]:
import pandas as pd

# Load dataset
df = pd.read_csv("wiki.csv")

# Inspect dataset: column names, non-null counts and dtypes.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appends a stray "None" line.
df.info()

# Count missing values per column
print(df.isnull().sum())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 11 columns):
 #   Column                            Non-Null Count  Dtype 
---  ------                            --------------  ----- 
 0   Rank                              20 non-null     int64 
 1   Peak                              9 non-null      object
 2   All Time Peak                     6 non-null      object
 3   Actual gross                      20 non-null     object
 4   Adjusted gross (in 2022 dollars)  20 non-null     object
 5   Artist                            20 non-null     object
 6   Tour title                        20 non-null     object
 7   Year(s)                           20 non-null     object
 8   Shows                             20 non-null     int64 
 9   Average gross                     20 non-null     object
 10  Ref.                              20 non-null     object
dtypes: int64(2), object(9)
memory usage: 1.8+ KB
None
Rank                                

In [5]:
# The columns that contain missing values ("Peak" and "All Time Peak") are
# stored as text (object dtype), so a numeric-only median never reaches them
# and the fill would leave every NaN in place. Parse them to numbers first.
# NOTE(review): assumes each value begins with the rank number, optionally
# followed by a footnote marker such as "[2]" — confirm against the raw data.
peak_columns = ["Peak", "All Time Peak"]
df_numeric = df.assign(
    **{
        column: pd.to_numeric(df[column].str.extract(r"^\s*(\d+)", expand=False))
        for column in peak_columns
    }
)

# Fill missing values with the median of each numeric column
df_filled = df_numeric.fillna(df_numeric.median(numeric_only=True))

# Drop rows with any missing values
df_dropped_rows = df.dropna()

# Drop a specific column with many missing values
df_dropped_column = df.drop(columns=["All Time Peak"])

**Numerical Computing with NumPy**

In [11]:
import numpy as np

# Two 1-D integer vectors of equal length
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

# 2x3 matrix used for the broadcasting example
matrix = np.array([[1, 2, 3], [4, 5, 6]])

# Sum the vectors position by position
add_result = np.add(a, b)

# The scalar 10 is broadcast across every entry of the matrix
broadcast_result = 10 + matrix

# Inner product of the two vectors
dot_result = a.dot(b)

print(f'Element-wise addition:{add_result}')
print(f'Broadcasting:{broadcast_result}')
print(f'Dot Product:{dot_result}')

Element-wise addition:[5 7 9]
Broadcasting:[[11 12 13]
 [14 15 16]]
Dot Product:32


**Normalize a Matrix with NumPy**

In [12]:
# 3x2 sample feature matrix
X = np.array([[1, 2], [3, 4], [5, 6]])

# Standardize column by column: subtract the column mean, then divide by the
# column standard deviation (NumPy's default population std, ddof=0)
column_mean = X.mean(axis=0)
column_std = X.std(axis=0)
X_normalized = (X - column_mean) / column_std

print("Normalized Matrix:\n", X_normalized)

Normalized Matrix:
 [[-1.22474487 -1.22474487]
 [ 0.          0.        ]
 [ 1.22474487  1.22474487]]
