**Name: Mohammed Varaliya**

**Roll No: T109**

## Removing Rows or Columns with Missing Values:

In [1]:
import pandas as pd

# Toy frame: two numeric columns, each with exactly one missing entry.
data = {'A': [1, 2, None, 4], 'B': [5, None, 7, 8]}
df = pd.DataFrame(data)

# Row-wise removal: a row survives only if every one of its cells is populated.
df_dropped_rows = df.dropna(axis="index", how="any")

# Column-wise removal: a column survives only if it has no gaps at all.
# Both 'A' and 'B' contain a missing value here, so no column is left.
df_dropped_columns = df.dropna(axis="columns", how="any")

# Show the untouched input first, then the row-filtered result under its label.
print(df.head())

print("DataFrame with Dropped Rows:", df_dropped_rows, sep="\n")



     A    B
0  1.0  5.0
1  2.0  NaN
2  NaN  7.0
3  4.0  8.0
DataFrame with Dropped Rows:
     A    B
0  1.0  5.0
3  4.0  8.0


## Imputation with Mean, Median, or Mode:

In [2]:
# Three per-column fill strategies, each producing a new frame and leaving df
# itself untouched. fillna() is given a Series of statistics, which it applies
# column by column.
df_imputed_mean = df.fillna(value=df.mean())
df_imputed_median = df.fillna(value=df.median())

# mode() returns a frame rather than a single row, so .iloc[0] takes its first
# row to use as the per-column fill values.
df_imputed_mode = df.fillna(value=df.mode().iloc[0])

# Report each variant under its own heading.
print("DataFrame with Mean Imputation:", df_imputed_mean, sep="\n")

print("\nDataFrame with Median Imputation:", df_imputed_median, sep="\n")

print("\nDataFrame with Mode Imputation:", df_imputed_mode, sep="\n")


DataFrame with Mean Imputation:
          A         B
0  1.000000  5.000000
1  2.000000  6.666667
2  2.333333  7.000000
3  4.000000  8.000000

DataFrame with Median Imputation:
     A    B
0  1.0  5.0
1  2.0  7.0
2  2.0  7.0
3  4.0  8.0

DataFrame with Mode Imputation:
     A    B
0  1.0  5.0
1  2.0  5.0
2  1.0  7.0
3  4.0  8.0


## Using KNNImputer:

In [4]:
from sklearn.impute import KNNImputer

# KNN-based imputation: each missing entry is estimated from the rows that are
# closest on the observed features (n_neighbors=2 -> two neighbours are used).
knn_imputer = KNNImputer(n_neighbors=2)

# The imputer's result is rewrapped as a DataFrame. Passing index=df.index in
# addition to columns=df.columns keeps the imputed rows aligned with df even
# when df does not use the default 0..n-1 index; without it the result would
# silently get a fresh RangeIndex.
df_knn_imputed = pd.DataFrame(
    knn_imputer.fit_transform(df),
    columns=df.columns,
    index=df.index,
)

print("DataFrame with KNN Imputation:")
print(df_knn_imputed)


DataFrame with KNN Imputation:
     A    B
0  1.0  5.0
1  2.0  6.5
2  2.5  7.0
3  4.0  8.0


## Managing Outliers

In [5]:
from sklearn.preprocessing import RobustScaler
import numpy as np

# Three ordinary rows followed by one extreme row, so the effect of scaling on
# an outlier is easy to see in the printed result.
data = np.array([
    [1.0, 2.0],
    [2.0, 3.0],
    [3.0, 4.0],
    [100.0, 200.0],
])

# With default settings RobustScaler centres each column on its median and
# divides by its interquartile range, so the outlier row does not dominate
# the scale the way it would with mean/std scaling.
scaler = RobustScaler()
robust_scaled_data = scaler.fit(data).transform(data)

# Print the input and the scaled output, each under its own label.
print("Original Data:", data, sep="\n")
print("\nRobust Scaled Data:", robust_scaled_data, sep="\n")


Original Data:
[[  1.   2.]
 [  2.   3.]
 [  3.   4.]
 [100. 200.]]

Robust Scaled Data:
[[-0.05882353 -0.02985075]
 [-0.01960784 -0.00995025]
 [ 0.01960784  0.00995025]
 [ 3.82352941  3.91044776]]


In [6]:
import numpy as np
from sklearn.impute import SimpleImputer

# 3x3 sample matrix; the middle and last columns each contain one NaN.
data = np.array([
    [1, 2, np.nan],
    [4, np.nan, 6],
    [7, 8, 9],
])

# strategy='mean' replaces each NaN with the mean of the observed values in
# the same column.
imputer = SimpleImputer(strategy='mean')

# Learn the per-column means and fill the gaps in a single call; `data` itself
# keeps its NaNs, as the first print below shows.
imputed_data = imputer.fit_transform(data)

# Show the matrix before and after imputation.
print("Original Data:\n", data)
print("\nImputed Data:\n", imputed_data)


Original Data:
 [[ 1.  2. nan]
 [ 4. nan  6.]
 [ 7.  8.  9.]]

Imputed Data:
 [[1.  2.  7.5]
 [4.  5.  6. ]
 [7.  8.  9. ]]
