<a href="https://colab.research.google.com/github/AFSHAL-7/Datascience/blob/main/Afshal1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder

# Sample DataFrame
# Every column except "Name" contains exactly one missing entry (np.nan),
# so each cleaning step below has something to act on.
data = {
    "Name": ["Alice", "Bob", "Charlie", "David", "Eve"],
    "Age": [25, np.nan, 30, 22, 28],
    "Marks": [85, 78, np.nan, 90, 88],
    "Attendance": [90, np.nan, 80, 95, 85],
    "Passed": ["Yes", "No", "Yes", "No", np.nan],
}
df = pd.DataFrame(data)

# Check for missing values
print(df.isnull().sum())

# Remove missing values
# dropna() returns a new frame; `df` keeps its NaNs for the imputation below.
df_cleaned = df.dropna()
print(df_cleaned)

# Fill missing values
# Numerical: Mean/Median Imputation
# The filled column is assigned back to `df` rather than calling
# `df[col].fillna(..., inplace=True)`: that chained form operates on an
# intermediate Series, emits a FutureWarning, and no longer updates `df`
# starting with pandas 3.0.
df["Age"] = df["Age"].fillna(df["Age"].mean())
df["Marks"] = df["Marks"].fillna(df["Marks"].median())
df["Attendance"] = df["Attendance"].fillna(df["Attendance"].mean())

# Categorical: Mode Imputation
# mode() returns a Series of the most frequent value(s); take the first one.
df["Passed"] = df["Passed"].fillna(df["Passed"].mode()[0])

# Forward Fill & Backward Fill
# Called on the DataFrame itself, so inplace=True is safe here. Every column
# has already been imputed above, so on this data these calls change nothing.
df.ffill(inplace=True)
df.bfill(inplace=True)

# Remove Duplicates
df.drop_duplicates(inplace=True)

# Feature Scaling
# Both scalers are fitted on the same pair of numeric columns read from `df`
# (not from `df_scaled`), so each one starts from the unscaled values.
scale_cols = ["Marks", "Attendance"]

# Standardization
df_scaled = df.copy()
scaler = StandardScaler()
df_scaled[scale_cols] = scaler.fit_transform(df[scale_cols])
print(df_scaled)

# Min-Max Scaling
# Overwrites the standardized columns of `df_scaled` with min-max scaled ones.
scaler = MinMaxScaler()
df_scaled[scale_cols] = scaler.fit_transform(df[scale_cols])
print(df_scaled)

# One-Hot Encoding
# Works on `df` while "Passed" still holds its original string labels;
# drop_first=True drops the first dummy level.
df_encoded = pd.get_dummies(df, drop_first=True, columns=["Passed"])
print(df_encoded)

# Label Encoding
# Replaces the "Passed" column of `df` itself with integer codes.
encoder = LabelEncoder()
passed_codes = encoder.fit_transform(df["Passed"])
df["Passed"] = passed_codes
print(df)

# Feature Engineering
# Creating a new feature: Performance Category
def performance_category(marks):
    """Map a numeric mark to a performance label.

    Returns "High" for marks of 85 or more, "Medium" for marks of at least
    70 but below 85, and "Low" for anything else (including values for
    which both comparisons are false, such as NaN).
    """
    # Guard clauses ordered from the highest threshold down.
    if marks >= 85:
        return "High"
    if marks >= 70:
        return "Medium"
    return "Low"

# Label every row by passing its mark through performance_category.
df["Performance"] = df["Marks"].map(performance_category)
print(df)

# Binning (Converting Continuous to Categorical Data)
# Three right-closed intervals: (18, 21], (21, 24], (24, 30].
# Ages outside these edges get no label (NaN).
age_edges = [18, 21, 24, 30]
age_labels = ["Young", "Adult", "Senior"]
df["Age_Group"] = pd.cut(df["Age"], bins=age_edges, labels=age_labels)
print(df)

Name          0
Age           1
Marks         1
Attendance    1
Passed        1
dtype: int64
    Name   Age  Marks  Attendance Passed
0  Alice  25.0   85.0        90.0    Yes
3  David  22.0   90.0        95.0     No
      Name    Age     Marks  Attendance Passed
0    Alice  25.00 -0.121988         0.5    Yes
1      Bob  26.25 -1.829813         0.0     No
2  Charlie  30.00  0.243975        -1.5    Yes
3    David  22.00  1.097888         1.5     No
4      Eve  28.00  0.609938        -0.5     No
      Name    Age     Marks  Attendance Passed
0    Alice  25.00  0.583333    0.666667    Yes
1      Bob  26.25  0.000000    0.500000     No
2  Charlie  30.00  0.708333    0.000000    Yes
3    David  22.00  1.000000    1.000000     No
4      Eve  28.00  0.833333    0.333333     No
      Name    Age  Marks  Attendance  Passed_Yes
0    Alice  25.00   85.0        90.0        True
1      Bob  26.25   78.0        87.5       False
2  Charlie  30.00   86.5        80.0        True
3    David  22.00   90.0

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Age"].fillna(df["Age"].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Marks"].fillna(df["Marks"].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are sett