In [33]:
import pandas as pd
import numpy as np

In [35]:
# --- Missing Values

In [37]:
# Small sample table with one deliberately missing entry per column,
# used by the following cells to demonstrate missing-value handling.
data = dict(
    Name=['Alice', 'Bob', None, 'Charlie'],
    Age=[30, None, 35, 25],
    City=['New York', 'Los Angeles', None, 'Chicago'],
)

df = pd.DataFrame(data)

In [39]:
# Boolean mask of the same shape as df: True wherever a value is missing.
print(df.isna())

    Name    Age   City
0  False  False  False
1  False   True  False
2   True  False   True
3  False  False  False


In [41]:
# --- Dropping Missing Values

In [43]:
# Keep only the rows that have no missing value in any column.
# axis=0 / how='any' are the defaults, spelled out here for the reader.
df = df.dropna(axis=0, how='any')

In [45]:
# Show the rows that remain after dropna(); the surviving rows keep their
# original index labels.
print(df)

      Name   Age      City
0    Alice  30.0  New York
3  Charlie  25.0   Chicago


In [47]:
# --- Data Cleaning Techniques

In [49]:
# --- Removing Duplicates

In [51]:
# Sample table in which the row at index 2 repeats the row at index 0 exactly.
data_with_duplicates = dict(
    Name=['Alice', 'Bob', 'Alice', 'Charlie'],
    Age=[30, 25, 30, 25],
    City=['New York', 'Los Angeles', 'New York', 'Chicago'],
)

df_duplicates = pd.DataFrame(data_with_duplicates)

# A row counts as a duplicate only when every column matches; the first
# occurrence is kept (keep='first' is the default) and df_duplicates itself
# is left unchanged.
df_no_duplicates = df_duplicates.drop_duplicates(keep='first')

In [53]:
# Display the de-duplicated frame. No reset_index() was applied, so the
# original row labels are kept and the dropped row leaves a gap in the index.
print(df_no_duplicates)

      Name  Age         City
0    Alice   30     New York
1      Bob   25  Los Angeles
3  Charlie   25      Chicago


In [54]:
# --- Converting Data Types

In [57]:
# Sample table whose 'Age' column contains a missing value.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, None],
)

df = pd.DataFrame(data)

print("Columns in DataFrame:", df.columns)

# Normalise the column labels by trimming surrounding whitespace before
# looking a column up by name.
df.columns = df.columns.str.strip()

if 'Age' not in df.columns:
    print("Column 'Age' does not exist in the DataFrame.")
else:
    # 'Int64' (capital I) is pandas' nullable integer dtype: the missing
    # entry becomes <NA> and the remaining values are stored as integers.
    df['Age'] = df['Age'].astype('Int64')

print(df.dtypes)

Columns in DataFrame: Index(['Name', 'Age'], dtype='object')
Name    object
Age      Int64
dtype: object


In [59]:
# --- Renaming Columns

In [61]:
# `df` at this point holds only the 'Name' and 'Age' columns, so 'Name' is
# the only label that needs a mapping. rename() silently skips mapping keys
# that match no column (errors='ignore' is the default), so listing a column
# that is not present would have no effect and could hide a stale or
# misspelled name. rename() returns a new frame; `df` itself is not modified.
df_renamed = df.rename(columns={'Name': 'Full Name'})

In [63]:
# Display the renamed copy; rename() was called without inplace, so the
# result lives in df_renamed and df keeps its original column labels.
print(df_renamed)

  Full Name   Age
0     Alice    25
1       Bob    30
2   Charlie  <NA>


In [65]:
# --- String Cleaning

In [67]:
# Sample table whose first 'City' value carries stray leading and trailing
# spaces and whose values use mixed case.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, None],
    City=[' New York ', 'Los Angeles', 'Chicago'],
)

df = pd.DataFrame(data)

print("Columns in DataFrame:", list(df.columns))

# Trim whitespace around the column labels before looking a column up by name.
df.columns = df.columns.str.strip()

if 'City' not in df.columns:
    print("Column 'City' does not exist in the DataFrame.")
else:
    # Vectorised string clean-up: trim surrounding whitespace first, then
    # lower-case every value.
    df['City'] = df['City'].str.strip().str.lower()
    print("\nUpdated 'City' column:")
    print(df['City'])

Columns in DataFrame: ['Name', 'Age', 'City']

Updated 'City' column:
0       new york
1    los angeles
2        chicago
Name: City, dtype: object
