In [57]:
import pandas as pd
import numpy as np

In [None]:
# --- Loading Data 

In [79]:
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', None],
    'Age': [25, None, 30, 22, 35],
    'City': ['New York', 'Los Angeles', 'Chicago', 'New York', 'Chicago'],
    'Score': [85, 92, None, 70, 88]
}

df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

Original DataFrame:
      Name   Age         City  Score
0    Alice  25.0     New York   85.0
1      Bob   NaN  Los Angeles   92.0
2  Charlie  30.0      Chicago    NaN
3    David  22.0     New York   70.0
4     None  35.0      Chicago   88.0


In [None]:
# --- Handling Missing Values

In [None]:
# --- Identifying 

In [81]:
print("\nMissing Values:")
print(df.isnull().sum())


Missing Values:
Name     1
Age      1
City     0
Score    1
dtype: int64


In [None]:
# --- Droping

In [83]:
df_dropped = df.dropna()
print("\nDataFrame after dropping rows with missing values:")
print(df_dropped)


DataFrame after dropping rows with missing values:
    Name   Age      City  Score
0  Alice  25.0  New York   85.0
3  David  22.0  New York   70.0


In [None]:
# --- Filling

In [35]:
df['Age'] = df['Age'].fillna(df['Age'].mean())

print("\nDataFrame after filling missing values:")
print(df)


DataFrame after filling missing values:
      Name   Age         City  Score
0    Alice  25.0     New York   85.0
1      Bob  28.0  Los Angeles   92.0
2  Charlie  30.0      Chicago    NaN
3    David  22.0     New York   70.0
4     None  35.0      Chicago   88.0


In [None]:
# --- Removing Duplicates

In [45]:
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, None],
}

df = pd.DataFrame(data)

print("Original DataFrame:")
print(df)

df = pd.concat([df, df.iloc[[1]]], ignore_index=True)

print("\nDataFrame with duplicates:")
print(df)

Original DataFrame:
      Name   Age
0    Alice  25.0
1      Bob  30.0
2  Charlie   NaN

DataFrame with duplicates:
      Name   Age
0    Alice  25.0
1      Bob  30.0
2  Charlie   NaN
3      Bob  30.0


In [None]:
# --- Renaming Columns

In [47]:
df.rename(columns={'Name': 'Full Name', 'Score': 'Test Score'}, inplace=True)
print("\nDataFrame after renaming columns:")
print(df)


DataFrame after renaming columns:
  Full Name   Age
0     Alice  25.0
1       Bob  30.0
2   Charlie   NaN
3       Bob  30.0


In [None]:
# --- Changing Data Types

In [63]:
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, None],
}

df = pd.DataFrame(data)

df['Age'] = df['Age'].fillna(df['Age'].mean())

df['Age'] = df['Age'].astype(int)

print("\nDataFrame after changing 'Age' to integer:")
print(df)


DataFrame after changing 'Age' to integer:
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   27


In [None]:
# --- Filtering Data

In [65]:
filtered_df = df[df['Age'] > 25]
print("\nFiltered DataFrame (Age > 25):")
print(filtered_df)


Filtered DataFrame (Age > 25):
      Name  Age
1      Bob   30
2  Charlie   27


In [None]:
# --- Grouping & Aggregating Data

In [71]:
data = {
    'City': ['New York', 'Los Angeles', 'New York', 'Chicago'],
    'Test Score': [85, 90, 78, 88],
}

df = pd.DataFrame(data)

print("Columns in the DataFrame:")
print(df.columns)

if 'City' in df.columns and 'Test Score' in df.columns:
    average_score = df.groupby('City')['Test Score'].mean()
    print("\nAverage Test Score by City:")
    print(average_score)
else:
    print("The required columns are not present in the DataFrame.")

Columns in the DataFrame:
Index(['City', 'Test Score'], dtype='object')

Average Test Score by City:
City
Chicago        88.0
Los Angeles    90.0
New York       81.5
Name: Test Score, dtype: float64


In [None]:
# --- Adding New Columns

In [77]:
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 15, None], 
}

df = pd.DataFrame(data)

# Check columns and data
print("Columns in the DataFrame:")
print(df.columns)
print("\nDataFrame preview:")
print(df.head())

if 'Age' in df.columns:
    df['Age Group'] = ['Adult' if age >= 18 else 'Minor' if pd.notna(age) else 'Unknown' for age in df['Age']]
    print("\nDataFrame after adding 'Age Group':")
    print(df)
else:
    print("'Age' column does not exist in the DataFrame.")

Columns in the DataFrame:
Index(['Name', 'Age'], dtype='object')

DataFrame preview:
      Name   Age
0    Alice  25.0
1      Bob  15.0
2  Charlie   NaN

DataFrame after adding 'Age Group':
      Name   Age Age Group
0    Alice  25.0     Adult
1      Bob  15.0     Minor
2  Charlie   NaN   Unknown
