## Data Manipulation using Python

### Question 1: Filtering Data
#### Task: Filter the rows of a DataFrame where the value in the "age" column is greater than 25.

In [1]:
import pandas as pd

# Example data: one record per person (name, age, home city).
data = {
    'name': ['Alice', 'Bob', 'Charlie', 'David'],
    'age': [24, 27, 22, 32],
    'city': ['New York', 'Los Angeles', 'Chicago', 'Houston'],
}
df = pd.DataFrame(data)

# Build a boolean mask that is True where age exceeds 25, then keep
# only the rows selected by that mask (original index labels are kept).
older_than_25 = df['age'].gt(25)
filtered_df = df.loc[older_than_25]
print(filtered_df)


    name  age         city
1    Bob   27  Los Angeles
3  David   32      Houston


In [11]:
import pandas as pd

# Example data: one record per person (name, age, home city).
data = {
    'name': ['Alice', 'Bob', 'Charlie', 'David'],
    'age': [24, 27, 22, 32],
    'city': ['New York', 'Los Angeles', 'Chicago', 'Houston'],
}
df = pd.DataFrame(data)

# Keep rows that satisfy BOTH conditions: age above 23 and city equal to
# "New York". Each condition is a boolean Series; `&` combines them
# element-wise.
is_over_23 = df['age'] > 23
in_new_york = df['city'] == "New York"
filtered_df = df[is_over_23 & in_new_york]
print(filtered_df)


    name  age      city
0  Alice   24  New York


### Question 2: Grouping and Aggregation
#### Task: Group the DataFrame by the "city" column and calculate the mean age for each city.

In [12]:
import pandas as pd

# Example data: one record per person (name, age, home city).
data = {
    'name': ['Alice', 'Bob', 'Charlie', 'David'],
    'age': [24, 27, 22, 32],
    'city': ['New York', 'Los Angeles', 'Chicago', 'Houston'],
}
df = pd.DataFrame(data)

# Step 1: average the 'age' values within each city (result is a Series
# indexed by city).
mean_age_by_city = df.groupby('city')['age'].mean()
# Step 2: turn the city index back into an ordinary column so the result
# is a two-column DataFrame (city, age).
grouped_df = mean_age_by_city.reset_index()
print(grouped_df)


          city   age
0      Chicago  22.0
1      Houston  32.0
2  Los Angeles  27.0
3     New York  24.0


### Question 3: Adding a New Column
#### Task: Add a new column to the DataFrame that shows whether the person is above 25 years old.



In [13]:
import pandas as pd

# Example data: one record per person (name, age, home city).
data = {
    'name': ['Alice', 'Bob', 'Charlie', 'David'],
    'age': [24, 27, 22, 32],
    'city': ['New York', 'Los Angeles', 'Chicago', 'Houston'],
}
df = pd.DataFrame(data)

# Attach a boolean flag column: True when the person's age is strictly
# greater than 25, False otherwise.
df['above_25'] = df['age'].gt(25)
print(df)


      name  age         city  above_25
0    Alice   24     New York     False
1      Bob   27  Los Angeles      True
2  Charlie   22      Chicago     False
3    David   32      Houston      True


### Question 4: Merging DataFrames
#### Task: Merge two DataFrames on the "name" column.

In [16]:
import pandas as pd

# Two example tables that share a 'name' key: one holds ages, the other
# holds cities. Only 'Alice' and 'Bob' appear in both.
data1 = {
    'name': ['Alice', 'Bob', 'Charlie'],
    'age': [24, 27, 22],
}
data2 = {
    'name': ['Alice', 'Bob', 'David'],
    'city': ['New York', 'Los Angeles', 'Houston'],
}
df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)

# Inner join on 'name': keep only the names present in both tables.
merged_df = df1.merge(df2, on='name', how='inner')
print(merged_df)


    name  age         city
0  Alice   24     New York
1    Bob   27  Los Angeles


### Question 5: Handling Missing Data
#### Task: Fill the missing values in the "age" column with the mean age of the DataFrame.

In [18]:
import pandas as pd

# Sample DataFrame with missing values (Bob's age is unknown)
data = {'name': ['Alice', 'Bob', 'Charlie', 'David'],
        'age': [24, None, 22, 32],
        'city': ['New York', 'Los Angeles', 'Chicago', 'Houston']}
df = pd.DataFrame(data)

# Fill missing values in 'age' with the mean age.
# Series.mean() skips NaN by default, so the mean is computed over the
# known ages only. The filled Series is assigned back to the column
# rather than calling fillna(..., inplace=True) on df['age']: that
# chained in-place form operates on a temporary object, triggers a
# FutureWarning on pandas 2.x, and leaves df unchanged under
# Copy-on-Write.
df['age'] = df['age'].fillna(df['age'].mean())
print(df)


      name   age         city
0    Alice  24.0     New York
1      Bob  26.0  Los Angeles
2  Charlie  22.0      Chicago
3    David  32.0      Houston
