# Exercise

## Use this for the exercise

In [58]:
import pandas as pd
import numpy as np

# Create a DataFrame from a dictionary
data = {
    'Name': ['John', 'Alice', 'Bob', 'Emily', 'David'],
    'Age': [30, 25, 35, 28, 32],
    'City': ['New York', 'Los Angeles', 'Chicago', 'Houston', 'San Francisco'],
    'Gender': ['M', 'F', 'M', 'F', 'M']
}
df = pd.DataFrame(data)

# Save DataFrame to a CSV file
df.to_csv('data.csv', index=False)


# Create a new DataFrame from a dictionary
new_data = {
    'Name': ['Michael', 'Sarah'],
    'Age': [41, 40],
    'City': ['Miami', 'Seattle'],
    'Gender': ['M', 'F']
}

new_df = pd.DataFrame(new_data)

# Save DataFrame to a CSV file
new_df.to_csv('new_data.csv', index=False)


## Question 1:

### How do you read a CSV file into a pandas DataFrame?

In [106]:
df = pd.read_csv('data.csv')
df

Unnamed: 0,Name,Age,City,Gender
0,John,30,New York,M
1,Alice,25,Los Angeles,F
2,Bob,35,Chicago,M
3,Emily,28,Houston,F
4,David,32,San Francisco,M


In [107]:
new_df = pd.read_csv('new_data.csv')
new_df

Unnamed: 0,Name,Age,City,Gender
0,Michael,41,Miami,M
1,Sarah,40,Seattle,F


## Question 2:

### How do you select only specific columns from a DataFrame?

In [108]:
specific_columns=df[['Name', 'Age']]
specific_columns

Unnamed: 0,Name,Age
0,John,30
1,Alice,25
2,Bob,35
3,Emily,28
4,David,32


## Question 3:

### How do you filter rows in a DataFrame based on a condition?

In [109]:
# Filter rows where Age is less than 30
filter_df = df[df['Age'] < 30]
filter_df

Unnamed: 0,Name,Age,City,Gender
1,Alice,25,Los Angeles,F
3,Emily,28,Houston,F


## Question 4:

### How do you drop missing values (NaN) from a DataFrame?

In [110]:
drop_df = df.dropna()
drop_df

Unnamed: 0,Name,Age,City,Gender
0,John,30,New York,M
1,Alice,25,Los Angeles,F
2,Bob,35,Chicago,M
3,Emily,28,Houston,F
4,David,32,San Francisco,M


## Question 5:

### How do you merge two DataFrames in pandas?

In [105]:
new_df

Unnamed: 0,Name,Age,City,Gender
0,Michael,41,Miami,M
1,Sarah,40,Seattle,F


In [104]:
merged_data = pd.merge(df, new_df, how='outer')
merged_data

Unnamed: 0,Name,Age,City,Gender
0,John,30,New York,M
1,Alice,25,Los Angeles,F
2,Bob,35,Chicago,M
3,Emily,28,Houston,F
4,David,32,San Francisco,M
5,Michael,41,Miami,M
6,Sarah,40,Seattle,F


## Question 6:

### How do you create a new DataFrame from a dictionary?

In [103]:
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,City,Gender
0,John,30,New York,M
1,Alice,25,Los Angeles,F
2,Bob,35,Chicago,M
3,Emily,28,Houston,F
4,David,32,San Francisco,M


## Question 7:

### How do you fill missing values in a DataFrame with a specific value?

In [102]:
filled_df = df.fillna('Unknown')
filled_df

Unnamed: 0,Name,Age,City,Gender
0,John,30,New York,M
1,Alice,25,Los Angeles,F
2,Bob,35,Chicago,M
3,Emily,28,Houston,F
4,David,32,San Francisco,M


## Question 8:

### How do you create a DataFrame from a CSV file with custom column names?

In [113]:
# Define custom column names
column_names = ['Custom_Name', 'Custom_Age', 'Custom_City', 'Custom_Gender']

# Read CSV file with custom column names
df1 = pd.read_csv('data.csv', names=column_names)
df1

Unnamed: 0,Custom_Name,Custom_Age,Custom_City,Custom_Gender
0,Name,Age,City,Gender
1,John,30,New York,M
2,Alice,25,Los Angeles,F
3,Bob,35,Chicago,M
4,Emily,28,Houston,F
5,David,32,San Francisco,M


## Question 9:

### How do you merge two DataFrames based on multiple columns?

In [100]:
merged_df1 = pd.merge(df, new_df, on=['Name', 'Age', 'City', 'Gender'], how='outer')
merged_df1

Unnamed: 0,Name,Age,City,Gender
0,John,30,New York,M
1,Alice,25,Los Angeles,F
2,Bob,35,Chicago,M
3,Emily,28,Houston,F
4,David,32,San Francisco,M
5,Michael,41,Miami,M
6,Sarah,40,Seattle,F


## Question 10:

### How do you perform an inner join between two DataFrames in pandas?

In [99]:
inner_join = df.join(new_df, how='inner',lsuffix='_left', rsuffix='_right')
inner_join 

Unnamed: 0,Name_left,Age_left,City_left,Gender_left,Name_right,Age_right,City_right,Gender_right
0,John,30,New York,M,Michael,41,Miami,M
1,Alice,25,Los Angeles,F,Sarah,40,Seattle,F


## Question 11:

### How do you perform a left join between two DataFrames in pandas?

In [98]:
left_join = df.join(new_df, how='left',lsuffix='_left', rsuffix='_right')
left_join

Unnamed: 0,Name_left,Age_left,City_left,Gender_left,Name_right,Age_right,City_right,Gender_right
0,John,30,New York,M,Michael,41.0,Miami,M
1,Alice,25,Los Angeles,F,Sarah,40.0,Seattle,F
2,Bob,35,Chicago,M,,,,
3,Emily,28,Houston,F,,,,
4,David,32,San Francisco,M,,,,


## Question 12:

### How do you perform a right join between two DataFrames in pandas?

In [97]:
right_join = df.join(new_df, how='right',lsuffix='_left', rsuffix='_right')
right_join

Unnamed: 0,Name_left,Age_left,City_left,Gender_left,Name_right,Age_right,City_right,Gender_right
0,John,30,New York,M,Michael,41,Miami,M
1,Alice,25,Los Angeles,F,Sarah,40,Seattle,F


## Question 13:

### How do you concatenate two DataFrames vertically in pandas?

In [92]:
# Vertical concatenation (row-wise)
vertical_com = pd.concat([df, new_df], axis=0, keys=["df", "new_df"]) 
vertical_com

Unnamed: 0,Unnamed: 1,Name,Age,City,Gender
df,0,John,30,New York,M
df,1,Alice,25,Los Angeles,F
df,2,Bob,35,Chicago,M
df,3,Emily,28,Houston,F
df,4,David,32,San Francisco,M
new_df,0,Michael,41,Miami,M
new_df,1,Sarah,40,Seattle,F


## Question 14:

### How do you concatenate two DataFrames horizontally in pandas?

In [96]:
# Horizontal concatenation (column-wise)
horizontal_com = pd.concat([df, new_df], axis=1, keys=["df", "new_df"])  
horizontal_com

Unnamed: 0_level_0,df,df,df,df,new_df,new_df,new_df,new_df
Unnamed: 0_level_1,Name,Age,City,Gender,Name,Age,City,Gender
0,John,30,New York,M,Michael,41.0,Miami,M
1,Alice,25,Los Angeles,F,Sarah,40.0,Seattle,F
2,Bob,35,Chicago,M,,,,
3,Emily,28,Houston,F,,,,
4,David,32,San Francisco,M,,,,
