# Jupyter Notebook: Intermediate Pandas Skills

## 5. Multi-Indexing

### 5.1 Creating MultiIndex



In [29]:
# Creating a MultiIndex DataFrame
import pandas as pd 
import numpy as np 

# Creating a Series
s = pd.Series([1, 3, 5, np.nan, 6, 8])
print(s)

# Creating a DataFrame
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 22],
    'City': ['New York', 'San Francisco', 'Los Angeles']
})
print(df)



0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64
      Name  Age           City
0    Alice   25       New York
1      Bob   30  San Francisco
2  Charlie   22    Los Angeles


In [30]:
multi_index_df = df.set_index(['City', 'Name'])


### 5.2 Working with MultiIndex


In [31]:
# Selecting data from a MultiIndex DataFrame


selected_data = multi_index_df.loc['New York']
selected_data

Unnamed: 0_level_0,Age
Name,Unnamed: 1_level_1
Alice,25


## 6. Merging and Joining
### 6.1 Combining DataFrames
#### 6.1.1 Concatenation

In [32]:
import pandas as pd

# Create DataFrame df1
df1 = pd.DataFrame({
    'City': ['New York', 'San Francisco', 'Los Angeles'],
    'Population': [8500000, 884000, 3990000],
    'Area (sq. miles)': [468.9, 46.9, 468.7]
})

# Create DataFrame df2
df2 = pd.DataFrame({
    'City': ['New York', 'San Francisco', 'Los Angeles'],
    'Average Income': [65000, 80000, 70000],
    'Median Age': [35, 38, 34]
})

# Display the DataFrames
print("DataFrame 1:")
print(df1)

print("\nDataFrame 2:")
print(df2)

DataFrame 1:
            City  Population  Area (sq. miles)
0       New York     8500000             468.9
1  San Francisco      884000              46.9
2    Los Angeles     3990000             468.7

DataFrame 2:
            City  Average Income  Median Age
0       New York           65000          35
1  San Francisco           80000          38
2    Los Angeles           70000          34


In [33]:
# Concatenating DataFrames along rows
concatenated_df = pd.concat([df1, df2])


#### 6.1.2 Merging


In [37]:
# Merging DataFrames based on a common column
merged_df = pd.merge(df1, df2, on='City', how='inner')


### 6.2 Joining DataFrames

In [35]:
# Joining DataFrames based on index
joined_df = df1.join(df2, how='inner', lsuffix='_left', rsuffix='_right')


## 7. Time Series Data
### 7.1 Handling Time Series Data
#### 7.1.1 Converting to DateTime

In [41]:
# Generating date-related data
date_range = pd.date_range(start='2022-01-01', periods=5, freq='D')
date_df = pd.DataFrame({'Date': date_range, 'Value': [10, 15, 20, 25, 30]})




7.1.2 Resampling Time Series Data


In [42]:
# Converting a column to DateTime format
date_df['Date'] = pd.to_datetime(date_df['Date'])


#### 7.1.2 Converting to DateTime


In [43]:
# Converting a column to DateTime format
date_df['Date'] = pd.to_datetime(date_df['Date'])


#### 7.1.3 Resampling Time Series Data


In [44]:
# Resampling daily data to monthly data
monthly_data = date_df.resample('M', on='Date').sum()


  monthly_data = date_df.resample('M', on='Date').sum()


## 8. Advanced Data Cleaning
### 8.1 Handling Outliers

In [54]:
# Generating date-related data
date_range = pd.date_range(start='2022-01-01', periods=5, freq='D')
date_df = pd.DataFrame({'Date': date_range, 'Value': [10, 15, 20, 25, 30]})

# Detecting and handling outliers using Z-score
from scipy.stats import zscore
outliers = date_df[(np.abs(zscore(date_df['Value'])) > 3)]
cleaned_df = date_df[(np.abs(zscore(date_df['Value'])) <= 3)]


### 8.2 Advanced Techniques for Handling Missing Data


In [55]:
# Interpolating missing values in a DataFrame
interpolated_df = df.interpolate()


  interpolated_df = df.interpolate()


## 8.3 Practice Exercises
### 8.3.1 Exercise 5: MultiIndex Selection

In [None]:
# Your task: Select data for individuals named 'Alice' from 'New York'.


In [57]:
selected_data_alice = multi_index_df.loc[('New York', 'Alice')]
